1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

stabilize buzzfeed test

This commit is contained in:
Jan Lukas Gernert 2023-03-12 23:13:52 +01:00
parent 848291e4f3
commit b5d8f43ef8
3 changed files with 13 additions and 13 deletions

View file

@ -10,7 +10,7 @@
<p>“She was literally burning up from within.”</p> <p>“She was literally burning up from within.”</p>
<p>She added: “They never stood a chance of saving her. She burned and crashed.”</p> <p>She added: “They never stood a chance of saving her. She burned and crashed.”</p>
</div> </div>
<div rel:buzz_num="3" id="superlist_3758406_5547140"> <div id="superlist_3758406_5547140">
<div> <div>
<div><p><img src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" rel:bf_image_src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" height="412" width="203"></p></div> <div><p><img src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" rel:bf_image_src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" height="412" width="203"></p></div>
<p>Facebook</p> <p>Facebook</p>

View file

@ -3519,7 +3519,7 @@
<p>“She was literally burning up from within.”</p> <p>“She was literally burning up from within.”</p>
<p>She added: “They never stood a chance of saving her. She burned and crashed.”</p> <p>She added: “They never stood a chance of saving her. She burned and crashed.”</p>
</div> </div>
<div class="buzz_superlist_item buzz_superlist_item_grid_row buzz_superlist_item_wide no_caption " id="superlist_3758406_5547140" rel:buzz_num="3"> <div class="buzz_superlist_item buzz_superlist_item_grid_row buzz_superlist_item_wide no_caption " id="superlist_3758406_5547140">
<div class="grid_row two_pl grid_height_l"> <div class="grid_row two_pl grid_height_l">
<div class="grid_cell cell_1"> <div class="grid_cell cell_1">
<div class="grid_cell_image_wrapper"> <div class="grid_cell_image_wrapper">

View file

@ -188,17 +188,17 @@ impl Readability {
node = Util::next_node(node_ref, false); node = Util::next_node(node_ref, false);
} }
let html = document.to_string_with_options(libxml::tree::SaveOptions { // let html = document.to_string_with_options(libxml::tree::SaveOptions {
format: true, // format: true,
no_declaration: false, // no_declaration: false,
no_empty_tags: true, // no_empty_tags: true,
no_xhtml: false, // no_xhtml: false,
xhtml: false, // xhtml: false,
as_xml: false, // as_xml: false,
as_html: true, // as_html: true,
non_significant_whitespace: false, // non_significant_whitespace: false,
}); // });
std::fs::write("debug.html", &html).unwrap(); // std::fs::write("debug.html", &html).unwrap();
let mut candidates = Vec::new(); let mut candidates = Vec::new();
// Loop through all paragraphs, and assign a score to them based on how content-y they look. // Loop through all paragraphs, and assign a score to them based on how content-y they look.