mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
stabilize buzzfeed test
This commit is contained in:
parent
848291e4f3
commit
b5d8f43ef8
3 changed files with 13 additions and 13 deletions
|
@@ -10,7 +10,7 @@
|
||||||
<p>“She was literally burning up from within.”</p>
|
<p>“She was literally burning up from within.”</p>
|
||||||
<p>She added: “They never stood a chance of saving her. She burned and crashed.”</p>
|
<p>She added: “They never stood a chance of saving her. She burned and crashed.”</p>
|
||||||
</div>
|
</div>
|
||||||
<div rel:buzz_num="3" id="superlist_3758406_5547140">
|
<div id="superlist_3758406_5547140">
|
||||||
<div>
|
<div>
|
||||||
<div><p><img src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" rel:bf_image_src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" height="412" width="203"></p></div>
|
<div><p><img src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" rel:bf_image_src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" height="412" width="203"></p></div>
|
||||||
<p>Facebook</p>
|
<p>Facebook</p>
|
||||||
|
|
|
@@ -3519,7 +3519,7 @@
|
||||||
<p>“She was literally burning up from within.”</p>
|
<p>“She was literally burning up from within.”</p>
|
||||||
<p>She added: “They never stood a chance of saving her. She burned and crashed.”</p>
|
<p>She added: “They never stood a chance of saving her. She burned and crashed.”</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="buzz_superlist_item buzz_superlist_item_grid_row buzz_superlist_item_wide no_caption " id="superlist_3758406_5547140" rel:buzz_num="3">
|
<div class="buzz_superlist_item buzz_superlist_item_grid_row buzz_superlist_item_wide no_caption " id="superlist_3758406_5547140">
|
||||||
<div class="grid_row two_pl grid_height_l">
|
<div class="grid_row two_pl grid_height_l">
|
||||||
<div class="grid_cell cell_1">
|
<div class="grid_cell cell_1">
|
||||||
<div class="grid_cell_image_wrapper">
|
<div class="grid_cell_image_wrapper">
|
||||||
|
|
|
@@ -188,17 +188,17 @@ impl Readability {
|
||||||
node = Util::next_node(node_ref, false);
|
node = Util::next_node(node_ref, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
let html = document.to_string_with_options(libxml::tree::SaveOptions {
|
// let html = document.to_string_with_options(libxml::tree::SaveOptions {
|
||||||
format: true,
|
// format: true,
|
||||||
no_declaration: false,
|
// no_declaration: false,
|
||||||
no_empty_tags: true,
|
// no_empty_tags: true,
|
||||||
no_xhtml: false,
|
// no_xhtml: false,
|
||||||
xhtml: false,
|
// xhtml: false,
|
||||||
as_xml: false,
|
// as_xml: false,
|
||||||
as_html: true,
|
// as_html: true,
|
||||||
non_significant_whitespace: false,
|
// non_significant_whitespace: false,
|
||||||
});
|
// });
|
||||||
std::fs::write("debug.html", &html).unwrap();
|
// std::fs::write("debug.html", &html).unwrap();
|
||||||
|
|
||||||
let mut candidates = Vec::new();
|
let mut candidates = Vec::new();
|
||||||
// Loop through all paragraphs, and assign a score to them based on how content-y they look.
|
// Loop through all paragraphs, and assign a score to them based on how content-y they look.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue