mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
stabilize buzzfeed test
This commit is contained in:
parent
848291e4f3
commit
b5d8f43ef8
3 changed files with 13 additions and 13 deletions
|
@ -10,7 +10,7 @@
|
|||
<p>“She was literally burning up from within.”</p>
|
||||
<p>She added: “They never stood a chance of saving her. She burned and crashed.”</p>
|
||||
</div>
|
||||
<div rel:buzz_num="3" id="superlist_3758406_5547140">
|
||||
<div id="superlist_3758406_5547140">
|
||||
<div>
|
||||
<div><p><img src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" rel:bf_image_src="http://ak-hdl.buzzfed.com/static/2015-04/21/5/enhanced/webdr12/grid-cell-2501-1429608056-15.jpg" height="412" width="203"></p></div>
|
||||
<p>Facebook</p>
|
||||
|
|
|
@ -3519,7 +3519,7 @@
|
|||
<p>“She was literally burning up from within.”</p>
|
||||
<p>She added: “They never stood a chance of saving her. She burned and crashed.”</p>
|
||||
</div>
|
||||
<div class="buzz_superlist_item buzz_superlist_item_grid_row buzz_superlist_item_wide no_caption " id="superlist_3758406_5547140" rel:buzz_num="3">
|
||||
<div class="buzz_superlist_item buzz_superlist_item_grid_row buzz_superlist_item_wide no_caption " id="superlist_3758406_5547140">
|
||||
<div class="grid_row two_pl grid_height_l">
|
||||
<div class="grid_cell cell_1">
|
||||
<div class="grid_cell_image_wrapper">
|
||||
|
|
|
@ -188,17 +188,17 @@ impl Readability {
|
|||
node = Util::next_node(node_ref, false);
|
||||
}
|
||||
|
||||
let html = document.to_string_with_options(libxml::tree::SaveOptions {
|
||||
format: true,
|
||||
no_declaration: false,
|
||||
no_empty_tags: true,
|
||||
no_xhtml: false,
|
||||
xhtml: false,
|
||||
as_xml: false,
|
||||
as_html: true,
|
||||
non_significant_whitespace: false,
|
||||
});
|
||||
std::fs::write("debug.html", &html).unwrap();
|
||||
// let html = document.to_string_with_options(libxml::tree::SaveOptions {
|
||||
// format: true,
|
||||
// no_declaration: false,
|
||||
// no_empty_tags: true,
|
||||
// no_xhtml: false,
|
||||
// xhtml: false,
|
||||
// as_xml: false,
|
||||
// as_html: true,
|
||||
// non_significant_whitespace: false,
|
||||
// });
|
||||
// std::fs::write("debug.html", &html).unwrap();
|
||||
|
||||
let mut candidates = Vec::new();
|
||||
// Loop through all paragraphs, and assign a score to them based on how content-y they look.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue