1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-08 16:40:00 +02:00

stabilize buzzfeed test

This commit is contained in:
Jan Lukas Gernert 2023-03-12 23:13:52 +01:00
parent 848291e4f3
commit b5d8f43ef8
3 changed files with 13 additions and 13 deletions

View file

@@ -188,17 +188,17 @@ impl Readability {
node = Util::next_node(node_ref, false);
}
let html = document.to_string_with_options(libxml::tree::SaveOptions {
format: true,
no_declaration: false,
no_empty_tags: true,
no_xhtml: false,
xhtml: false,
as_xml: false,
as_html: true,
non_significant_whitespace: false,
});
std::fs::write("debug.html", &html).unwrap();
// let html = document.to_string_with_options(libxml::tree::SaveOptions {
// format: true,
// no_declaration: false,
// no_empty_tags: true,
// no_xhtml: false,
// xhtml: false,
// as_xml: false,
// as_html: true,
// non_significant_whitespace: false,
// });
// std::fs::write("debug.html", &html).unwrap();
let mut candidates = Vec::new();
// Loop through all paragraphs, and assign a score to them based on how content-y they look.