1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

fix conditional clean of nested tags

This commit is contained in:
Jan Lukas Gernert 2023-03-06 00:03:59 +01:00
parent 9c5ffda5de
commit 7060e30911

View file

@@ -511,13 +511,11 @@ impl Util {
//
// TODO: Consider taking into account original contentScore here.
let nodes = Util::get_elements_by_tag_name(root, tag);
-let nodes_to_remove = nodes
-.into_iter()
-.filter(|node| Self::should_remove(node, tag))
-.collect::<Vec<_>>();
-for mut node in nodes_to_remove.into_iter().rev() {
-node.unlink();
+for mut node in nodes.into_iter().rev() {
+if Self::should_remove(&node, tag) {
+node.unlink();
+}
}
}
@@ -588,7 +586,8 @@ impl Util {
}
let link_density = Self::get_link_density(node);
-let content_length = Self::get_inner_text(node, false).len();
+let content = Self::get_inner_text(node, false);
+let content_length = content.len();
(img > 1
&& (p as f64 / img as f64) < 0.5