mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-08 16:40:00 +02:00
fix post processing
This commit is contained in:
parent
2528aa3e18
commit
f5b7ff198a
2 changed files with 32 additions and 11 deletions
|
@@ -504,7 +504,7 @@ impl Util {
|
|||
|
||||
// Clean an element of all tags of type "tag" if they look fishy.
|
||||
// "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
|
||||
pub fn clean_conditionally(root: &mut Node, tag: &str) -> Result<(), FullTextParserError> {
|
||||
pub fn clean_conditionally(root: &mut Node, tag: &str) {
|
||||
// Gather counts for other typical elements embedded within.
|
||||
// Traverse backwards so we can remove nodes at the same time
|
||||
// without affecting the traversal.
|
||||
|
@@ -516,11 +516,9 @@ impl Util {
|
|||
.filter(|node| Self::should_remove(node, tag))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for mut node in nodes_to_remove {
|
||||
for mut node in nodes_to_remove.into_iter().rev() {
|
||||
node.unlink();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn should_remove(node: &Node, tag: &str) -> bool {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue