mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
remove duplicate code
This commit is contained in:
parent
a356ced646
commit
1e71aa2bfb
1 changed file with 0 additions and 16 deletions
|
@@ -845,22 +845,6 @@ impl FullTextParser {
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove extra paragraphs & divs
|
|
||||||
let mut nodes = Util::evaluate_xpath(&context, "//p", false)?;
|
|
||||||
nodes.append(&mut Util::evaluate_xpath(&context, "//P", false)?);
|
|
||||||
nodes.append(&mut Util::evaluate_xpath(&context, "//div", false)?);
|
|
||||||
for mut node in nodes {
|
|
||||||
let img_count = Util::get_elements_by_tag_name(&node, "img").len();
|
|
||||||
let embed_count = Util::get_elements_by_tag_name(&node, "embed").len();
|
|
||||||
let object_count = Util::get_elements_by_tag_name(&node, "object").len();
|
|
||||||
let iframe_count = Util::get_elements_by_tag_name(&node, "iframe").len();
|
|
||||||
let total_count = img_count + embed_count + object_count + iframe_count;
|
|
||||||
|
|
||||||
if total_count == 0 && Util::get_inner_text(&node, false).trim().is_empty() {
|
|
||||||
node.unlink();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Util::mark_data_tables(&context)?;
|
Util::mark_data_tables(&context)?;
|
||||||
|
|
||||||
if let Some(mut root) = document.get_root_element() {
|
if let Some(mut root) = document.get_root_element() {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue