From 1e71aa2bfb7a408a2a331ace02820bb4b5809c05 Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Fri, 10 Mar 2023 22:17:53 +0100 Subject: [PATCH] remove duplicate code --- src/full_text_parser/mod.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/full_text_parser/mod.rs b/src/full_text_parser/mod.rs index 404646c..66cd5f5 100644 --- a/src/full_text_parser/mod.rs +++ b/src/full_text_parser/mod.rs @@ -845,22 +845,6 @@ impl FullTextParser { })?; } - // Remove extra paragraphs & divs - let mut nodes = Util::evaluate_xpath(&context, "//p", false)?; - nodes.append(&mut Util::evaluate_xpath(&context, "//P", false)?); - nodes.append(&mut Util::evaluate_xpath(&context, "//div", false)?); - for mut node in nodes { - let img_count = Util::get_elements_by_tag_name(&node, "img").len(); - let embed_count = Util::get_elements_by_tag_name(&node, "embed").len(); - let object_count = Util::get_elements_by_tag_name(&node, "object").len(); - let iframe_count = Util::get_elements_by_tag_name(&node, "iframe").len(); - let total_count = img_count + embed_count + object_count + iframe_count; - - if total_count == 0 && Util::get_inner_text(&node, false).trim().is_empty() { - node.unlink(); - } - } - Util::mark_data_tables(&context)?; if let Some(mut root) = document.get_root_element() {