diff --git a/resources/tests/readability/cnn/expected.html b/resources/tests/readability/cnn/expected.html index ce8de01..ef9bcf8 100644 --- a/resources/tests/readability/cnn/expected.html +++ b/resources/tests/readability/cnn/expected.html @@ -1,7 +1,6 @@

The U.S. has long been heralded as a land of opportunity -- a place where anyone can succeed regardless of the economic class they were born into.

But a new report released on Monday by Stanford University's Center on Poverty and Inequality calls that into question.

-

The report assessed poverty levels, income and wealth inequality, economic mobility and unemployment levels among 10 wealthy countries with social welfare programs.

Powered by SmartAsset.com diff --git a/src/full_text_parser/mod.rs b/src/full_text_parser/mod.rs index 66cd5f5..959e518 100644 --- a/src/full_text_parser/mod.rs +++ b/src/full_text_parser/mod.rs @@ -848,8 +848,6 @@ impl FullTextParser { Util::mark_data_tables(&context)?; if let Some(mut root) = document.get_root_element() { - Self::remove_extra_p_and_div(&mut root); - Util::clean_conditionally(&mut root, "fieldset"); Util::clean_conditionally(&mut root, "table"); Util::clean_conditionally(&mut root, "ul"); @@ -857,6 +855,8 @@ impl FullTextParser { Self::clean_attributes(&mut root)?; Self::simplify_nested_elements(&mut root)?; + + Self::remove_extra_p_and_div(&mut root); } Ok(()) @@ -866,7 +866,7 @@ impl FullTextParser { let mut node_iter = Some(root.clone()); while let Some(mut node) = node_iter { - let tag_name = node.get_name(); + let tag_name = node.get_name().to_uppercase(); if tag_name == "P" || tag_name == "DIV" { let img_count = Util::get_elements_by_tag_name(&node, "img").len(); let embed_count = Util::get_elements_by_tag_name(&node, "embed").len(); @@ -875,7 +875,9 @@ impl FullTextParser { let total_count = img_count + embed_count + object_count + iframe_count; if total_count == 0 && Util::get_inner_text(&node, false).trim().is_empty() { + node_iter = Util::next_node(&node, false); node.unlink(); + continue; } }