diff --git a/resources/tests/readability/replace-font-tags/expected.html b/resources/tests/readability/replace-font-tags/expected.html new file mode 100644 index 0000000..6b2b9d4 --- /dev/null +++ b/resources/tests/readability/replace-font-tags/expected.html @@ -0,0 +1,18 @@ +
+

Lorem

+

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod + tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo + consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non + proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +

+

Foo

+

+ Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo + consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non + proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +

+
diff --git a/src/full_text_parser/mod.rs b/src/full_text_parser/mod.rs index 6e7e386..9450f78 100644 --- a/src/full_text_parser/mod.rs +++ b/src/full_text_parser/mod.rs @@ -646,6 +646,13 @@ impl FullTextParser { } } + // rename all font nodes to span + if let Ok(font_nodes) = Util::evaluate_xpath(context, "//font", false) { + for mut font_node in font_nodes { + _ = font_node.set_name("span"); + } + } + _ = Util::mark_data_tables(context); // strip specified xpath @@ -1071,10 +1078,6 @@ impl FullTextParser { FullTextParserError::Xml })?; - if node.get_name().to_uppercase() == "FONT" { - node.set_name("span").unwrap(); - } - node_iter = Util::next_node(&node, false); } Ok(()) diff --git a/src/full_text_parser/readability/mod.rs b/src/full_text_parser/readability/mod.rs index 5205157..db2494d 100644 --- a/src/full_text_parser/readability/mod.rs +++ b/src/full_text_parser/readability/mod.rs @@ -705,6 +705,7 @@ impl Readability { 0 }; let score = score + class_weight; + log::debug!("initialize node {} {}: {score}", node.get_name(), node.get_attribute("class").unwrap_or_default()); Self::set_content_score(node, score as f64)?; Ok(()) } diff --git a/src/full_text_parser/readability/tests.rs b/src/full_text_parser/readability/tests.rs index d4950a1..c6d8e8b 100644 --- a/src/full_text_parser/readability/tests.rs +++ b/src/full_text_parser/readability/tests.rs @@ -433,10 +433,10 @@ async fn remove_script_tags() { run_test("remove-script-tags").await } -// #[tokio::test] -// async fn replace_font_tags() { -// run_test("replace-font-tags").await -// } +#[tokio::test] +async fn replace_font_tags() { + run_test("replace-font-tags").await +} #[tokio::test] async fn webmd_1() {