1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-09 00:45:31 +02:00

fix replacing font tags

This commit is contained in:
Jan Lukas Gernert 2023-04-01 12:31:56 +02:00
parent 253afc48f0
commit be6e08bd6d
4 changed files with 30 additions and 8 deletions

View file

@ -646,6 +646,13 @@ impl FullTextParser {
}
}
// rename all font nodes to span
if let Ok(font_nodes) = Util::evaluate_xpath(context, "//font", false) {
for mut font_node in font_nodes {
_ = font_node.set_name("span");
}
}
_ = Util::mark_data_tables(context);
// strip specified xpath
@ -1071,10 +1078,6 @@ impl FullTextParser {
FullTextParserError::Xml
})?;
if node.get_name().to_uppercase() == "FONT" {
node.set_name("span").unwrap();
}
node_iter = Util::next_node(&node, false);
}
Ok(())

View file

@ -705,6 +705,7 @@ impl Readability {
0
};
let score = score + class_weight;
log::debug!("initialize node {} {}: {score}", node.get_name(), node.get_attribute("class").unwrap_or_default());
Self::set_content_score(node, score as f64)?;
Ok(())
}

View file

@ -433,10 +433,10 @@ async fn remove_script_tags() {
run_test("remove-script-tags").await
}
// #[tokio::test]
// async fn replace_font_tags() {
// run_test("replace-font-tags").await
// }
/// Async test for the "replace-font-tags" case.
///
/// Delegates entirely to the `run_test` helper, passing the case name
/// `"replace-font-tags"` and awaiting its completion.
// NOTE(review): `run_test` is defined outside this view; presumably it loads
// the fixture with this name and compares the parser output against the
// expected result — confirm against the helper's definition.
#[tokio::test]
async fn replace_font_tags() {
run_test("replace-font-tags").await
}
#[tokio::test]
async fn webmd_1() {