mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
fix replacing font tags
This commit is contained in:
parent
253afc48f0
commit
be6e08bd6d
4 changed files with 30 additions and 8 deletions
18
resources/tests/readability/replace-font-tags/expected.html
Normal file
18
resources/tests/readability/replace-font-tags/expected.html
Normal file
|
@ -0,0 +1,18 @@
|
|||
<article><DIV id="readability-page-1"><article>
|
||||
<h2>Lorem</h2>
|
||||
<p><span face="Arial" size="2"><span face="Times" size="10">Lorem</span> ipsum dolor</span> sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. <span face="Arial" size="2">Duis</span> aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
</p>
|
||||
<h2>Foo</h2>
|
||||
<p>
|
||||
Tempor incididunt ut labore et <span face="Arial" size="2">dolore</span> magna aliqua. Ut enim ad minim veniam,
|
||||
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||
cillum dolore eu fugiat nulla pariatur. <span face="Arial" size="2">Excepteur sint occaecat</span> cupidatat non
|
||||
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
</p>
|
||||
</article></DIV></article>
|
|
@ -646,6 +646,13 @@ impl FullTextParser {
|
|||
}
|
||||
}
|
||||
|
||||
// rename all font nodes to span
|
||||
if let Ok(font_nodes) = Util::evaluate_xpath(context, "//font", false) {
|
||||
for mut font_node in font_nodes {
|
||||
_ = font_node.set_name("span");
|
||||
}
|
||||
}
|
||||
|
||||
_ = Util::mark_data_tables(context);
|
||||
|
||||
// strip specified xpath
|
||||
|
@ -1071,10 +1078,6 @@ impl FullTextParser {
|
|||
FullTextParserError::Xml
|
||||
})?;
|
||||
|
||||
if node.get_name().to_uppercase() == "FONT" {
|
||||
node.set_name("span").unwrap();
|
||||
}
|
||||
|
||||
node_iter = Util::next_node(&node, false);
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
@ -705,6 +705,7 @@ impl Readability {
|
|||
0
|
||||
};
|
||||
let score = score + class_weight;
|
||||
log::debug!("initialize node {} {}: {score}", node.get_name(), node.get_attribute("class").unwrap_or_default());
|
||||
Self::set_content_score(node, score as f64)?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -433,10 +433,10 @@ async fn remove_script_tags() {
|
|||
run_test("remove-script-tags").await
|
||||
}
|
||||
|
||||
// #[tokio::test]
|
||||
// async fn replace_font_tags() {
|
||||
// run_test("replace-font-tags").await
|
||||
// }
|
||||
/// Runs the shared `run_test` helper against the `replace-font-tags` fixture
/// (see `resources/tests/readability/replace-font-tags/expected.html`).
#[tokio::test]
|
||||
async fn replace_font_tags() {
|
||||
run_test("replace-font-tags").await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn webmd_1() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue