1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-09 00:45:31 +02:00

make cleaning more obvious

This commit is contained in:
Jan Lukas Gernert 2023-03-19 23:09:06 +01:00
parent 11e08ae505
commit 280c516cbe
4 changed files with 27 additions and 23 deletions

View file

@@ -497,10 +497,9 @@ impl Readability {
}
}
crate::FullTextParser::post_process_content(
&mut article_content,
state.clean_conditionally,
)?;
if state.clean_conditionally {
crate::FullTextParser::post_process_page(&mut article_content)?;
}
if needed_to_create_top_candidate {
// We already created a fake div thing, and there wouldn't have been any siblings left

View file

@@ -34,9 +34,7 @@ async fn run_test(name: &str) {
metadata::extract(&xpath_ctx, None, None, &mut article);
super::Readability::extract_body(document, &mut root, article.title.as_deref()).unwrap();
if let Some(mut root) = article_document.get_root_element() {
crate::FullTextParser::post_process_content(&mut root, false).unwrap();
}
crate::FullTextParser::post_process_document(&article_document).unwrap();
article.document = Some(article_document);
let html = article.get_content().unwrap();