mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-09 00:45:31 +02:00
make cleaning more obvious
This commit is contained in:
parent
11e08ae505
commit
280c516cbe
4 changed files with 27 additions and 23 deletions
|
@@ -497,10 +497,9 @@ impl Readability {
|
|||
}
|
||||
}
|
||||
|
||||
crate::FullTextParser::post_process_content(
|
||||
&mut article_content,
|
||||
state.clean_conditionally,
|
||||
)?;
|
||||
if state.clean_conditionally {
|
||||
crate::FullTextParser::post_process_page(&mut article_content)?;
|
||||
}
|
||||
|
||||
if needed_to_create_top_candidate {
|
||||
// We already created a fake div thing, and there wouldn't have been any siblings left
|
||||
|
|
|
@@ -34,9 +34,7 @@ async fn run_test(name: &str) {
|
|||
|
||||
metadata::extract(&xpath_ctx, None, None, &mut article);
|
||||
super::Readability::extract_body(document, &mut root, article.title.as_deref()).unwrap();
|
||||
if let Some(mut root) = article_document.get_root_element() {
|
||||
crate::FullTextParser::post_process_content(&mut root, false).unwrap();
|
||||
}
|
||||
crate::FullTextParser::post_process_document(&article_document).unwrap();
|
||||
|
||||
article.document = Some(article_document);
|
||||
let html = article.get_content().unwrap();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue