mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
fmt
This commit is contained in:
parent
e3246af28b
commit
63035ca028
4 changed files with 11 additions and 9 deletions
|
@@ -23,7 +23,10 @@ pub fn extract(
|
|||
let new_title = constants::TITLE_CUT_END.replace(&title, "$1");
|
||||
let word_count = constants::WORD_COUNT.split(&title).count();
|
||||
if word_count < 3 {
|
||||
constants::TITLE_CUT_FRONT.replace(&title, "$1").trim().to_string()
|
||||
constants::TITLE_CUT_FRONT
|
||||
.replace(&title, "$1")
|
||||
.trim()
|
||||
.to_string()
|
||||
} else {
|
||||
new_title.trim().to_string()
|
||||
}
|
||||
|
|
|
@@ -837,9 +837,7 @@ impl FullTextParser {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn post_process_content(
|
||||
root: &mut Node
|
||||
) -> Result<(), FullTextParserError> {
|
||||
pub(crate) fn post_process_content(root: &mut Node) -> Result<(), FullTextParserError> {
|
||||
Self::clean_classes(root)?;
|
||||
Self::simplify_nested_elements(root)?;
|
||||
Ok(())
|
||||
|
|
|
@@ -280,8 +280,7 @@ impl Readability {
|
|||
constants::MINIMUM_TOPCANDIDATES,
|
||||
);
|
||||
for ancestor in alternative_candidate_ancestors.iter().take(tmp) {
|
||||
lists_containing_this_ancestor +=
|
||||
if ancestor == parent { 1 } else { 0 };
|
||||
lists_containing_this_ancestor += if ancestor == parent { 1 } else { 0 };
|
||||
}
|
||||
|
||||
if lists_containing_this_ancestor >= constants::MINIMUM_TOPCANDIDATES {
|
||||
|
|
|
@@ -43,11 +43,13 @@ async fn run_test(name: &str) {
|
|||
article.document = Some(article_document);
|
||||
let html = article.get_content().unwrap();
|
||||
|
||||
let expected = std::fs::read_to_string(format!("./resources/tests/readability/{name}/expected.html"))
|
||||
.expect("Failed to read expected HTML");
|
||||
let expected = std::fs::read_to_string(format!(
|
||||
"./resources/tests/readability/{name}/expected.html"
|
||||
))
|
||||
.expect("Failed to read expected HTML");
|
||||
|
||||
//std::fs::write("expected.html", &html).unwrap();
|
||||
|
||||
|
||||
assert_eq!(expected, html);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue