1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-10 01:15:31 +02:00

fix hidden fallback images for wikipedia & add more tests

This commit is contained in:
Jan Lukas Gernert 2023-04-02 09:55:25 +02:00
parent 3fa8c9674d
commit fcc5cb0e88
9 changed files with 16936 additions and 1 deletions

View file

@ -48,6 +48,7 @@ impl Readability {
};
if !Util::is_probably_visible(node_ref) {
log::debug!("removing hidden node {match_string}");
node = Util::remove_and_next(node_ref);
continue;
}

View file

@ -547,3 +547,18 @@ async fn wikia() {
async fn wikipedia() {
run_test("wikipedia").await
}
/// Async test that delegates to `run_test` with the case name `wikipedia-2`.
///
/// NOTE(review): the case name presumably selects a matching test resource
/// set; confirm against `run_test`.
#[tokio::test]
async fn wikipedia_2() {
    let case_name = "wikipedia-2";
    run_test(case_name).await
}
/// Async test that delegates to `run_test` with the case name `wikipedia-3`.
///
/// NOTE(review): the case name presumably selects a matching test resource
/// set; confirm against `run_test`.
#[tokio::test]
async fn wikipedia_3() {
    let case_name = "wikipedia-3";
    run_test(case_name).await
}
/// Async test that delegates to `run_test` with the case name `wordpress`.
///
/// NOTE(review): the case name presumably selects a matching test resource
/// set; confirm against `run_test`.
#[tokio::test]
async fn wordpress() {
    let case_name = "wordpress";
    run_test(case_name).await
}

View file

@ -245,7 +245,10 @@ impl Util {
.get_attribute("aria-hidden")
.map(|attr| attr == "true")
.unwrap_or(false);
let has_fallback_image = node.get_class_names().contains("fallback-image");
let has_fallback_image = node
.get_class_names()
.iter()
.any(|class| class.contains("fallback-image"));
!is_hidden && !aria_hidden || has_fallback_image
}