mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-10 01:15:31 +02:00
fix hidden fallback images for wikipedia & add more tests
This commit is contained in:
parent
3fa8c9674d
commit
fcc5cb0e88
9 changed files with 16936 additions and 1 deletion
|
@ -48,6 +48,7 @@ impl Readability {
|
|||
};
|
||||
|
||||
if !Util::is_probably_visible(node_ref) {
|
||||
log::debug!("removing hidden node {match_string}");
|
||||
node = Util::remove_and_next(node_ref);
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -547,3 +547,18 @@ async fn wikia() {
|
|||
async fn wikipedia() {
|
||||
run_test("wikipedia").await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn wikipedia_2() {
|
||||
run_test("wikipedia-2").await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn wikipedia_3() {
|
||||
run_test("wikipedia-3").await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn wordpress() {
|
||||
run_test("wordpress").await
|
||||
}
|
||||
|
|
|
@ -245,7 +245,10 @@ impl Util {
|
|||
.get_attribute("aria-hidden")
|
||||
.map(|attr| attr == "true")
|
||||
.unwrap_or(false);
|
||||
let has_fallback_image = node.get_class_names().contains("fallback-image");
|
||||
let has_fallback_image = node
|
||||
.get_class_names()
|
||||
.iter()
|
||||
.any(|class| class.contains("fallback-image"));
|
||||
|
||||
!is_hidden && !aria_hidden || has_fallback_image
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue