diff --git a/Cargo.toml b/Cargo.toml index b423f30..b85cc40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,8 @@ edition = "2018" [dependencies] failure = "0.1" libxml = { git = "https://github.com/KWARC/rust-libxml.git" } -reqwest = { version = "0.10.0-alpha.2", features = ["json"] } -tokio = { version = "=0.2.0-alpha.6" } +reqwest = { version = "0.10", features = ["json", "native-tls"] } +tokio = { version = "=0.2", features = ["macros"] } url = "2.1" regex = "1.3" encoding_rs = "0.8" diff --git a/src/images/mod.rs b/src/images/mod.rs index e2d2b54..33f2dc9 100644 --- a/src/images/mod.rs +++ b/src/images/mod.rs @@ -265,7 +265,7 @@ mod tests { use std::fs; use std::io::Write; - #[tokio::test] + #[tokio::test(basic_scheduler)] async fn close_tags() { let image_dowloader = ImageDownloader::new((2048, 2048)); let hdyleaflet = fs::read_to_string(r"./resources/tests/planetGnome/fedora31.html") diff --git a/src/lib.rs b/src/lib.rs index a331c25..e3c7c22 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -392,7 +392,14 @@ impl ArticleScraper { fn strip_id_or_class(context: &Context, id_or_class: &String) -> Result<(), ScraperError> { let xpath = &format!("//*[contains(@class, '{}') or contains(@id, '{}')]", id_or_class, id_or_class); - let node_vec = Self::evaluate_xpath(context, xpath, false)?; + + let mut ancestor = xpath.clone(); + if ancestor.starts_with("//") { + ancestor = ancestor.chars().skip(2).collect(); + } + + let query = &format!("{}[not(ancestor::{})]", xpath, ancestor); + let node_vec = Self::evaluate_xpath(context, query, false)?; for mut node in node_vec { node.unlink(); } @@ -725,7 +732,7 @@ impl ArticleScraper { mod tests { use crate::*; - #[tokio::test] + #[tokio::test(basic_scheduler)] async fn golem() { let config_path = PathBuf::from(r"./resources/tests/golem"); let out_path = PathBuf::from(r"./test_output"); @@ -739,7 +746,7 @@ mod tests { assert_eq!(article.author, Some(String::from("Hauke Gierow"))); } - #[tokio::test] + #[tokio::test(basic_scheduler)] async fn phoronix() { let config_path = PathBuf::from(r"./resources/tests/phoronix"); let out_path = PathBuf::from(r"./test_output");