1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

Merge branch 'async' into 'master'

Async

See merge request news-flash/article_scraper!3
This commit is contained in:
Jan Lukas Gernert 2020-01-19 21:15:13 +00:00
commit 8025e8f004
3 changed files with 13 additions and 6 deletions

View file

@@ -7,8 +7,8 @@ edition = "2018"
[dependencies]
failure = "0.1"
libxml = { git = "https://github.com/KWARC/rust-libxml.git" }
reqwest = { version = "0.10.0-alpha.2", features = ["json"] }
tokio = { version = "=0.2.0-alpha.6" }
reqwest = { version = "0.10", features = ["json", "native-tls"] }
tokio = { version = "=0.2", features = ["macros"] }
url = "2.1"
regex = "1.3"
encoding_rs = "0.8"

View file

@@ -265,7 +265,7 @@ mod tests {
use std::fs;
use std::io::Write;
#[tokio::test]
#[tokio::test(basic_scheduler)]
async fn close_tags() {
let image_dowloader = ImageDownloader::new((2048, 2048));
let hdyleaflet = fs::read_to_string(r"./resources/tests/planetGnome/fedora31.html")

View file

@@ -392,7 +392,14 @@ impl ArticleScraper {
fn strip_id_or_class(context: &Context, id_or_class: &String) -> Result<(), ScraperError> {
let xpath = &format!("//*[contains(@class, '{}') or contains(@id, '{}')]", id_or_class, id_or_class);
let node_vec = Self::evaluate_xpath(context, xpath, false)?;
let mut ancestor = xpath.clone();
if ancestor.starts_with("//") {
ancestor = ancestor.chars().skip(2).collect();
}
let query = &format!("{}[not(ancestor::{})]", xpath, ancestor);
let node_vec = Self::evaluate_xpath(context, query, false)?;
for mut node in node_vec {
node.unlink();
}
@@ -725,7 +732,7 @@ impl ArticleScraper {
mod tests {
use crate::*;
#[tokio::test]
#[tokio::test(basic_scheduler)]
async fn golem() {
let config_path = PathBuf::from(r"./resources/tests/golem");
let out_path = PathBuf::from(r"./test_output");
@@ -739,7 +746,7 @@ mod tests {
assert_eq!(article.author, Some(String::from("Hauke Gierow")));
}
#[tokio::test]
#[tokio::test(basic_scheduler)]
async fn phoronix() {
let config_path = PathBuf::from(r"./resources/tests/phoronix");
let out_path = PathBuf::from(r"./test_output");