mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
Merge branch 'async' into 'master'
Async. See merge request news-flash/article_scraper!3
This commit is contained in:
commit
8025e8f004
3 changed files with 13 additions and 6 deletions
|
@@ -7,8 +7,8 @@ edition = "2018"
|
|||
[dependencies]
|
||||
failure = "0.1"
|
||||
libxml = { git = "https://github.com/KWARC/rust-libxml.git" }
|
||||
reqwest = { version = "0.10.0-alpha.2", features = ["json"] }
|
||||
tokio = { version = "=0.2.0-alpha.6" }
|
||||
reqwest = { version = "0.10", features = ["json", "native-tls"] }
|
||||
tokio = { version = "=0.2", features = ["macros"] }
|
||||
url = "2.1"
|
||||
regex = "1.3"
|
||||
encoding_rs = "0.8"
|
||||
|
|
|
@@ -265,7 +265,7 @@ mod tests {
|
|||
use std::fs;
|
||||
use std::io::Write;
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(basic_scheduler)]
|
||||
async fn close_tags() {
|
||||
let image_dowloader = ImageDownloader::new((2048, 2048));
|
||||
let hdyleaflet = fs::read_to_string(r"./resources/tests/planetGnome/fedora31.html")
|
||||
|
|
13
src/lib.rs
13
src/lib.rs
|
@@ -392,7 +392,14 @@ impl ArticleScraper {
|
|||
|
||||
fn strip_id_or_class(context: &Context, id_or_class: &String) -> Result<(), ScraperError> {
|
||||
let xpath = &format!("//*[contains(@class, '{}') or contains(@id, '{}')]", id_or_class, id_or_class);
|
||||
let node_vec = Self::evaluate_xpath(context, xpath, false)?;
|
||||
|
||||
let mut ancestor = xpath.clone();
|
||||
if ancestor.starts_with("//") {
|
||||
ancestor = ancestor.chars().skip(2).collect();
|
||||
}
|
||||
|
||||
let query = &format!("{}[not(ancestor::{})]", xpath, ancestor);
|
||||
let node_vec = Self::evaluate_xpath(context, query, false)?;
|
||||
for mut node in node_vec {
|
||||
node.unlink();
|
||||
}
|
||||
|
@@ -725,7 +732,7 @@ impl ArticleScraper {
|
|||
mod tests {
|
||||
use crate::*;
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(basic_scheduler)]
|
||||
async fn golem() {
|
||||
let config_path = PathBuf::from(r"./resources/tests/golem");
|
||||
let out_path = PathBuf::from(r"./test_output");
|
||||
|
@@ -739,7 +746,7 @@ mod tests {
|
|||
assert_eq!(article.author, Some(String::from("Hauke Gierow")));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(basic_scheduler)]
|
||||
async fn phoronix() {
|
||||
let config_path = PathBuf::from(r"./resources/tests/phoronix");
|
||||
let out_path = PathBuf::from(r"./test_output");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue