1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

Take URL by reference

This commit is contained in:
Jan Lukas Gernert 2021-01-21 08:53:51 +01:00
parent cf4c6c42c5
commit 76940232a5

View file

@ -49,7 +49,7 @@ impl ArticleScraper {
pub async fn parse( pub async fn parse(
&self, &self,
url: url::Url, url: &url::Url,
download_images: bool, download_images: bool,
client: &Client, client: &Client,
) -> Result<Article, ScraperError> { ) -> Result<Article, ScraperError> {
@ -77,7 +77,7 @@ impl ArticleScraper {
debug!("Url '{}' redirects to '{}'", url.as_str(), new_url.as_str()); debug!("Url '{}' redirects to '{}'", url.as_str(), new_url.as_str());
new_url new_url
} else { } else {
url url.clone()
}; };
// check if we are dealing with text/html // check if we are dealing with text/html
@ -213,7 +213,7 @@ impl ArticleScraper {
})?) })?)
} }
pub fn evaluate_xpath( fn evaluate_xpath(
xpath_ctx: &Context, xpath_ctx: &Context,
xpath: &str, xpath: &str,
thorw_if_empty: bool, thorw_if_empty: bool,
@ -792,7 +792,7 @@ mod tests {
let url = url::Url::parse("https://www.golem.de/news/http-error-418-fehlercode-ich-bin-eine-teekanne-darf-bleiben-1708-129460.html").unwrap(); let url = url::Url::parse("https://www.golem.de/news/http-error-418-fehlercode-ich-bin-eine-teekanne-darf-bleiben-1708-129460.html").unwrap();
let grabber = ArticleScraper::new(config_path); let grabber = ArticleScraper::new(config_path);
let article = grabber.parse(url, true, &Client::new()).await.unwrap(); let article = grabber.parse(&url, true, &Client::new()).await.unwrap();
article.save_html(&out_path).unwrap(); article.save_html(&out_path).unwrap();
assert_eq!( assert_eq!(
@ -814,7 +814,7 @@ mod tests {
.unwrap(); .unwrap();
let grabber = ArticleScraper::new(config_path); let grabber = ArticleScraper::new(config_path);
let article = grabber.parse(url, true, &Client::new()).await.unwrap(); let article = grabber.parse(&url, true, &Client::new()).await.unwrap();
article.save_html(&out_path).unwrap(); article.save_html(&out_path).unwrap();
assert_eq!( assert_eq!(
@ -831,7 +831,7 @@ mod tests {
let url = url::Url::parse("https://www.youtube.com/watch?v=lHRkYLcmFY8").unwrap(); let url = url::Url::parse("https://www.youtube.com/watch?v=lHRkYLcmFY8").unwrap();
let grabber = ArticleScraper::new(config_path); let grabber = ArticleScraper::new(config_path);
let article = grabber.parse(url, false, &Client::new()).await.unwrap(); let article = grabber.parse(&url, false, &Client::new()).await.unwrap();
assert_eq!( assert_eq!(
article.html, article.html,