From 1ecc0fc4b46618e3358f9316943d221f070f7820 Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Mon, 3 Feb 2020 17:46:54 +0100 Subject: [PATCH] option to set custom reqwest client --- src/images/mod.rs | 22 +++++++++++++++------- src/lib.rs | 33 +++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/src/images/mod.rs b/src/images/mod.rs index c3326af..42d1a03 100644 --- a/src/images/mod.rs +++ b/src/images/mod.rs @@ -7,7 +7,7 @@ use libxml::parser::Parser; use libxml::tree::{Node, SaveOptions}; use libxml::xpath::Context; use log::{debug, error}; -use reqwest; +use reqwest::{Client, Response}; use std; use std::error::Error; use url; @@ -15,18 +15,26 @@ use url; mod error; pub struct ImageDownloader { - client: reqwest::Client, + client: Client, max_size: (u32, u32), } impl ImageDownloader { - pub fn new(max_size: (u32, u32)) -> ImageDownloader { + pub fn new(max_size: (u32, u32)) -> Self { + Self::new_with_client(max_size, Client::new()) + } + + pub fn new_with_client(max_size: (u32, u32), client: Client) -> Self { ImageDownloader { - client: reqwest::Client::new(), - max_size: max_size, + client, + max_size, } } + pub fn set_client(&mut self, client: Client) { + self.client = client; + } + pub async fn download_images_from_string( &self, html: &str, @@ -185,7 +193,7 @@ impl ImageDownloader { } fn check_image_content_type( - response: &reqwest::Response, + response: &Response, ) -> Result { if response.status().is_success() { if let Some(content_type) = response.headers().get(reqwest::header::CONTENT_TYPE) { @@ -301,7 +309,7 @@ impl ImageDownloader { Err(ImageDownloadErrorKind::ParentDownload)? } - fn get_content_lenght(response: &reqwest::Response) -> Result { + fn get_content_lenght(response: &Response) -> Result { if response.status().is_success() { if let Some(content_length) = response.headers().get(reqwest::header::CONTENT_LENGTH) { if let Ok(content_length) = content_length.to_str() { diff --git a/src/lib.rs b/src/lib.rs index 51ebb68..2043bda 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ use libxml::tree::{Document, Node, SaveOptions}; use libxml::xpath::Context; use log::{debug, error, info, warn}; use regex; -use reqwest; +use reqwest::{Client, Response}; use std::collections; use std::error::Error; use std::path::PathBuf; @@ -27,11 +27,15 @@ use url; pub struct ArticleScraper { pub image_downloader: ImageDownloader, config_files: Arc>>, - client: reqwest::Client, + client: Client, } impl ArticleScraper { - pub fn new(config_path: PathBuf) -> Result { + pub fn new(config_path: PathBuf) -> Self { + Self::new_with_client(config_path, Client::new()) + } + + pub fn new_with_client(config_path: PathBuf, client: Client) -> Self { let config_files = Arc::new(RwLock::new(None)); let locked_config_files = config_files.clone(); @@ -49,11 +53,16 @@ impl ArticleScraper { } }); - Ok(ArticleScraper { - image_downloader: ImageDownloader::new((2048, 2048)), + ArticleScraper { + image_downloader: ImageDownloader::new_with_client((2048, 2048), client.clone()), config_files, - client: reqwest::Client::new(), - }) + client, + } + } + + pub fn set_client(&mut self, client: Client) { + self.client = client.clone(); + self.image_downloader.set_client(client); } pub async fn parse( @@ -263,7 +272,7 @@ impl ArticleScraper { Ok(()) } - async fn download(url: &url::Url, client: &reqwest::Client) -> Result { + async fn download(url: &url::Url, client: &Client) -> Result { let response = client .get(url.as_str()) .send() @@ -373,7 +382,7 @@ impl ArticleScraper { } } - fn check_content_type(response: &reqwest::Response) -> Result { + fn check_content_type(response: &Response) -> Result { if response.status().is_success() { if let Some(content_type) = response.headers().get(reqwest::header::CONTENT_TYPE) { if let Ok(content_type) = content_type.to_str() { @@ -391,7 +400,7 @@ impl ArticleScraper { Err(ScraperErrorKind::Http)? } - fn check_redirect(response: &reqwest::Response) -> Option { + fn check_redirect(response: &Response) -> Option { if response.status() == reqwest::StatusCode::PERMANENT_REDIRECT { debug!("Article url redirects to '{}'", response.url().as_str()); return Some(response.url().clone()); @@ -809,7 +818,7 @@ mod tests { let out_path = PathBuf::from(r"./test_output"); let url = url::Url::parse("https://www.golem.de/news/http-error-418-fehlercode-ich-bin-eine-teekanne-darf-bleiben-1708-129460.html").unwrap(); - let grabber = ArticleScraper::new(config_path).unwrap(); + let grabber = ArticleScraper::new(config_path); let article = grabber.parse(url, true).await.unwrap(); article.save_html(&out_path).unwrap(); @@ -831,7 +840,7 @@ mod tests { ) .unwrap(); - let grabber = ArticleScraper::new(config_path).unwrap(); + let grabber = ArticleScraper::new(config_path); let article = grabber.parse(url, true).await.unwrap(); article.save_html(&out_path).unwrap();