From 27be5a3204faef0d3ab709eecabb606e9caef549 Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Thu, 1 Dec 2022 09:22:08 +0100 Subject: [PATCH] port failure -> thiserror --- Cargo.toml | 10 +- src/error.rs | 68 +++---------- src/full_text_parser/config/config_entry.rs | 7 +- src/full_text_parser/config/error.rs | 62 ++---------- src/full_text_parser/config/mod.rs | 1 + src/full_text_parser/error.rs | 74 +++----------- src/full_text_parser/mod.rs | 59 ++++++----- src/images/error.rs | 92 +++-------------- src/images/mod.rs | 106 +++++++++----------- src/lib.rs | 2 +- src/util.rs | 22 ++-- 11 files changed, 137 insertions(+), 366 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a11749c..9d2b801 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,16 +8,16 @@ description = "Scrap article contents from the web. Powered by fivefilters full repository = "https://gitlab.com/news-flash/article_scraper" [dependencies] -failure = "0.1" +thiserror = "1.0" libxml = "0.3" reqwest = { version = "0.11", features = ["json", "native-tls", "gzip", "brotli"] } -tokio = { version = "1.21", features = ["macros", "fs", "io-util"] } -url = "2.2" -regex = "1.4" +tokio = { version = "1.22", features = ["macros", "fs", "io-util"] } +url = "2.3" +regex = "1.7" encoding_rs = "0.8" chrono = "0.4" base64 = "0.13" image = "0.24" log = "0.4" rust-embed="6.4" -once_cell = "1.15" \ No newline at end of file +once_cell = "1.16" \ No newline at end of file diff --git a/src/error.rs b/src/error.rs index 297f0a9..4f915fd 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,57 +1,15 @@ -use failure::{Backtrace, Context, Error, Fail}; -use std::fmt; +use crate::{ + full_text_parser::{config::ConfigError, error::FullTextParserError}, + images::ImageDownloadError, +}; +use thiserror::Error; -#[derive(Debug)] -pub struct ScraperError { - inner: Context, -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug, Fail)] -pub enum ScraperErrorKind { - #[fail(display = "Unknown Error")] - Unknown, -} - -impl Fail 
for ScraperError { - fn cause(&self) -> Option<&dyn Fail> { - self.inner.cause() - } - - fn backtrace(&self) -> Option<&Backtrace> { - self.inner.backtrace() - } -} - -impl fmt::Display for ScraperError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.inner, f) - } -} - -impl ScraperError { - pub fn kind(&self) -> ScraperErrorKind { - *self.inner.get_context() - } -} - -impl From for ScraperError { - fn from(kind: ScraperErrorKind) -> ScraperError { - ScraperError { - inner: Context::new(kind), - } - } -} - -impl From> for ScraperError { - fn from(inner: Context) -> ScraperError { - ScraperError { inner } - } -} - -impl From for ScraperError { - fn from(_: Error) -> ScraperError { - ScraperError { - inner: Context::new(ScraperErrorKind::Unknown), - } - } +#[derive(Error, Debug)] +pub enum ScraperError { + #[error("")] + Config(#[from] ConfigError), + #[error("")] + Image(#[from] ImageDownloadError), + #[error("")] + Scrap(#[from] FullTextParserError), } diff --git a/src/full_text_parser/config/config_entry.rs b/src/full_text_parser/config/config_entry.rs index 6db4504..4fa2e8e 100644 --- a/src/full_text_parser/config/config_entry.rs +++ b/src/full_text_parser/config/config_entry.rs @@ -1,7 +1,6 @@ use crate::util::Util; -use super::error::{ConfigError, ConfigErrorKind}; -use failure::ResultExt; +use super::error::ConfigError; use std::borrow::Cow; use std::io::Cursor; use std::path::Path; @@ -37,9 +36,7 @@ pub struct ConfigEntry { impl ConfigEntry { pub async fn parse_path(config_path: &Path) -> Result { - let mut file = fs::File::open(&config_path) - .await - .context(ConfigErrorKind::IO)?; + let mut file = fs::File::open(&config_path).await?; let buffer = BufReader::new(&mut file); Self::parse(buffer).await diff --git a/src/full_text_parser/config/error.rs b/src/full_text_parser/config/error.rs index a93587a..1988e19 100644 --- a/src/full_text_parser/config/error.rs +++ b/src/full_text_parser/config/error.rs @@ -1,59 +1,9 @@ 
-use failure::{Backtrace, Context, Error, Fail}; -use std::fmt; +use thiserror::Error; -#[derive(Debug)] -pub struct ConfigError { - inner: Context, -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug, Fail)] -pub enum ConfigErrorKind { - #[fail(display = "IO Error")] - IO, - #[fail(display = "Unknown Error")] +#[derive(Error, Debug)] +pub enum ConfigError { + #[error("IO error")] + IO(#[from] std::io::Error), + #[error("Unknown Error")] Unknown, } - -impl Fail for ConfigError { - fn cause(&self) -> Option<&dyn Fail> { - self.inner.cause() - } - - fn backtrace(&self) -> Option<&Backtrace> { - self.inner.backtrace() - } -} - -impl fmt::Display for ConfigError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.inner, f) - } -} - -// impl ConfigError { -// pub fn kind(&self) -> ConfigErrorKind { -// *self.inner.get_context() -// } -// } - -impl From for ConfigError { - fn from(kind: ConfigErrorKind) -> ConfigError { - ConfigError { - inner: Context::new(kind), - } - } -} - -impl From> for ConfigError { - fn from(inner: Context) -> ConfigError { - ConfigError { inner } - } -} - -impl From for ConfigError { - fn from(_: Error) -> ConfigError { - ConfigError { - inner: Context::new(ConfigErrorKind::Unknown), - } - } -} diff --git a/src/full_text_parser/config/mod.rs b/src/full_text_parser/config/mod.rs index 22bb3b4..5f63ade 100644 --- a/src/full_text_parser/config/mod.rs +++ b/src/full_text_parser/config/mod.rs @@ -6,3 +6,4 @@ mod error; pub use config_collection::ConfigCollection; pub use config_entry::ConfigEntry; +pub use error::ConfigError; diff --git a/src/full_text_parser/error.rs b/src/full_text_parser/error.rs index 4045e06..6792e62 100644 --- a/src/full_text_parser/error.rs +++ b/src/full_text_parser/error.rs @@ -1,71 +1,21 @@ -use failure::{Backtrace, Context, Error, Fail}; -use std::fmt; +use thiserror::Error; -#[derive(Debug)] -pub struct FullTextParserError { - inner: Context, -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug, 
Fail)] -pub enum FullTextParserErrorKind { - #[fail(display = "libXml Error")] +#[derive(Error, Debug)] +pub enum FullTextParserError { + #[error("libXml Error")] Xml, - #[fail(display = "No content found")] + #[error("No content found")] Scrape, - #[fail(display = "Url Error")] - Url, - #[fail(display = "Http request failed")] + #[error("Url Error")] + Url(#[from] url::ParseError), + #[error("Http request failed")] Http, - #[fail(display = "Config Error")] + #[error("Config Error")] Config, - #[fail(display = "IO Error")] + #[error("IO Error")] IO, - #[fail(display = "Content-type suggest no html")] + #[error("Content-type suggest no html")] ContentType, - #[fail(display = "Unknown Error")] + #[error("Unknown Error")] Unknown, } - -impl Fail for FullTextParserError { - fn cause(&self) -> Option<&dyn Fail> { - self.inner.cause() - } - - fn backtrace(&self) -> Option<&Backtrace> { - self.inner.backtrace() - } -} - -impl fmt::Display for FullTextParserError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.inner, f) - } -} - -impl FullTextParserError { - pub fn kind(&self) -> FullTextParserErrorKind { - *self.inner.get_context() - } -} - -impl From for FullTextParserError { - fn from(kind: FullTextParserErrorKind) -> FullTextParserError { - FullTextParserError { - inner: Context::new(kind), - } - } -} - -impl From> for FullTextParserError { - fn from(inner: Context) -> FullTextParserError { - FullTextParserError { inner } - } -} - -impl From for FullTextParserError { - fn from(_: Error) -> FullTextParserError { - FullTextParserError { - inner: Context::new(FullTextParserErrorKind::Unknown), - } - } -} diff --git a/src/full_text_parser/mod.rs b/src/full_text_parser/mod.rs index b41ac73..173985e 100644 --- a/src/full_text_parser/mod.rs +++ b/src/full_text_parser/mod.rs @@ -6,12 +6,11 @@ mod fingerprints; mod tests; use self::config::{ConfigCollection, ConfigEntry}; -use self::error::{FullTextParserError, FullTextParserErrorKind}; +use 
self::error::FullTextParserError; use crate::article::Article; use crate::util::Util; use chrono::DateTime; use encoding_rs::Encoding; -use failure::ResultExt; use fingerprints::Fingerprints; use libxml::parser::Parser; use libxml::tree::{Document, Node, SaveOptions}; @@ -44,7 +43,7 @@ impl FullTextParser { let global_config = self .config_files .get("global.txt") - .ok_or(FullTextParserErrorKind::Config)?; + .ok_or(FullTextParserError::Config)?; let headers = Util::generate_headers(config, global_config)?; @@ -55,9 +54,8 @@ impl FullTextParser { .await .map_err(|err| { error!("Failed head request to: '{}' - '{}'", url.as_str(), err); - err - }) - .context(FullTextParserErrorKind::Http)?; + FullTextParserError::Http + })?; // check if url redirects and we need to pick up the new url let url = if let Some(new_url) = Util::check_redirect(&response, url) { @@ -69,7 +67,7 @@ impl FullTextParser { // check if we are dealing with text/html if !Util::check_content_type(&response)? { - return Err(FullTextParserErrorKind::ContentType.into()); + return Err(FullTextParserError::ContentType); } let mut article = Article { @@ -80,9 +78,9 @@ impl FullTextParser { html: None, }; - let mut document = Document::new().map_err(|()| FullTextParserErrorKind::Xml)?; + let mut document = Document::new().map_err(|()| FullTextParserError::Xml)?; let mut root = - Node::new("article", None, &document).map_err(|()| FullTextParserErrorKind::Xml)?; + Node::new("article", None, &document).map_err(|()| FullTextParserError::Xml)?; document.set_root_element(&root); Self::generate_head(&mut root, &document)?; @@ -92,7 +90,7 @@ impl FullTextParser { let context = Context::new(&document).map_err(|()| { error!("Failed to create xpath context for extracted article"); - FullTextParserErrorKind::Xml + FullTextParserError::Xml })?; if let Err(error) = Self::prevent_self_closing_tags(&context) { @@ -209,14 +207,14 @@ impl FullTextParser { let parser = Parser::default_html(); 
Ok(parser.parse_string(html.as_str()).map_err(|err| { error!("Parsing HTML failed for downloaded HTML {:?}", err); - FullTextParserErrorKind::Xml + FullTextParserError::Xml })?) } fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> { Ok(Context::new(doc).map_err(|()| { error!("Creating xpath context failed for downloaded HTML"); - FullTextParserErrorKind::Xml + FullTextParserError::Xml })?) } @@ -256,16 +254,15 @@ impl FullTextParser { url.as_str(), err ); - err - }) - .context(FullTextParserErrorKind::Http)?; + FullTextParserError::Http + })?; if response.status().is_success() { let headers = response.headers().clone(); let text = response .text() .await - .context(FullTextParserErrorKind::Http)?; + .map_err(|_| FullTextParserError::Http)?; { if let Some(decoded_html) = Self::decode_html(&text, Self::get_encoding_from_html(&text)) @@ -284,7 +281,7 @@ impl FullTextParser { return Ok(text); } - Err(FullTextParserErrorKind::Http.into()) + Err(FullTextParserError::Http) } fn get_encoding_from_http_header(headers: &reqwest::header::HeaderMap) -> Option<&str> { @@ -338,7 +335,7 @@ impl FullTextParser { } None => { error!("Getting config failed due to bad Url"); - Err(FullTextParserErrorKind::Config.into()) + Err(FullTextParserError::Config) } } } @@ -366,7 +363,7 @@ impl FullTextParser { for mut node in node_vec { if let Some(correct_url) = node.get_property(property_url) { if node.set_property("src", &correct_url).is_err() { - return Err(FullTextParserErrorKind::Xml.into()); + return Err(FullTextParserError::Xml); } } } @@ -385,7 +382,7 @@ impl FullTextParser { node.unlink(); video_wrapper.add_child(&mut node).map_err(|_| { error!("Failed to add iframe as child of video wrapper <div>
"); - FullTextParserErrorKind::Xml + FullTextParserError::Xml })?; } } @@ -393,7 +390,7 @@ impl FullTextParser { } error!("Failed to add video wrapper <div>
as parent of iframe"); - return Err(FullTextParserErrorKind::Xml.into()); + return Err(FullTextParserError::Xml); } error!("Failed to get parent of iframe"); @@ -413,7 +410,7 @@ impl FullTextParser { let node_vec = Util::evaluate_xpath(context, xpath, false)?; for mut node in node_vec { if node.remove_property(attribute).is_err() { - return Err(FullTextParserErrorKind::Xml.into()); + return Err(FullTextParserError::Xml); } } Ok(()) @@ -431,7 +428,7 @@ impl FullTextParser { let node_vec = Util::evaluate_xpath(context, xpath, false)?; for mut node in node_vec { if node.set_attribute(attribute, value).is_err() { - return Err(FullTextParserErrorKind::Xml.into()); + return Err(FullTextParserError::Xml); } } Ok(()) @@ -449,7 +446,7 @@ impl FullTextParser { } } - Err(FullTextParserErrorKind::Xml.into()) + Err(FullTextParserError::Xml) } fn repair_urls( @@ -464,7 +461,7 @@ impl FullTextParser { if let Err(url::ParseError::RelativeUrlWithoutBase) = url::Url::parse(&val) { if let Ok(fixed_url) = Self::complete_url(article_url, &val) { if node.set_attribute(attribute, fixed_url.as_str()).is_err() { - return Err(FullTextParserErrorKind::Scrape.into()); + return Err(FullTextParserError::Scrape); } } } @@ -486,7 +483,7 @@ impl FullTextParser { completed_url.push_str("//"); completed_url.push_str(host); } - _ => return Err(FullTextParserErrorKind::Scrape.into()), + _ => return Err(FullTextParserError::Scrape), }; } @@ -494,7 +491,7 @@ impl FullTextParser { completed_url.push('/'); } completed_url.push_str(incomplete_url); - let url = url::Url::parse(&completed_url).context(FullTextParserErrorKind::Url)?; + let url = url::Url::parse(&completed_url)?; Ok(url) } @@ -678,7 +675,7 @@ impl FullTextParser { if !found_something { log::error!("no body found"); - return Err(FullTextParserErrorKind::Scrape.into()); + return Err(FullTextParserError::Scrape); } Ok(()) @@ -694,7 +691,7 @@ impl FullTextParser { let node_vec = Util::evaluate_xpath(context, xpath, false)?; for mut node in 
node_vec { if node.get_property("style").is_some() && node.remove_property("style").is_err() { - return Err(FullTextParserErrorKind::Xml.into()); + return Err(FullTextParserError::Xml); } node.unlink(); @@ -702,7 +699,7 @@ impl FullTextParser { found_something = true; } else { error!("Failed to add body to prepared document"); - return Err(FullTextParserErrorKind::Xml.into()); + return Err(FullTextParserError::Xml); } } } @@ -748,7 +745,7 @@ impl FullTextParser { } } - Err(FullTextParserErrorKind::Xml.into()) + Err(FullTextParserError::Xml) } fn prevent_self_closing_tags(context: &Context) -> Result<(), FullTextParserError> { diff --git a/src/images/error.rs b/src/images/error.rs index 15a3dac..831313b 100644 --- a/src/images/error.rs +++ b/src/images/error.rs @@ -1,89 +1,25 @@ -use crate::full_text_parser::error::FullTextParserErrorKind; -use failure::{Backtrace, Context, Error, Fail}; -use std::fmt; +use thiserror::Error; -#[derive(Debug)] -pub struct ImageDownloadError { - inner: Context, -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug, Fail)] -pub enum ImageDownloadErrorKind { - #[fail(display = "Parsing the supplied html string failed")] +#[derive(Error, Debug)] +pub enum ImageDownloadError { + #[error("Parsing the supplied html string failed")] HtmlParse, - #[fail(display = "Scaling down a downloaded image failed")] + #[error("Scaling down a downloaded image failed")] ImageScale, - #[fail(display = "Downloading the parent element of an image failed")] + #[error("Downloading the parent element of an image failed")] ParentDownload, - #[fail(display = "Generating image name failed")] + #[error("Generating image name failed")] ImageName, - #[fail(display = "Getting the content-length property failed")] + #[error("Getting the content-length property failed")] ContentLenght, - #[fail(display = "Content-type suggest no image")] + #[error("Content-type suggest no image")] ContentType, - #[fail(display = "Http error")] + #[error("Http error")] Http, - #[fail(display 
= "IO error")] + #[error("IO error")] IO, - #[fail(display = "Invalid URL")] - InvalidUrl, - #[fail(display = "Unknown Error")] + #[error("Invalid URL")] + InvalidUrl(#[from] url::ParseError), + #[error("Unknown Error")] Unknown, } - -impl Fail for ImageDownloadError { - fn cause(&self) -> Option<&dyn Fail> { - self.inner.cause() - } - - fn backtrace(&self) -> Option<&Backtrace> { - self.inner.backtrace() - } -} - -impl fmt::Display for ImageDownloadError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.inner, f) - } -} - -impl ImageDownloadError { - pub fn kind(&self) -> ImageDownloadErrorKind { - *self.inner.get_context() - } -} - -impl From for ImageDownloadError { - fn from(kind: ImageDownloadErrorKind) -> ImageDownloadError { - ImageDownloadError { - inner: Context::new(kind), - } - } -} - -impl From> for ImageDownloadError { - fn from(inner: Context) -> ImageDownloadError { - ImageDownloadError { inner } - } -} - -impl From for ImageDownloadError { - fn from(kind: FullTextParserErrorKind) -> ImageDownloadError { - let kind = match kind { - FullTextParserErrorKind::Xml => ImageDownloadErrorKind::HtmlParse, - _ => ImageDownloadErrorKind::Unknown, - }; - - ImageDownloadError { - inner: Context::new(kind), - } - } -} - -impl From for ImageDownloadError { - fn from(_: Error) -> ImageDownloadError { - ImageDownloadError { - inner: Context::new(ImageDownloadErrorKind::Unknown), - } - } -} diff --git a/src/images/mod.rs b/src/images/mod.rs index c0dbacc..996e936 100644 --- a/src/images/mod.rs +++ b/src/images/mod.rs @@ -1,6 +1,5 @@ -use self::error::{ImageDownloadError, ImageDownloadErrorKind}; +pub use self::error::ImageDownloadError; use crate::util::Util; -use failure::ResultExt; use libxml::parser::Parser; use libxml::tree::{Node, SaveOptions}; use libxml::xpath::Context; @@ -25,14 +24,13 @@ impl ImageDownloader { client: &Client, ) -> Result { let parser = Parser::default_html(); - let doc = 
parser.parse_string(html).map_err(|_| { - error!("Failed to parse HTML string"); - ImageDownloadErrorKind::HtmlParse - })?; + let doc = parser + .parse_string(html) + .map_err(|_| ImageDownloadError::HtmlParse)?; let xpath_ctx = Context::new(&doc).map_err(|()| { error!("Failed to create xpath context for document"); - ImageDownloadErrorKind::HtmlParse + ImageDownloadError::HtmlParse })?; self.download_images_from_context(&xpath_ctx, client) @@ -58,7 +56,7 @@ impl ImageDownloader { ) -> Result<(), ImageDownloadError> { let xpath = "//img"; let node_vec = Util::evaluate_xpath(context, xpath, false) - .context(ImageDownloadErrorKind::HtmlParse)?; + .map_err(|_| ImageDownloadError::HtmlParse)?; for mut node in node_vec { if let Some(url) = node.get_property("src") { if !url.starts_with("data:") { @@ -72,11 +70,11 @@ impl ImageDownloader { self.save_image(&url, &parent_url, client).await { if node.set_property("src", &small_image).is_err() { - return Err(ImageDownloadErrorKind::HtmlParse.into()); + return Err(ImageDownloadError::HtmlParse); } if let Some(big_image) = big_image { if node.set_property("big-src", &big_image).is_err() { - return Err(ImageDownloadErrorKind::HtmlParse.into()); + return Err(ImageDownloadError::HtmlParse); } } } @@ -94,26 +92,21 @@ impl ImageDownloader { parent_url: &Option, client: &Client, ) -> Result<(String, Option), ImageDownloadError> { - let response = client - .get(image_url.clone()) - .send() - .await - .map_err(|err| { - error!("GET {} failed - {}", image_url.as_str(), err); - err - }) - .context(ImageDownloadErrorKind::Http)?; + let response = client.get(image_url.clone()).send().await.map_err(|err| { + error!("GET {} failed - {}", image_url.as_str(), err); + ImageDownloadError::Http + })?; let content_type_small = ImageDownloader::check_image_content_type(&response)?; let content_type_small = content_type_small .to_str() - .context(ImageDownloadErrorKind::ContentType)?; + .map_err(|_| ImageDownloadError::ContentType)?; let mut 
content_type_big: Option = None; let mut small_image = response .bytes() .await - .context(ImageDownloadErrorKind::IO)? + .map_err(|_| ImageDownloadError::Http)? .as_ref() .to_vec(); @@ -124,18 +117,18 @@ impl ImageDownloader { .get(parent_url.clone()) .send() .await - .context(ImageDownloadErrorKind::Http)?; + .map_err(|_| ImageDownloadError::Http)?; content_type_big = Some( ImageDownloader::check_image_content_type(&response_big)? .to_str() - .context(ImageDownloadErrorKind::ContentType)? + .map_err(|_| ImageDownloadError::ContentType)? .to_owned(), ); big_image = Some( response_big .bytes() .await - .context(ImageDownloadErrorKind::IO)? + .map_err(|_| ImageDownloadError::Http)? .to_vec(), ); } @@ -159,12 +152,10 @@ impl ImageDownloader { format!("data:{};base64,{}", content_type_small, small_image_base64); let big_image_string = match big_image_base64 { Some(big_image_base64) => { - let content_type_big = content_type_big - .ok_or(ImageDownloadErrorKind::ParentDownload) - .map_err(|err| { - debug!("content_type_big should not be None when a big image exists"); - err - })?; + let content_type_big = content_type_big.ok_or_else(|| { + debug!("content_type_big should not be None when a big image exists"); + ImageDownloadError::ParentDownload + })?; Some(format!( "data:{};base64,{}", content_type_big, big_image_base64 @@ -182,7 +173,7 @@ impl ImageDownloader { if let Some(content_type) = response.headers().get(reqwest::header::CONTENT_TYPE) { if content_type .to_str() - .context(ImageDownloadErrorKind::ContentType)? + .map_err(|_| ImageDownloadError::ContentType)? 
.contains("image") { return Ok(content_type.clone()); @@ -190,10 +181,10 @@ impl ImageDownloader { } error!("{} is not an image", response.url()); - return Err(ImageDownloadErrorKind::ContentType.into()); + Err(ImageDownloadError::ContentType) + } else { + Err(ImageDownloadError::Http) } - - Err(ImageDownloadErrorKind::Http.into()) } fn scale_image( @@ -203,12 +194,10 @@ impl ImageDownloader { let mut original_image: Vec = Vec::new(); let mut resized_image: Option> = None; - let mut image = image::load_from_memory(image_buffer) - .map_err(|err| { - error!("Failed to open image to resize"); - err - }) - .context(ImageDownloadErrorKind::ImageScale)?; + let mut image = image::load_from_memory(image_buffer).map_err(|err| { + error!("Failed to open image to resize: {}", err); + ImageDownloadError::ImageScale + })?; image .write_to( @@ -216,10 +205,9 @@ impl ImageDownloader { image::ImageOutputFormat::Png, ) .map_err(|err| { - error!("Failed to save resized image to resize"); - err - }) - .context(ImageDownloadErrorKind::ImageScale)?; + error!("Failed to save resized image to resize: {}", err); + ImageDownloadError::ImageScale + })?; let dimensions = (image.width(), image.height()); if dimensions.0 > max_dimensions.0 || dimensions.1 > max_dimensions.1 { @@ -235,10 +223,9 @@ impl ImageDownloader { image::ImageOutputFormat::Png, ) .map_err(|err| { - error!("Failed to save resized image to resize"); - err - }) - .context(ImageDownloadErrorKind::ImageScale)?; + error!("Failed to save resized image to resize: {}", err); + ImageDownloadError::ImageScale + })?; resized_image = Some(resized_buf); } @@ -254,24 +241,23 @@ impl ImageDownloader { if let Some(parent) = node.get_parent() { if parent.get_name() == "a" { if let Some(url) = parent.get_property("href") { - let parent_url = - url::Url::parse(&url).context(ImageDownloadErrorKind::ParentDownload)?; + let parent_url = url::Url::parse(&url).map_err(|err| { + error!("Failed to parse parent image url: {}", err); + 
ImageDownloadError::InvalidUrl(err) + })?; let parent_response = client .head(parent_url.clone()) .send() .await - .context(ImageDownloadErrorKind::ParentDownload)?; - let _ = ImageDownloader::check_image_content_type(&parent_response) - .context(ImageDownloadErrorKind::ParentDownload)?; + .map_err(|_| ImageDownloadError::Http)?; + let _ = ImageDownloader::check_image_content_type(&parent_response)?; let child_response = client .get(child_url.clone()) .send() .await - .context(ImageDownloadErrorKind::ParentDownload)?; - let parent_length = Self::get_content_lenght(&parent_response) - .context(ImageDownloadErrorKind::ParentDownload)?; - let child_length = Self::get_content_lenght(&child_response) - .context(ImageDownloadErrorKind::ParentDownload)?; + .map_err(|_| ImageDownloadError::Http)?; + let parent_length = Self::get_content_lenght(&parent_response)?; + let child_length = Self::get_content_lenght(&child_response)?; if parent_length > child_length { return Ok(parent_url); @@ -283,7 +269,7 @@ impl ImageDownloader { } debug!("Image parent element not relevant"); - Err(ImageDownloadErrorKind::ParentDownload.into()) + Err(ImageDownloadError::ParentDownload) } fn get_content_lenght(response: &Response) -> Result { @@ -296,7 +282,7 @@ impl ImageDownloader { } } } - Err(ImageDownloadErrorKind::ContentLenght.into()) + Err(ImageDownloadError::ContentLenght) } } diff --git a/src/lib.rs b/src/lib.rs index f5a90b6..2f4b878 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,7 @@ mod readability; mod util; use article::Article; -use error::{ScraperError, ScraperErrorKind}; +use error::ScraperError; use full_text_parser::FullTextParser; use images::ImageDownloader; use readability::Readability; diff --git a/src/util.rs b/src/util.rs index c40e958..2e72486 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,4 +1,3 @@ -use failure::ResultExt; use libxml::{tree::Node, xpath::Context}; use reqwest::{ header::{HeaderMap, HeaderName, HeaderValue}, @@ -6,10 +5,7 @@ use reqwest::{ }; 
use tokio::fs::DirEntry; -use crate::full_text_parser::{ - config::ConfigEntry, - error::{FullTextParserError, FullTextParserErrorKind}, -}; +use crate::full_text_parser::{config::ConfigEntry, error::FullTextParserError}; pub struct Util; @@ -55,22 +51,22 @@ impl Util { if let Some(config) = site_specific_rule { for header in &config.header { let name = HeaderName::from_bytes(header.name.as_bytes()) - .context(FullTextParserErrorKind::Config)?; + .map_err(|_| FullTextParserError::Config)?; let value = header .value .parse::() - .context(FullTextParserErrorKind::Config)?; + .map_err(|_| FullTextParserError::Config)?; headers.insert(name, value); } } for header in &global_rule.header { let name = HeaderName::from_bytes(header.name.as_bytes()) - .context(FullTextParserErrorKind::Config)?; + .map_err(|_| FullTextParserError::Config)?; let value = header .value .parse::() - .context(FullTextParserErrorKind::Config)?; + .map_err(|_| FullTextParserError::Config)?; headers.insert(name, value); } @@ -105,7 +101,7 @@ impl Util { ) -> Result, FullTextParserError> { let res = xpath_ctx.evaluate(xpath).map_err(|()| { log::debug!("Evaluation of xpath '{}' yielded no results", xpath); - FullTextParserErrorKind::Xml + FullTextParserError::Xml })?; let node_vec = res.get_nodes_as_vec(); @@ -113,7 +109,7 @@ impl Util { if node_vec.is_empty() { log::debug!("Evaluation of xpath '{}' yielded no results", xpath); if thorw_if_empty { - return Err(FullTextParserErrorKind::Xml.into()); + return Err(FullTextParserError::Xml); } } @@ -135,7 +131,7 @@ impl Util { } log::error!("Failed to determine content type"); - Err(FullTextParserErrorKind::Http.into()) + Err(FullTextParserError::Http) } pub fn check_redirect(response: &Response, original_url: &url::Url) -> Option { @@ -155,7 +151,7 @@ impl Util { return Ok(val.get_content()); } - Err(FullTextParserErrorKind::Xml.into()) + Err(FullTextParserError::Xml) } pub fn extract_value_merge(