From fcea6cf5d1e246519b273dec66ada3741d5a2c9e Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Fri, 7 Dec 2018 02:14:50 +0100 Subject: [PATCH] update to reqwest 0.9 --- Cargo.toml | 8 ++++---- src/images/mod.rs | 39 ++++++++++++++++++++++++++++----------- src/lib.rs | 23 +++++++++++++++-------- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 16f896d..ef7bb11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,13 +6,13 @@ authors = ["Jan Lukas Gernert "] [dependencies] failure = "0.1" libxml = "0.2" -reqwest = "0.8" +reqwest = "0.9" url = "1.7" -regex = "1.0" +regex = "1.1" encoding_rs = "0.8" chrono = "0.4" htmlescape = "0.3" -base64 = "0.9" -image = "0.19" +base64 = "0.10" +image = "0.20" log = "0.4" mime_guess = "1.8" diff --git a/src/images/mod.rs b/src/images/mod.rs index 416b979..f043858 100644 --- a/src/images/mod.rs +++ b/src/images/mod.rs @@ -143,11 +143,11 @@ impl ImageDownloader { Err(ImageDownloadErrorKind::InvalidUrl)? } - fn check_image_content_type(response: &reqwest::Response) -> Result { + fn check_image_content_type(response: &reqwest::Response) -> Result { if response.status().is_success() { - if let Some(content_type) = response.headers().get::() { - if content_type.type_() == reqwest::mime::IMAGE { + if let Some(content_type) = response.headers().get(reqwest::header::CONTENT_TYPE) { + if content_type.to_str().context(ImageDownloadErrorKind::ContentType)?.contains("image") { return Ok(content_type.clone()) } } @@ -162,8 +162,12 @@ impl ImageDownloader { fn get_content_lenght(response: &reqwest::Response) -> Result { if response.status().is_success() { - if let Some(&reqwest::header::ContentLength(content_length)) = response.headers().get::() { - return Ok(content_length) + if let Some(content_length) = response.headers().get(reqwest::header::CONTENT_LENGTH) { + if let Ok(content_length) = content_length.to_str() { + if let Ok(content_length) = content_length.parse::() { + return Ok(content_length) + } + } } } @@ -188,7 +192,7 @@ impl ImageDownloader { None } - fn extract_image_name(url: &url::Url, content_type: reqwest::header::ContentType) -> Result { + fn extract_image_name(url: &url::Url, content_type: reqwest::header::HeaderValue) -> Result { if let Some(file_name) = url.path_segments().and_then(|segments| segments.last()) { let mut image_name = file_name.to_owned(); @@ -197,12 +201,25 @@ impl ImageDownloader { image_name.push_str(query); } - let primary_type = content_type.type_().as_str(); - let mut sub_type = content_type.subtype().as_str().to_owned(); - if let Some(suffix) = content_type.suffix() { + let header = content_type.to_str().context(ImageDownloadErrorKind::ContentType)?; + let primary_type = match header.find("/") { + Some(end) => header[..end-1].to_string(), + None => "unknown".to_string(), + }; + let mut sub_type = match header.find("/") { + None => "unknown".to_string(), + Some(start) => { + match header.find("+") { + None => "unknown".to_string(), + Some(end) => header[start..end-1].to_string(), + } + }, + }; + if let Some(start) = header.find("+") { sub_type.push_str("+"); - sub_type.push_str(suffix.as_str()); - } + sub_type.push_str(&header[start..].to_string()); + }; + if let Some(extensions) = mime_guess::get_extensions(primary_type, &sub_type) { let mut extension_present = false; for extension in extensions { diff --git a/src/lib.rs b/src/lib.rs index a650ec5..baffcc3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -223,11 +223,16 @@ impl ArticleScraper { Err(ScraperErrorKind::Http)? } - fn get_encoding_from_http_header(headers: &reqwest::header::Headers) -> Option<&str> { + fn get_encoding_from_http_header(headers: &reqwest::header::HeaderMap) -> Option<&str> { - if let Some(content_type) = headers.get::() { - if let Some(encoding) = content_type.get_param(reqwest::mime::CHARSET) { - return Some(encoding.as_str()) + if let Some(content_type) = headers.get(reqwest::header::CONTENT_TYPE) { + if let Ok(content_type) = content_type.to_str() { + let regex = regex::Regex::new(r#"charset=([^"']+)"#).unwrap(); + if let Some(captures) = regex.captures(content_type) { + if let Some(regex_match) = captures.get(1) { + return Some(regex_match.as_str()) + } + } } } None @@ -288,9 +293,11 @@ impl ArticleScraper { fn check_content_type(response: &reqwest::Response) -> Result { if response.status().is_success() { - if let Some(content_type) = response.headers().get::() { - if content_type.type_() == reqwest::mime::TEXT && content_type.subtype() == reqwest::mime::HTML { - return Ok(true) + if let Some(content_type) = response.headers().get(reqwest::header::CONTENT_TYPE) { + if let Ok(content_type) = content_type.to_str() { + if content_type.contains("text/html") { + return Ok(true) + } } } @@ -304,7 +311,7 @@ impl ArticleScraper { fn check_redirect(response: &reqwest::Response) -> Option { - if response.status() == reqwest::StatusCode::PermanentRedirect { + if response.status() == reqwest::StatusCode::PERMANENT_REDIRECT { debug!("Article url redirects to {}", response.url().as_str()); return Some(response.url().clone()) }