diff --git a/src/full_text_parser/mod.rs b/src/full_text_parser/mod.rs index 173985e..5067d30 100644 --- a/src/full_text_parser/mod.rs +++ b/src/full_text_parser/mod.rs @@ -263,18 +263,17 @@ impl FullTextParser { .text() .await .map_err(|_| FullTextParserError::Http)?; - { - if let Some(decoded_html) = - Self::decode_html(&text, Self::get_encoding_from_html(&text)) - { - return Ok(decoded_html); - } - if let Some(decoded_html) = - Self::decode_html(&text, Self::get_encoding_from_http_header(&headers)) - { - return Ok(decoded_html); - } + if let Some(decoded_html) = + Self::decode_html(&text, Self::get_encoding_from_html(&text)) + { + return Ok(decoded_html); + } + + if let Some(decoded_html) = + Self::decode_html(&text, Self::get_encoding_from_http_header(&headers)) + { + return Ok(decoded_html); } warn!("No encoding of HTML detected - assuming utf-8"); @@ -285,18 +284,16 @@ impl FullTextParser { } fn get_encoding_from_http_header(headers: &reqwest::header::HeaderMap) -> Option<&str> { - if let Some(content_type) = headers.get(reqwest::header::CONTENT_TYPE) { - if let Ok(content_type) = content_type.to_str() { - let regex = - regex::Regex::new(r#"charset=([^"']+)"#).expect("Failed to parse regex"); - if let Some(captures) = regex.captures(content_type) { - if let Some(regex_match) = captures.get(1) { - return Some(regex_match.as_str()); - } - } - } - } - None + headers + .get(reqwest::header::CONTENT_TYPE) + .and_then(|header| header.to_str().ok()) + .and_then(|content_type| { + regex::Regex::new(r#"charset=([^"']+)"#) + .expect("Failed to parse regex") + .captures(content_type) + }) + .and_then(|captures| captures.get(1)) + .map(|regex_match| regex_match.as_str()) } fn get_encoding_from_html(html: &str) -> Option<&str> { @@ -361,10 +358,12 @@ impl FullTextParser { let xpath = &format!("//img[contains(@class, '{}')]", class); let node_vec = Util::evaluate_xpath(context, xpath, false)?; for mut node in node_vec { - if let Some(correct_url) = node.get_property(property_url) { - if node.set_property("src", &correct_url).is_err() { - return Err(FullTextParserError::Xml); - } + if node + .get_property(property_url) + .and_then(|correct_url| node.set_property("src", &correct_url).ok()) + .is_none() + { + warn!("Failed to fix lazy loading image"); } } Ok(()) @@ -374,27 +373,26 @@ impl FullTextParser { let xpath = &format!("//iframe[contains(@src, '{}')]", site_name); let node_vec = Util::evaluate_xpath(context, xpath, false)?; for mut node in node_vec { - if let Some(mut parent) = node.get_parent() { - if let Ok(mut video_wrapper) = parent.new_child(None, "div") { - if let Ok(()) = video_wrapper.set_property("class", "videoWrapper") { - if let Ok(()) = node.set_property("width", "100%") { - if let Ok(()) = node.set_property("height", "100%") { - node.unlink(); - video_wrapper.add_child(&mut node).map_err(|_| { - error!("Failed to add iframe as child of video wrapper