diff --git a/article_scraper/src/full_text_parser/mod.rs b/article_scraper/src/full_text_parser/mod.rs index e130ba8..8250b02 100644 --- a/article_scraper/src/full_text_parser/mod.rs +++ b/article_scraper/src/full_text_parser/mod.rs @@ -381,6 +381,7 @@ impl FullTextParser { if let Some(encoding) = Self::get_encoding_from_html(&lossy_string) { log::debug!("Encoding extracted from HTML: '{}'", encoding); if let Some(decoded_html) = Self::decode_html(&bytes, encoding) { + let decoded_html = decoded_html.replace(&format!("charset=\"{encoding}\""), "charset=\"utf-8\""); return Ok(decoded_html); } } @@ -388,6 +389,7 @@ impl FullTextParser { if let Some(encoding) = Self::get_encoding_from_http_header(&headers) { log::debug!("Encoding extracted from headers: '{}'", encoding); if let Some(decoded_html) = Self::decode_html(&bytes, encoding) { + let decoded_html = decoded_html.replace(&format!("charset=\"{encoding}\""), "charset=\"utf-8\""); return Ok(decoded_html); } }