From fbb65855965477520e8f40e0fb2c78f629cb8454 Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Wed, 26 Apr 2023 09:09:06 +0200 Subject: [PATCH] replace first occurrence only --- article_scraper/ftr-site-config | 2 +- article_scraper/src/full_text_parser/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/article_scraper/ftr-site-config b/article_scraper/ftr-site-config index f552f1d..75e4e96 160000 --- a/article_scraper/ftr-site-config +++ b/article_scraper/ftr-site-config @@ -1 +1 @@ -Subproject commit f552f1d5178786e3bdbdbe88952244eac8e8838f +Subproject commit 75e4e96639b4eb41502e669894255833d00937bc diff --git a/article_scraper/src/full_text_parser/mod.rs b/article_scraper/src/full_text_parser/mod.rs index 8250b02..d9c26c4 100644 --- a/article_scraper/src/full_text_parser/mod.rs +++ b/article_scraper/src/full_text_parser/mod.rs @@ -381,7 +381,7 @@ impl FullTextParser { if let Some(encoding) = Self::get_encoding_from_html(&lossy_string) { log::debug!("Encoding extracted from HTML: '{}'", encoding); if let Some(decoded_html) = Self::decode_html(&bytes, encoding) { - let decoded_html = decoded_html.replace(&format!("charset=\"{encoding}\""), "charset=\"utf-8\""); + let decoded_html = decoded_html.replacen(&format!("charset=\"{encoding}\""), "charset=\"utf-8\"", 1); return Ok(decoded_html); } } @@ -389,7 +389,7 @@ impl FullTextParser { if let Some(encoding) = Self::get_encoding_from_http_header(&headers) { log::debug!("Encoding extracted from headers: '{}'", encoding); if let Some(decoded_html) = Self::decode_html(&bytes, encoding) { - let decoded_html = decoded_html.replace(&format!("charset=\"{encoding}\""), "charset=\"utf-8\""); + let decoded_html = decoded_html.replacen(&format!("charset=\"{encoding}\""), "charset=\"utf-8\"", 1); return Ok(decoded_html); } }