mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
fmt
This commit is contained in:
parent
fbb6585596
commit
5621a0ea54
1 changed file with 10 additions and 2 deletions
|
@@ -381,7 +381,11 @@ impl FullTextParser {
|
||||||
if let Some(encoding) = Self::get_encoding_from_html(&lossy_string) {
|
if let Some(encoding) = Self::get_encoding_from_html(&lossy_string) {
|
||||||
log::debug!("Encoding extracted from HTML: '{}'", encoding);
|
log::debug!("Encoding extracted from HTML: '{}'", encoding);
|
||||||
if let Some(decoded_html) = Self::decode_html(&bytes, encoding) {
|
if let Some(decoded_html) = Self::decode_html(&bytes, encoding) {
|
||||||
let decoded_html = decoded_html.replacen(&format!("charset=\"{encoding}\""), "charset=\"utf-8\"", 1);
|
let decoded_html = decoded_html.replacen(
|
||||||
|
&format!("charset=\"{encoding}\""),
|
||||||
|
"charset=\"utf-8\"",
|
||||||
|
1,
|
||||||
|
);
|
||||||
return Ok(decoded_html);
|
return Ok(decoded_html);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -389,7 +393,11 @@ impl FullTextParser {
|
||||||
if let Some(encoding) = Self::get_encoding_from_http_header(&headers) {
|
if let Some(encoding) = Self::get_encoding_from_http_header(&headers) {
|
||||||
log::debug!("Encoding extracted from headers: '{}'", encoding);
|
log::debug!("Encoding extracted from headers: '{}'", encoding);
|
||||||
if let Some(decoded_html) = Self::decode_html(&bytes, encoding) {
|
if let Some(decoded_html) = Self::decode_html(&bytes, encoding) {
|
||||||
let decoded_html = decoded_html.replacen(&format!("charset=\"{encoding}\""), "charset=\"utf-8\"", 1);
|
let decoded_html = decoded_html.replacen(
|
||||||
|
&format!("charset=\"{encoding}\""),
|
||||||
|
"charset=\"utf-8\"",
|
||||||
|
1,
|
||||||
|
);
|
||||||
return Ok(decoded_html);
|
return Ok(decoded_html);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue