diff --git a/article_scraper/Cargo.toml b/article_scraper/Cargo.toml index 948fe98..1c7d2ac 100644 --- a/article_scraper/Cargo.toml +++ b/article_scraper/Cargo.toml @@ -14,18 +14,18 @@ exclude = ["resources/tests"] thiserror = "1.0" libxml = "0.3" reqwest = { version = "0.11", features = ["json", "native-tls", "gzip", "brotli", "stream"] } -tokio = { version = "1.28", features = ["macros", "fs", "io-util"] } -url = "2.3" -regex = "1.8" +tokio = { version = "1", features = ["macros", "fs", "io-util"] } +url = "2.5" +regex = "1.10" encoding_rs = "0.8" chrono = "0.4" base64 = "0.21" image = "0.24" log = "0.4" -rust-embed="6.6" -once_cell = "1.17" +rust-embed="8.2" +once_cell = "1.19" escaper = "0.1" futures = "0.3" [dev-dependencies] -env_logger = "0.10" +env_logger = "0.11" diff --git a/article_scraper/resources/tests/readability/hukumusume/source.html b/article_scraper/resources/tests/readability/hukumusume/source.html index 7034430..7f7afd4 100644 --- a/article_scraper/resources/tests/readability/hukumusume/source.html +++ b/article_scraper/resources/tests/readability/hukumusume/source.html @@ -175,7 +175,7 @@
- + 元旦 |
- ![]() + ![]() 松(まつ) |
@@ -269,14 +269,14 @@
- きょうの世界昔話![]() + きょうの世界昔話 ![]() モンゴルの十二支話 |
- ![]() + ![]() 仕事の取替えっこ |
diff --git a/article_scraper/src/clean.rs b/article_scraper/src/clean.rs
index 81297a2..7ab49e8 100644
--- a/article_scraper/src/clean.rs
+++ b/article_scraper/src/clean.rs
@@ -145,9 +145,7 @@ mod tests {
let url =
Url::parse("https://blogs.gnome.org/tbernard/2023/07/26/rethinking-window-management/")
.unwrap();
- let res = clean_html_fragment(html, &url).unwrap();
-
- std::fs::write("/home/jeanluc/result.html", res.html).unwrap();
+ _ = clean_html_fragment(html, &url).unwrap();
}
#[test]
@@ -156,7 +154,7 @@ mod tests {
let url = Url::parse("https://finshots.in").unwrap();
let res = clean_html_fragment(html, &url).unwrap();
- assert_eq!(res.html.len(), 11989);
+ assert!(res.html.len().abs_diff(12_000) < 200);
assert_eq!(
res.thumbnail.as_deref(),
Some("https://cdn.finshots.app/images/2023/03/Design-8-Amul.jpg")