mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-09 00:45:31 +02:00
add citylab test & fix noscript unwrapping
This commit is contained in:
parent
69b7b1fdc2
commit
c5c6b788c8
7 changed files with 7091 additions and 6 deletions
|
@ -19,7 +19,7 @@ async fn run_test(name: &str) {
|
|||
let xpath_ctx = crate::FullTextParser::get_xpath_ctx(&document).unwrap();
|
||||
|
||||
crate::FullTextParser::strip_junk(&xpath_ctx, None, &empty_config);
|
||||
crate::FullTextParser::unwrap_noscript_images(&xpath_ctx).unwrap();
|
||||
|
||||
crate::FullTextParser::fix_urls(&xpath_ctx, &url);
|
||||
let mut article = Article {
|
||||
title: None,
|
||||
|
@ -121,6 +121,11 @@ async fn buzzfeed_1() {
|
|||
run_test("buzzfeed-1").await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn citylab_1() {
|
||||
run_test("citylab-1").await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn webmd_1() {
|
||||
run_test("webmd-1").await
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue