1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-08 08:30:00 +02:00

first content extraction kinda working

This commit is contained in:
Jan Lukas Gernert 2023-02-20 00:29:44 +01:00
parent 2c76a869e7
commit cce912c354
8 changed files with 363 additions and 58 deletions

View file

@@ -181,8 +181,8 @@ impl FullTextParser {
Self::strip_junk(&xpath_ctx, config, global_config, url);
let found_body = Self::extract_body(&xpath_ctx, root, config, global_config)?;
-if found_body {
-if let Err(error) = Readability::extract_body_readability(document, root) {
+if !found_body {
+if let Err(error) = Readability::extract_body(document, root) {
log::error!("Both ftr and readability failed to find content: {}", error);
return Err(error);
}