mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-08 08:30:00 +02:00
first content extraction kinda working
This commit is contained in:
parent
2c76a869e7
commit
cce912c354
8 changed files with 363 additions and 58 deletions
|
@ -181,8 +181,8 @@ impl FullTextParser {
|
|||
Self::strip_junk(&xpath_ctx, config, global_config, url);
|
||||
let found_body = Self::extract_body(&xpath_ctx, root, config, global_config)?;
|
||||
|
||||
if found_body {
|
||||
if let Err(error) = Readability::extract_body_readability(document, root) {
|
||||
if !found_body {
|
||||
if let Err(error) = Readability::extract_body(document, root) {
|
||||
log::error!("Both ftr and readability failed to find content: {}", error);
|
||||
return Err(error);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue