mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-08 16:40:00 +02:00
readability stub
This commit is contained in:
parent
273ddd832c
commit
d906f6b7fe
5 changed files with 42 additions and 21 deletions
17
src/lib.rs
17
src/lib.rs
|
@ -1,18 +1,21 @@
|
|||
pub mod images;
|
||||
mod article;
|
||||
mod full_text_parser;
|
||||
mod util;
|
||||
mod error;
|
||||
mod full_text_parser;
|
||||
pub mod images;
|
||||
mod readability;
|
||||
mod util;
|
||||
|
||||
use std::path::Path;
|
||||
use article::Article;
|
||||
use full_text_parser::FullTextParser;
|
||||
use error::{ScraperError, ScraperErrorKind};
|
||||
use full_text_parser::FullTextParser;
|
||||
use images::ImageDownloader;
|
||||
use readability::Readability;
|
||||
use reqwest::Client;
|
||||
use std::path::Path;
|
||||
|
||||
/// Top-level scraper type of this crate.
///
/// Bundles the three components it is built from: a [`FullTextParser`],
/// a [`Readability`] instance and an [`ImageDownloader`]. All fields are
/// private; instances are created through `ArticleScraper::new`.
pub struct ArticleScraper {
|
||||
// Parser invoked as `full_text_parser.parse(url, client)` when scraping.
full_text_parser: FullTextParser,
|
||||
// Readability component; only constructed here, not yet called in the visible code.
readability: Readability,
|
||||
// Image downloader, presumably used when `download_images` is requested — TODO confirm.
image_downloader: ImageDownloader,
|
||||
}
|
||||
|
||||
|
@ -20,6 +23,7 @@ impl ArticleScraper {
|
|||
/// Creates a new `ArticleScraper` by constructing each of its components.
///
/// `user_configs` is forwarded unchanged to `FullTextParser::new`, which is
/// awaited before the scraper is returned. Passing `None` is allowed by the
/// signature; what the parser does in that case is not visible here.
pub async fn new(user_configs: Option<&Path>) -> Self {
|
||||
Self {
|
||||
full_text_parser: FullTextParser::new(user_configs).await,
|
||||
readability: Readability::new(),
|
||||
// NOTE(review): (2048, 2048) is presumably a maximum image size in pixels — confirm
// against `ImageDownloader::new`.
image_downloader: ImageDownloader::new((2048, 2048)),
|
||||
}
|
||||
}
|
||||
|
@ -30,7 +34,6 @@ impl ArticleScraper {
|
|||
download_images: bool,
|
||||
client: &Client,
|
||||
) -> Result<Article, ScraperError> {
|
||||
|
||||
let res = self.full_text_parser.parse(url, client).await;
|
||||
|
||||
if download_images {
|
||||
|
@ -45,4 +48,4 @@ impl ArticleScraper {
|
|||
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue