mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
clippy
This commit is contained in:
parent
dc1bf2ef0c
commit
88bb88a38f
3 changed files with 17 additions and 24 deletions
|
@@ -16,7 +16,7 @@ pub struct Article {
|
||||||
impl Article {
|
impl Article {
|
||||||
pub fn save_html(&self, path: &PathBuf) -> Result<(), Error> {
|
pub fn save_html(&self, path: &PathBuf) -> Result<(), Error> {
|
||||||
if let Some(ref html) = self.html {
|
if let Some(ref html) = self.html {
|
||||||
if let Ok(()) = std::fs::create_dir_all(&path) {
|
if let Ok(()) = std::fs::create_dir_all(path) {
|
||||||
let mut file_name = match self.title.clone() {
|
let mut file_name = match self.title.clone() {
|
||||||
Some(file_name) => file_name.replace('/', "_"),
|
Some(file_name) => file_name.replace('/', "_"),
|
||||||
None => "Unknown Title".to_owned(),
|
None => "Unknown Title".to_owned(),
|
||||||
|
|
|
@@ -28,10 +28,7 @@ impl ConfigCollection {
|
||||||
|
|
||||||
if let Some(directory) = directory {
|
if let Some(directory) = directory {
|
||||||
// create data dir if it doesn't already exist
|
// create data dir if it doesn't already exist
|
||||||
if let Err(error) = std::fs::DirBuilder::new()
|
if let Err(error) = std::fs::DirBuilder::new().recursive(true).create(directory) {
|
||||||
.recursive(true)
|
|
||||||
.create(&directory)
|
|
||||||
{
|
|
||||||
log::warn!(
|
log::warn!(
|
||||||
"Failed to create user config directory {:?}: {}",
|
"Failed to create user config directory {:?}: {}",
|
||||||
directory,
|
directory,
|
||||||
|
|
|
@@ -209,17 +209,17 @@ impl FullTextParser {
|
||||||
|
|
||||||
// parse html
|
// parse html
|
||||||
let parser = Parser::default_html();
|
let parser = Parser::default_html();
|
||||||
Ok(parser.parse_string(html.as_str()).map_err(|err| {
|
parser.parse_string(html.as_str()).map_err(|err| {
|
||||||
error!("Parsing HTML failed for downloaded HTML {:?}", err);
|
error!("Parsing HTML failed for downloaded HTML {:?}", err);
|
||||||
FullTextParserError::Xml
|
FullTextParserError::Xml
|
||||||
})?)
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> {
|
fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> {
|
||||||
Ok(Context::new(doc).map_err(|()| {
|
Context::new(doc).map_err(|()| {
|
||||||
error!("Creating xpath context failed for downloaded HTML");
|
error!("Creating xpath context failed for downloaded HTML");
|
||||||
FullTextParserError::Xml
|
FullTextParserError::Xml
|
||||||
})?)
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn parse_single_page(
|
async fn parse_single_page(
|
||||||
|
@@ -356,29 +356,26 @@ impl FullTextParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_for_thumbnail(context: &Context, article: &mut Article) {
|
fn check_for_thumbnail(context: &Context, article: &mut Article) {
|
||||||
if let Some(thumb) = Self::get_attribute(
|
if let Ok(thumb) = Self::get_attribute(
|
||||||
context,
|
context,
|
||||||
"//meta[contains(@name, 'twitter:image')]",
|
"//meta[contains(@name, 'twitter:image')]",
|
||||||
"content",
|
"content",
|
||||||
)
|
) {
|
||||||
.ok()
|
article.thumbnail_url = Some(thumb);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Ok(thumb) =
|
||||||
|
Self::get_attribute(context, "//meta[contains(@name, 'og:image')]", "content")
|
||||||
{
|
{
|
||||||
article.thumbnail_url = Some(thumb);
|
article.thumbnail_url = Some(thumb);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(thumb) =
|
if let Ok(thumb) =
|
||||||
Self::get_attribute(context, "//meta[contains(@name, 'og:image')]", "content").ok()
|
Self::get_attribute(context, "//link[contains(@rel, 'image_src')]", "href")
|
||||||
{
|
{
|
||||||
article.thumbnail_url = Some(thumb);
|
article.thumbnail_url = Some(thumb);
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(thumb) =
|
|
||||||
Self::get_attribute(context, "//link[contains(@rel, 'image_src')]", "href").ok()
|
|
||||||
{
|
|
||||||
article.thumbnail_url = Some(thumb);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -495,8 +492,7 @@ impl FullTextParser {
|
||||||
|
|
||||||
if is_relative_url {
|
if is_relative_url {
|
||||||
let completed_url = Self::complete_url(article_url, &url)?;
|
let completed_url = Self::complete_url(article_url, &url)?;
|
||||||
let _ = node
|
node.set_attribute(attribute, completed_url.as_str())
|
||||||
.set_attribute(attribute, completed_url.as_str())
|
|
||||||
.map_err(|_| FullTextParserError::Scrape)?;
|
.map_err(|_| FullTextParserError::Scrape)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue