From 6b6c52f3155c7e7dd3280f6d9378d44a32da63af Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Sun, 7 Jun 2020 13:21:53 +0200 Subject: [PATCH] only use builtin youtube parsing if no config is provided --- Cargo.toml | 1 + src/config/mod.rs | 7 ++--- src/lib.rs | 74 ++++++++++++++++++++--------------------------- src/youtube.rs | 10 +++++-- 4 files changed, 44 insertions(+), 48 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8f18f2f..93aec76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,3 +19,4 @@ chrono = "0.4" base64 = "0.12" image = "0.23" log = "0.4" +parking_lot = "0.10" diff --git a/src/config/mod.rs b/src/config/mod.rs index 9ea32ee..b7d6e6a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1,7 +1,7 @@ use self::error::{ConfigError, ConfigErrorKind}; use failure::ResultExt; use log::warn; -use std::collections; +use std::collections::HashMap; use std::fs; use std::io; use std::io::BufRead; @@ -11,7 +11,7 @@ use std::path::PathBuf; mod macros; mod error; -pub type ConfigCollection = collections::HashMap; +pub type ConfigCollection = HashMap; #[derive(Clone)] pub struct Replace { @@ -43,8 +43,7 @@ impl GrabberConfig { let paths = fs::read_dir(directory).context(ConfigErrorKind::IO)?; - let mut collection: collections::HashMap = - collections::HashMap::new(); + let mut collection: HashMap = HashMap::new(); for path in paths { if let Ok(path) = path { diff --git a/src/lib.rs b/src/lib.rs index 4b0166f..c95affc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,11 +15,12 @@ use libxml::parser::Parser; use libxml::tree::{Document, Node, SaveOptions}; use libxml::xpath::Context; use log::{debug, error, info, warn}; +use parking_lot::RwLock; use reqwest::{Client, Response}; -use std::collections; +use std::collections::HashMap; use std::path::PathBuf; use std::str::FromStr; -use std::sync::{Arc, RwLock}; +use std::sync::Arc; use std::thread; pub struct ArticleScraper { @@ -34,15 +35,9 @@ impl ArticleScraper { let locked_config_files = config_files.clone(); thread::spawn(move || { if let Ok(config_files) = GrabberConfig::parse_directory(&config_path) { - locked_config_files - .write() - .expect("Failed to lock config file cache") - .replace(config_files); + locked_config_files.write().replace(config_files); } else { - locked_config_files - .write() - .expect("Failed to lock config file cache") - .replace(collections::HashMap::new()); + locked_config_files.write().replace(HashMap::new()); } }); @@ -60,8 +55,11 @@ impl ArticleScraper { ) -> Result { info!("Scraping article: '{}'", url.as_str()); - if let Some(article) = youtube::Youtube::handle(&url) { - return Ok(article); + // custom youtube handling, but prefer config if exists + if !self.grabber_config_exists("youtube.com")? { + if let Some(article) = youtube::Youtube::handle(&url) { + return Ok(article); + } } let response = client @@ -119,11 +117,6 @@ impl ArticleScraper { return Err(error); } - // if let Err(error) = ArticleScraper::eliminate_noscript_tag(&context) { - // error!("Eliminating