1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

tmp: don't strip scripts

This commit is contained in:
Jan Lukas Gernert 2020-01-27 16:36:32 +01:00
parent 8247defe54
commit 90d45ea6b3

View file

@@ -115,10 +115,10 @@ impl ArticleScraper {
return Err(error) return Err(error)
} }
if let Err(error) = ArticleScraper::eliminate_noscript_tag(&context) { // if let Err(error) = ArticleScraper::eliminate_noscript_tag(&context) {
error!("Eliminating <noscript> tag failed - {}", error); // error!("Eliminating <noscript> tag failed - {}", error);
return Err(error) // return Err(error)
} // }
if download_images { if download_images {
if let Err(error) = self.image_downloader.download_images_from_context(&context).await { if let Err(error) = self.image_downloader.download_images_from_context(&context).await {
@@ -572,7 +572,7 @@ impl ArticleScraper {
let _ = ArticleScraper::strip_node(&context, &String::from("//*[contains(@style,'display:none')]")); let _ = ArticleScraper::strip_node(&context, &String::from("//*[contains(@style,'display:none')]"));
// strip all scripts // strip all scripts
let _ = ArticleScraper::strip_node(&context, &String::from("//script")); //let _ = ArticleScraper::strip_node(&context, &String::from("//script"));
// strip all comments // strip all comments
let _ = ArticleScraper::strip_node(&context, &String::from("//comment()")); let _ = ArticleScraper::strip_node(&context, &String::from("//comment()"));
@@ -706,25 +706,24 @@ impl ArticleScraper {
Ok(()) Ok(())
} }
fn eliminate_noscript_tag(context: &Context) -> Result<(), ScraperError> { // fn eliminate_noscript_tag(context: &Context) -> Result<(), ScraperError> {
// let xpath = "//noscript";
// let node_vec = Self::evaluate_xpath(context, xpath, false)?;
let xpath = "//noscript"; // for mut node in node_vec {
let node_vec = Self::evaluate_xpath(context, xpath, false)?; // if let Some(mut parent) = node.get_parent() {
// node.unlink();
for mut node in node_vec { // let children = node.get_child_nodes();
if let Some(mut parent) = node.get_parent() { // for mut child in children {
node.unlink(); // child.unlink();
let children = node.get_child_nodes(); // let _ = parent.add_child(&mut child);
for mut child in children { // }
child.unlink(); // }
let _ = parent.add_child(&mut child);
}
}
} // }
Ok(()) // Ok(())
} // }
} }