mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 08:05:31 +02:00
tmp: don't strip scripts
This commit is contained in:
parent
8247defe54
commit
90d45ea6b3
1 changed file with 20 additions and 21 deletions
41
src/lib.rs
41
src/lib.rs
|
@ -115,10 +115,10 @@ impl ArticleScraper {
|
||||||
return Err(error)
|
return Err(error)
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Err(error) = ArticleScraper::eliminate_noscript_tag(&context) {
|
// if let Err(error) = ArticleScraper::eliminate_noscript_tag(&context) {
|
||||||
error!("Eliminating <noscript> tag failed - {}", error);
|
// error!("Eliminating <noscript> tag failed - {}", error);
|
||||||
return Err(error)
|
// return Err(error)
|
||||||
}
|
// }
|
||||||
|
|
||||||
if download_images {
|
if download_images {
|
||||||
if let Err(error) = self.image_downloader.download_images_from_context(&context).await {
|
if let Err(error) = self.image_downloader.download_images_from_context(&context).await {
|
||||||
|
@ -572,7 +572,7 @@ impl ArticleScraper {
|
||||||
let _ = ArticleScraper::strip_node(&context, &String::from("//*[contains(@style,'display:none')]"));
|
let _ = ArticleScraper::strip_node(&context, &String::from("//*[contains(@style,'display:none')]"));
|
||||||
|
|
||||||
// strip all scripts
|
// strip all scripts
|
||||||
let _ = ArticleScraper::strip_node(&context, &String::from("//script"));
|
//let _ = ArticleScraper::strip_node(&context, &String::from("//script"));
|
||||||
|
|
||||||
// strip all comments
|
// strip all comments
|
||||||
let _ = ArticleScraper::strip_node(&context, &String::from("//comment()"));
|
let _ = ArticleScraper::strip_node(&context, &String::from("//comment()"));
|
||||||
|
@ -706,25 +706,24 @@ impl ArticleScraper {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eliminate_noscript_tag(context: &Context) -> Result<(), ScraperError> {
|
// fn eliminate_noscript_tag(context: &Context) -> Result<(), ScraperError> {
|
||||||
|
// let xpath = "//noscript";
|
||||||
|
// let node_vec = Self::evaluate_xpath(context, xpath, false)?;
|
||||||
|
|
||||||
let xpath = "//noscript";
|
// for mut node in node_vec {
|
||||||
let node_vec = Self::evaluate_xpath(context, xpath, false)?;
|
// if let Some(mut parent) = node.get_parent() {
|
||||||
|
// node.unlink();
|
||||||
for mut node in node_vec {
|
// let children = node.get_child_nodes();
|
||||||
if let Some(mut parent) = node.get_parent() {
|
// for mut child in children {
|
||||||
node.unlink();
|
// child.unlink();
|
||||||
let children = node.get_child_nodes();
|
// let _ = parent.add_child(&mut child);
|
||||||
for mut child in children {
|
// }
|
||||||
child.unlink();
|
// }
|
||||||
let _ = parent.add_child(&mut child);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
// }
|
||||||
|
|
||||||
Ok(())
|
// Ok(())
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue