1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-08 16:40:00 +02:00

strip iframes but keep videos

This commit is contained in:
Jan Lukas Gernert 2023-03-01 01:37:37 +01:00
parent cea23f1638
commit 7c9e527827
3 changed files with 80 additions and 51 deletions

View file

@ -199,7 +199,18 @@ impl Util {
let query = &format!("{}[not(ancestor::{})]", xpath, ancestor);
let node_vec = Util::evaluate_xpath(context, query, false)?;
for mut node in node_vec {
let tag_name = node.get_name();
if constants::EMBED_TAG_NAMES.contains(tag_name.to_uppercase().as_str()) {
if node
.get_attributes()
.iter()
.any(|(_name, value)| constants::VIDEOS.is_match(value))
{
continue;
}
}
node.unlink();
}
Ok(())