mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-08 16:40:00 +02:00
strip iframes but keep videos
This commit is contained in:
parent
cea23f1638
commit
7c9e527827
3 changed files with 80 additions and 51 deletions
11
src/util.rs
11
src/util.rs
|
@ -199,7 +199,18 @@ impl Util {
|
|||
|
||||
let query = &format!("{}[not(ancestor::{})]", xpath, ancestor);
|
||||
let node_vec = Util::evaluate_xpath(context, query, false)?;
|
||||
|
||||
for mut node in node_vec {
|
||||
let tag_name = node.get_name();
|
||||
if constants::EMBED_TAG_NAMES.contains(tag_name.to_uppercase().as_str()) {
|
||||
if node
|
||||
.get_attributes()
|
||||
.iter()
|
||||
.any(|(_name, value)| constants::VIDEOS.is_match(value))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
node.unlink();
|
||||
}
|
||||
Ok(())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue