1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

only strip topmost nodes in tree branches

This commit is contained in:
Jan Lukas Gernert 2019-12-19 17:36:48 +01:00
parent 26346839f2
commit 9e995122c4

View file

@@ -392,7 +392,14 @@ impl ArticleScraper {
fn strip_id_or_class(context: &Context, id_or_class: &String) -> Result<(), ScraperError> {
let xpath = &format!("//*[contains(@class, '{}') or contains(@id, '{}')]", id_or_class, id_or_class);
let node_vec = Self::evaluate_xpath(context, xpath, false)?;
let mut ancestor = xpath.clone();
if ancestor.starts_with("//") {
ancestor = ancestor.chars().skip(2).collect();
}
let query = &format!("{}[not(ancestor::{})]", xpath, ancestor);
let node_vec = Self::evaluate_xpath(context, query, false)?;
for mut node in node_vec {
node.unlink();
}