mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
only strip topmost nodes in tree branches
This commit is contained in:
parent
26346839f2
commit
9e995122c4
1 changed file with 8 additions and 1 deletion
|
@@ -392,7 +392,14 @@ impl ArticleScraper {
|
|||
|
||||
fn strip_id_or_class(context: &Context, id_or_class: &String) -> Result<(), ScraperError> {
|
||||
let xpath = &format!("//*[contains(@class, '{}') or contains(@id, '{}')]", id_or_class, id_or_class);
|
||||
let node_vec = Self::evaluate_xpath(context, xpath, false)?;
|
||||
|
||||
let mut ancestor = xpath.clone();
|
||||
if ancestor.starts_with("//") {
|
||||
ancestor = ancestor.chars().skip(2).collect();
|
||||
}
|
||||
|
||||
let query = &format!("{}[not(ancestor::{})]", xpath, ancestor);
|
||||
let node_vec = Self::evaluate_xpath(context, query, false)?;
|
||||
for mut node in node_vec {
|
||||
node.unlink();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue