1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-08 16:40:00 +02:00

small fixes

This commit is contained in:
Jan Lukas Gernert 2023-03-12 23:13:28 +01:00
parent 4ca4b73823
commit 848291e4f3
4 changed files with 36 additions and 43 deletions

View file

@@ -856,8 +856,8 @@ impl FullTextParser {
Self::clean_attributes(&mut root)?;
Self::simplify_nested_elements(&mut root)?;
Self::remove_extra_p_and_div(&mut root);
Self::remove_single_cell_tables(&mut root);
Self::remove_extra_p_and_div(&mut root);
}
Ok(())
@@ -887,7 +887,7 @@ impl FullTextParser {
cell.set_name(if all_phrasing_content { "P" } else { "DIV" })
.unwrap();
if let Some(mut parent) = node.get_parent() {
-node_iter = Util::next_node(&node, false);
+node_iter = Util::next_node(&node, true);
parent.replace_child_node(cell, node.clone()).unwrap();
continue;
}
@@ -914,7 +914,7 @@ impl FullTextParser {
let total_count = img_count + embed_count + object_count + iframe_count;
if total_count == 0 && Util::get_inner_text(&node, false).trim().is_empty() {
-node_iter = Util::next_node(&node, false);
+node_iter = Util::next_node(&node, true);
node.unlink();
continue;
}