1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-10 01:15:31 +02:00

lots of fixes

This commit is contained in:
Jan Lukas Gernert 2023-03-12 19:36:10 +01:00
parent 11d9657bdd
commit 603b373e0d
15 changed files with 660 additions and 63 deletions

View file

@ -381,9 +381,9 @@ impl Util {
pub fn has_single_tag_inside_element(node: &Node, tag: &str) -> bool {
// There should be exactly 1 element child with given tag
if node.get_child_nodes().len() != 1
if node.get_child_elements().len() != 1
|| node
.get_child_nodes()
.get_child_elements()
.first()
.map(|n| n.get_name().to_uppercase() != tag)
.unwrap_or(false)
@ -774,4 +774,17 @@ impl Util {
(rows, columns)
}
/// Returns `true` when `node` counts as phrasing content.
///
/// A node qualifies when any of the following holds:
/// * it is a text node,
/// * its upper-cased name is listed in `constants::PHRASING_ELEMS`,
/// * it is an `A`, `DEL` or `INS` element and every one of its child nodes
///   is itself phrasing content (checked recursively).
pub fn is_phrasing_content(node: &Node) -> bool {
    // Names are compared in upper case, so normalise once up front.
    let upper_name = node.get_name().to_uppercase();

    // A missing node type is treated the same as "not a text node".
    if node
        .get_type()
        .map_or(false, |kind| kind == NodeType::TextNode)
    {
        return true;
    }

    if constants::PHRASING_ELEMS.contains(&upper_name.as_str()) {
        return true;
    }

    // These wrappers are only phrasing content if everything inside them is.
    matches!(upper_name.as_str(), "A" | "DEL" | "INS")
        && node
            .get_child_nodes()
            .iter()
            .all(Self::is_phrasing_content)
}
}