1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

no need for head

This commit is contained in:
Jan Lukas Gernert 2023-08-10 02:06:52 +02:00
parent 8c7cdacd26
commit 6116ba38ae

View file

@ -122,8 +122,6 @@ impl FullTextParser {
Node::new("article", None, &document).map_err(|()| FullTextParserError::Xml)?; Node::new("article", None, &document).map_err(|()| FullTextParserError::Xml)?;
document.set_root_element(&root); document.set_root_element(&root);
Self::generate_head(&mut root, &document)?;
for page_html in pages { for page_html in pages {
self.parse_page(&mut article, &page_html, &mut root, config, global_config)?; self.parse_page(&mut article, &page_html, &mut root, config, global_config)?;
} }
@ -1091,20 +1089,6 @@ impl FullTextParser {
} }
} }
/// Inserts a `head` element containing a `meta` node with `charset="utf-8"`
/// directly in front of `root`.
///
/// The `head` node is created in `document`, attached as the previous sibling
/// of `root`, and given a single `meta` child whose `charset` property is set
/// to `utf-8`.
///
/// # Errors
///
/// Returns [`FullTextParserError::Xml`] if any of those steps fails; the
/// underlying error value is discarded.
fn generate_head(root: &mut Node, document: &Document) -> Result<(), FullTextParserError> {
    let mut head = Node::new("head", None, document).map_err(|_| FullTextParserError::Xml)?;

    // Attach first so the `meta` child is created on a node already in the tree.
    root.add_prev_sibling(&mut head)
        .map_err(|_| FullTextParserError::Xml)?;

    let mut charset_meta = head
        .new_child(None, "meta")
        .map_err(|_| FullTextParserError::Xml)?;
    charset_meta
        .set_property("charset", "utf-8")
        .map_err(|_| FullTextParserError::Xml)?;

    Ok(())
}
fn prevent_self_closing_tags(context: &Context) -> Result<(), FullTextParserError> { fn prevent_self_closing_tags(context: &Context) -> Result<(), FullTextParserError> {
// search document for empty tags and add a empty text node as child // search document for empty tags and add a empty text node as child
// this prevents libxml from self closing non void elements such as iframe // this prevents libxml from self closing non void elements such as iframe