1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

clean html

This commit is contained in:
Jan Lukas Gernert 2023-04-20 08:41:10 +02:00
parent 3096f28aae
commit cd3d3468a3
2 changed files with 47 additions and 6 deletions

File diff suppressed because one or more lines are too long

View file

@@ -267,7 +267,7 @@ impl FullTextParser {
         Ok(())
     }
 
-    fn parse_html(
+    pub(crate) fn parse_html(
         html: &str,
         config: Option<&ConfigEntry>,
         global_config: &ConfigEntry,
@@ -293,7 +293,7 @@ impl FullTextParser {
         })
     }
 
-    fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> {
+    pub(crate) fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> {
         Context::new(doc).map_err(|()| {
             log::error!("Creating xpath context failed for downloaded HTML");
             FullTextParserError::Xml
@@ -727,7 +727,7 @@ impl FullTextParser {
         _ = Self::repair_urls(context, "//iframe", "src", url, document);
     }
 
-    fn prep_content(
+    pub(crate) fn prep_content(
         context: &Context,
         config: Option<&ConfigEntry>,
         global_config: &ConfigEntry,