1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 08:05:31 +02:00

clean html

This commit is contained in:
Jan Lukas Gernert 2023-04-20 08:41:10 +02:00
parent 3096f28aae
commit cd3d3468a3
2 changed files with 47 additions and 6 deletions

File diff suppressed because one or more lines are too long

View file

@@ -267,7 +267,7 @@ impl FullTextParser {
Ok(())
}
- fn parse_html(
+ pub(crate) fn parse_html(
html: &str,
config: Option<&ConfigEntry>,
global_config: &ConfigEntry,
@@ -293,7 +293,7 @@ impl FullTextParser {
})
}
- fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> {
+ pub(crate) fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> {
Context::new(doc).map_err(|()| {
log::error!("Creating xpath context failed for downloaded HTML");
FullTextParserError::Xml
@@ -727,7 +727,7 @@ impl FullTextParser {
_ = Self::repair_urls(context, "//iframe", "src", url, document);
}
- fn prep_content(
+ pub(crate) fn prep_content(
context: &Context,
config: Option<&ConfigEntry>,
global_config: &ConfigEntry,