mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 08:05:31 +02:00
clean html
This commit is contained in:
parent
3096f28aae
commit
cd3d3468a3
2 changed files with 47 additions and 6 deletions
File diff suppressed because one or more lines are too long
|
@ -267,7 +267,7 @@ impl FullTextParser {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_html(
|
||||
pub(crate) fn parse_html(
|
||||
html: &str,
|
||||
config: Option<&ConfigEntry>,
|
||||
global_config: &ConfigEntry,
|
||||
|
@ -293,7 +293,7 @@ impl FullTextParser {
|
|||
})
|
||||
}
|
||||
|
||||
fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> {
|
||||
pub(crate) fn get_xpath_ctx(doc: &Document) -> Result<Context, FullTextParserError> {
|
||||
Context::new(doc).map_err(|()| {
|
||||
log::error!("Creating xpath context failed for downloaded HTML");
|
||||
FullTextParserError::Xml
|
||||
|
@ -727,7 +727,7 @@ impl FullTextParser {
|
|||
_ = Self::repair_urls(context, "//iframe", "src", url, document);
|
||||
}
|
||||
|
||||
fn prep_content(
|
||||
pub(crate) fn prep_content(
|
||||
context: &Context,
|
||||
config: Option<&ConfigEntry>,
|
||||
global_config: &ConfigEntry,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue