mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
fix NEGATIVE regex (make it case-insensitive) & apply rustfmt formatting
This commit is contained in:
parent
1e71aa2bfb
commit
58a799b096
2 changed files with 4 additions and 2 deletions
|
@ -41,7 +41,7 @@ pub static POSITIVE: Lazy<Regex> =
|
||||||
.expect("POSITIVE regex")
|
.expect("POSITIVE regex")
|
||||||
});
|
});
|
||||||
pub static NEGATIVE: Lazy<Regex> = Lazy::new(|| {
|
pub static NEGATIVE: Lazy<Regex> = Lazy::new(|| {
|
||||||
Regex::new(r#"-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget"#).expect("NEGATIVE regex")
|
RegexBuilder::new(r#"-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget"#).case_insensitive(true).build().expect("NEGATIVE regex")
|
||||||
});
|
});
|
||||||
|
|
||||||
pub static TITLE_SEPARATOR: Lazy<Regex> =
|
pub static TITLE_SEPARATOR: Lazy<Regex> =
|
||||||
|
|
|
@ -355,7 +355,9 @@ impl Readability {
|
||||||
// The scores shouldn't get too low.
|
// The scores shouldn't get too low.
|
||||||
let score_threshold = last_score / 3.0;
|
let score_threshold = last_score / 3.0;
|
||||||
|
|
||||||
while parent_of_top_candidate.is_some() && !Util::has_tag_name(parent_of_top_candidate.as_ref(), "BODY") {
|
while parent_of_top_candidate.is_some()
|
||||||
|
&& !Util::has_tag_name(parent_of_top_candidate.as_ref(), "BODY")
|
||||||
|
{
|
||||||
if parent_of_top_candidate
|
if parent_of_top_candidate
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|n| Self::get_content_score(n).is_none())
|
.map(|n| Self::get_content_score(n).is_none())
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue