From 58a799b096649d7ac9b5518466b16ab2b26c1ca5 Mon Sep 17 00:00:00 2001
From: Jan Lukas Gernert
Date: Sun, 12 Mar 2023 11:42:37 +0100
Subject: [PATCH] fix negative regex & fmt

---
 src/constants.rs                        | 2 +-
 src/full_text_parser/readability/mod.rs | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/constants.rs b/src/constants.rs
index cc188fd..4244326 100644
--- a/src/constants.rs
+++ b/src/constants.rs
@@ -41,7 +41,7 @@ pub static POSITIVE: Lazy<Regex> =
         .expect("POSITIVE regex")
 });
 pub static NEGATIVE: Lazy<Regex> = Lazy::new(|| {
-    Regex::new(r#"-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget"#).expect("NEGATIVE regex")
+    RegexBuilder::new(r#"-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget"#).case_insensitive(true).build().expect("NEGATIVE regex")
 });
 
 pub static TITLE_SEPARATOR: Lazy<Regex> =
diff --git a/src/full_text_parser/readability/mod.rs b/src/full_text_parser/readability/mod.rs
index ee919f6..999ff41 100644
--- a/src/full_text_parser/readability/mod.rs
+++ b/src/full_text_parser/readability/mod.rs
@@ -355,7 +355,9 @@ impl Readability {
             // The scores shouldn't get too low.
             let score_threshold = last_score / 3.0;
 
-            while parent_of_top_candidate.is_some() && !Util::has_tag_name(parent_of_top_candidate.as_ref(), "BODY") {
+            while parent_of_top_candidate.is_some()
+                && !Util::has_tag_name(parent_of_top_candidate.as_ref(), "BODY")
+            {
                 if parent_of_top_candidate
                     .as_ref()
                     .map(|n| Self::get_content_score(n).is_none())