mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
fmt
This commit is contained in:
parent
69659da983
commit
7b205e8e27
1 changed file with 5 additions and 7 deletions
12
src/lib.rs
12
src/lib.rs
|
@ -168,8 +168,7 @@ impl ArticleScraper {
|
|||
xpath_single_page_link
|
||||
);
|
||||
|
||||
if let Some(single_page_url) = Util::find_page_url(&xpath_ctx, xpath_single_page_link)
|
||||
{
|
||||
if let Some(single_page_url) = Util::find_page_url(&xpath_ctx, xpath_single_page_link) {
|
||||
// parse again with single page url
|
||||
debug!("Single page link found '{}'", single_page_url);
|
||||
|
||||
|
@ -574,15 +573,14 @@ impl ArticleScraper {
|
|||
// strip elements using Readability.com and Instapaper.com ignore class names
|
||||
// .entry-unrelated and .instapaper_ignore
|
||||
// See http://blog.instapaper.com/post/730281947
|
||||
let _ = Util::strip_node(context,
|
||||
"//*[contains(@class,' entry-unrelated ') or contains(@class,' instapaper_ignore ')]");
|
||||
|
||||
// strip elements that contain style="display: none;"
|
||||
let _ = Util::strip_node(
|
||||
context,
|
||||
"//*[contains(@style,'display:none')]",
|
||||
"//*[contains(@class,' entry-unrelated ') or contains(@class,' instapaper_ignore ')]",
|
||||
);
|
||||
|
||||
// strip elements that contain style="display: none;"
|
||||
let _ = Util::strip_node(context, "//*[contains(@style,'display:none')]");
|
||||
|
||||
// strip all comments
|
||||
let _ = Util::strip_node(context, "//comment()");
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue