mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
fmt
This commit is contained in:
parent
69659da983
commit
7b205e8e27
1 changed file with 5 additions and 7 deletions
12
src/lib.rs
12
src/lib.rs
|
@ -168,8 +168,7 @@ impl ArticleScraper {
|
||||||
xpath_single_page_link
|
xpath_single_page_link
|
||||||
);
|
);
|
||||||
|
|
||||||
if let Some(single_page_url) = Util::find_page_url(&xpath_ctx, xpath_single_page_link)
|
if let Some(single_page_url) = Util::find_page_url(&xpath_ctx, xpath_single_page_link) {
|
||||||
{
|
|
||||||
// parse again with single page url
|
// parse again with single page url
|
||||||
debug!("Single page link found '{}'", single_page_url);
|
debug!("Single page link found '{}'", single_page_url);
|
||||||
|
|
||||||
|
@ -574,15 +573,14 @@ impl ArticleScraper {
|
||||||
// strip elements using Readability.com and Instapaper.com ignore class names
|
// strip elements using Readability.com and Instapaper.com ignore class names
|
||||||
// .entry-unrelated and .instapaper_ignore
|
// .entry-unrelated and .instapaper_ignore
|
||||||
// See http://blog.instapaper.com/post/730281947
|
// See http://blog.instapaper.com/post/730281947
|
||||||
let _ = Util::strip_node(context,
|
|
||||||
"//*[contains(@class,' entry-unrelated ') or contains(@class,' instapaper_ignore ')]");
|
|
||||||
|
|
||||||
// strip elements that contain style="display: none;"
|
|
||||||
let _ = Util::strip_node(
|
let _ = Util::strip_node(
|
||||||
context,
|
context,
|
||||||
"//*[contains(@style,'display:none')]",
|
"//*[contains(@class,' entry-unrelated ') or contains(@class,' instapaper_ignore ')]",
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// strip elements that contain style="display: none;"
|
||||||
|
let _ = Util::strip_node(context, "//*[contains(@style,'display:none')]");
|
||||||
|
|
||||||
// strip all comments
|
// strip all comments
|
||||||
let _ = Util::strip_node(context, "//comment()");
|
let _ = Util::strip_node(context, "//comment()");
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue