mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
check if final url differs from original even without redirect status
This commit is contained in:
parent
3bb8485f40
commit
a42ececb2a
1 changed file with 8 additions and 5 deletions
13
src/lib.rs
13
src/lib.rs
|
@@ -69,11 +69,12 @@ impl ArticleScraper {
|
||||||
.context(ScraperErrorKind::Http)?;
|
.context(ScraperErrorKind::Http)?;
|
||||||
|
|
||||||
// check if url redirects and we need to pick up the new url
|
// check if url redirects and we need to pick up the new url
|
||||||
let mut url = url;
|
let url = if let Some(new_url) = ArticleScraper::check_redirect(&response, &url) {
|
||||||
if let Some(new_url) = ArticleScraper::check_redirect(&response) {
|
|
||||||
debug!("Url '{}' redirects to '{}'", url.as_str(), new_url.as_str());
|
debug!("Url '{}' redirects to '{}'", url.as_str(), new_url.as_str());
|
||||||
url = new_url;
|
new_url
|
||||||
}
|
} else {
|
||||||
|
url
|
||||||
|
};
|
||||||
|
|
||||||
// check if we are dealing with text/html
|
// check if we are dealing with text/html
|
||||||
if !ArticleScraper::check_content_type(&response)? {
|
if !ArticleScraper::check_content_type(&response)? {
|
||||||
|
@@ -381,10 +382,12 @@ impl ArticleScraper {
|
||||||
Err(ScraperErrorKind::Http.into())
|
Err(ScraperErrorKind::Http.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_redirect(response: &Response) -> Option<url::Url> {
|
fn check_redirect(response: &Response, original_url: &url::Url) -> Option<url::Url> {
|
||||||
if response.status() == reqwest::StatusCode::PERMANENT_REDIRECT {
|
if response.status() == reqwest::StatusCode::PERMANENT_REDIRECT {
|
||||||
debug!("Article url redirects to '{}'", response.url().as_str());
|
debug!("Article url redirects to '{}'", response.url().as_str());
|
||||||
return Some(response.url().clone());
|
return Some(response.url().clone());
|
||||||
|
} else if response.url() != original_url {
|
||||||
|
return Some(response.url().clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
None
|
None
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue