From a42ececb2a514dbe02d07f31d50e8ab070e38df7 Mon Sep 17 00:00:00 2001
From: Jan Lukas Gernert
Date: Sat, 6 Jun 2020 05:18:25 +0200
Subject: [PATCH] check if final url differs from original even without redirect status

---
 src/lib.rs | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index f03ce10..d182428 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -69,11 +69,12 @@ impl ArticleScraper {
             .context(ScraperErrorKind::Http)?;
 
         // check if url redirects and we need to pick up the new url
-        let mut url = url;
-        if let Some(new_url) = ArticleScraper::check_redirect(&response) {
+        let url = if let Some(new_url) = ArticleScraper::check_redirect(&response, &url) {
             debug!("Url '{}' redirects to '{}'", url.as_str(), new_url.as_str());
-            url = new_url;
-        }
+            new_url
+        } else {
+            url
+        };
 
         // check if we are dealing with text/html
         if !ArticleScraper::check_content_type(&response)? {
@@ -381,10 +382,12 @@ impl ArticleScraper {
         Err(ScraperErrorKind::Http.into())
     }
 
-    fn check_redirect(response: &Response) -> Option<url::Url> {
+    fn check_redirect(response: &Response, original_url: &url::Url) -> Option<url::Url> {
         if response.status() == reqwest::StatusCode::PERMANENT_REDIRECT {
             debug!("Article url redirects to '{}'", response.url().as_str());
             return Some(response.url().clone());
+        } else if response.url() != original_url {
+            return Some(response.url().clone());
         }
 
         None