From 3a92585f4dee6bca504a38bc78c10104ebf41ac1 Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Wed, 1 Mar 2023 00:42:03 +0100 Subject: [PATCH] use url.join() instead of custom code --- src/full_text_parser/mod.rs | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/src/full_text_parser/mod.rs b/src/full_text_parser/mod.rs index fc2130d..3f72179 100644 --- a/src/full_text_parser/mod.rs +++ b/src/full_text_parser/mod.rs @@ -510,7 +510,7 @@ impl FullTextParser { .unwrap_or(false); if is_relative_url { - let completed_url = Self::complete_url(article_url, &url)?; + let completed_url = article_url.join(&url)?; node.set_attribute(attribute, completed_url.as_str()) .map_err(|_| FullTextParserError::Scrape)?; } @@ -519,31 +519,6 @@ impl FullTextParser { Ok(()) } - fn complete_url( - article_url: &url::Url, - incomplete_url: &str, - ) -> Result<url::Url, FullTextParserError> { - let mut completed_url = article_url.scheme().to_owned(); - completed_url.push(':'); - - if !incomplete_url.starts_with("//") { - match article_url.host() { - Some(url::Host::Domain(host)) => { - completed_url.push_str("//"); - completed_url.push_str(host); - } - _ => return Err(FullTextParserError::Scrape), - }; - } - - if !completed_url.ends_with('/') && !incomplete_url.starts_with('/') { - completed_url.push('/'); - } - completed_url.push_str(incomplete_url); - let url = url::Url::parse(&completed_url)?; - Ok(url) - } - fn fix_urls(context: &Context, url: &Url) { let _ = Self::repair_urls(context, "//img", "src", url); let _ = Self::repair_urls(context, "//a", "src", url);