1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 16:15:32 +02:00

don't attempt to redownload embedded images

This commit is contained in:
Jan Lukas Gernert 2019-09-26 21:48:24 +02:00
parent 4f5aef8e17
commit 5f82872d1f

View file

@ -66,20 +66,22 @@ impl ImageDownloader {
evaluate_xpath!(context, xpath, node_vec);
for mut node in node_vec {
if let Some(url) = node.get_property("src") {
if let Ok(url) = url::Url::parse(&url) {
let parent_url = match self.check_image_parent(&node, &url) {
Ok(url) => Some(url),
Err(_) => None,
};
if !url.starts_with("data:") {
if let Ok(url) = url::Url::parse(&url) {
let parent_url = match self.check_image_parent(&node, &url) {
Ok(url) => Some(url),
Err(_) => None,
};
if let Ok((small_image, big_image)) = self.save_image(&url, &parent_url) {
if let Err(_) = node.set_property("src", &small_image) {
return Err(ImageDownloadErrorKind::HtmlParse)?;
}
if let Some(big_image) = big_image {
if let Err(_) = node.set_property("big-src", &big_image) {
if let Ok((small_image, big_image)) = self.save_image(&url, &parent_url) {
if let Err(_) = node.set_property("src", &small_image) {
return Err(ImageDownloadErrorKind::HtmlParse)?;
}
if let Some(big_image) = big_image {
if let Err(_) = node.set_property("big-src", &big_image) {
return Err(ImageDownloadErrorKind::HtmlParse)?;
}
}
}
}
}