mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-08 08:30:00 +02:00
don't attempt to redownload embedded images
This commit is contained in:
parent
4f5aef8e17
commit
5f82872d1f
1 changed file with 13 additions and 11 deletions
|
@ -66,20 +66,22 @@ impl ImageDownloader {
|
||||||
evaluate_xpath!(context, xpath, node_vec);
|
evaluate_xpath!(context, xpath, node_vec);
|
||||||
for mut node in node_vec {
|
for mut node in node_vec {
|
||||||
if let Some(url) = node.get_property("src") {
|
if let Some(url) = node.get_property("src") {
|
||||||
if let Ok(url) = url::Url::parse(&url) {
|
if !url.starts_with("data:") {
|
||||||
let parent_url = match self.check_image_parent(&node, &url) {
|
if let Ok(url) = url::Url::parse(&url) {
|
||||||
Ok(url) => Some(url),
|
let parent_url = match self.check_image_parent(&node, &url) {
|
||||||
Err(_) => None,
|
Ok(url) => Some(url),
|
||||||
};
|
Err(_) => None,
|
||||||
|
};
|
||||||
|
|
||||||
if let Ok((small_image, big_image)) = self.save_image(&url, &parent_url) {
|
if let Ok((small_image, big_image)) = self.save_image(&url, &parent_url) {
|
||||||
if let Err(_) = node.set_property("src", &small_image) {
|
if let Err(_) = node.set_property("src", &small_image) {
|
||||||
return Err(ImageDownloadErrorKind::HtmlParse)?;
|
|
||||||
}
|
|
||||||
if let Some(big_image) = big_image {
|
|
||||||
if let Err(_) = node.set_property("big-src", &big_image) {
|
|
||||||
return Err(ImageDownloadErrorKind::HtmlParse)?;
|
return Err(ImageDownloadErrorKind::HtmlParse)?;
|
||||||
}
|
}
|
||||||
|
if let Some(big_image) = big_image {
|
||||||
|
if let Err(_) = node.set_property("big-src", &big_image) {
|
||||||
|
return Err(ImageDownloadErrorKind::HtmlParse)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue