mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
don't attempt to redownload embedded images
This commit is contained in:
parent
4f5aef8e17
commit
5f82872d1f
1 changed file with 13 additions and 11 deletions
|
@ -66,20 +66,22 @@ impl ImageDownloader {
|
|||
evaluate_xpath!(context, xpath, node_vec);
|
||||
for mut node in node_vec {
|
||||
if let Some(url) = node.get_property("src") {
|
||||
if let Ok(url) = url::Url::parse(&url) {
|
||||
let parent_url = match self.check_image_parent(&node, &url) {
|
||||
Ok(url) => Some(url),
|
||||
Err(_) => None,
|
||||
};
|
||||
if !url.starts_with("data:") {
|
||||
if let Ok(url) = url::Url::parse(&url) {
|
||||
let parent_url = match self.check_image_parent(&node, &url) {
|
||||
Ok(url) => Some(url),
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
if let Ok((small_image, big_image)) = self.save_image(&url, &parent_url) {
|
||||
if let Err(_) = node.set_property("src", &small_image) {
|
||||
return Err(ImageDownloadErrorKind::HtmlParse)?;
|
||||
}
|
||||
if let Some(big_image) = big_image {
|
||||
if let Err(_) = node.set_property("big-src", &big_image) {
|
||||
if let Ok((small_image, big_image)) = self.save_image(&url, &parent_url) {
|
||||
if let Err(_) = node.set_property("src", &small_image) {
|
||||
return Err(ImageDownloadErrorKind::HtmlParse)?;
|
||||
}
|
||||
if let Some(big_image) = big_image {
|
||||
if let Err(_) = node.set_property("big-src", &big_image) {
|
||||
return Err(ImageDownloadErrorKind::HtmlParse)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue