mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 08:05:31 +02:00
Don't clean video tags
This commit is contained in:
parent
bf7a89fef7
commit
db007f752c
2 changed files with 79 additions and 6 deletions
File diff suppressed because one or more lines are too long
|
@ -738,11 +738,6 @@ impl Util {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// For embed with <object> tag, check inner HTML as well.
|
|
||||||
// if embed_node.get_name().to_lowercase() == "object" && constants::VIDEOS.is_match(embed_node.innerHTML) {
|
|
||||||
// return false;
|
|
||||||
// }
|
|
||||||
|
|
||||||
embed_count += 1;
|
embed_count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -755,8 +750,9 @@ impl Util {
|
||||||
|
|
||||||
let image_obj_count = Util::get_elements_by_tag_name(node, "imageobject").len();
|
let image_obj_count = Util::get_elements_by_tag_name(node, "imageobject").len();
|
||||||
let video_obj_count = Util::get_elements_by_tag_name(node, "videoobject").len();
|
let video_obj_count = Util::get_elements_by_tag_name(node, "videoobject").len();
|
||||||
|
let video_tag_count = Util::get_elements_by_tag_name(node, "video").len();
|
||||||
|
|
||||||
if image_obj_count > 0 || video_obj_count > 0 {
|
if image_obj_count > 0 || video_obj_count > 0 || video_tag_count > 0 {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue