mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
parse image objects
This commit is contained in:
parent
572fada104
commit
871b441776
5 changed files with 149 additions and 3 deletions
|
@ -15,10 +15,10 @@ Mod gibt dem 25 Jahr alten Shooter neuen Glanz </p>
|
||||||
<img src="https://www.hardwareluxx.de/images/avatare/HWL_avatar_default.jpg" alt="Portrait des Authors" width="87" height="87"/>
|
<img src="https://www.hardwareluxx.de/images/avatare/HWL_avatar_default.jpg" alt="Portrait des Authors" width="87" height="87"/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
<imageobject><a href="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/prompt_tech_bo/valve_half-life_ray-tracing001_1360px.jpg"><img width="1360" src="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/prompt_tech_bo/valve_half-life_ray-tracing001_1360px.jpg"/></a></imageobject></div>
|
||||||
|
|
||||||
<div id="tocbar">Mod gibt dem 25 Jahr alten Shooter neuen Glanz</div>
|
<div id="tocbar">Mod gibt dem 25 Jahr alten Shooter neuen Glanz</div>
|
||||||
<div id="maincontent" itemprop="articleBody">
|
<div id="maincontent" itemprop="articleBody">
|
||||||
<p>25 Jahre ist es nun her, dass Physiker Gordon Freeman und seine Brechstange zu den Helden eines bis heute legendären Ego-Shooters wurden. Mit Half-Life hat Valve eines der erfolgreichsten Spiele aller Zeiten produziert. Kein Wunder also, dass es bis heute eine aktive Fan-Base rund um den Titel gibt. </p> <p>Während immer wieder Gerüchte um einen eventuellen dritten Teil der Reihe aufflammen, haben Modder im Laufe der Jahre unzählige Mods und Total Conversions für das Ur-Spiel produziert. Unter dem Nickname sultim_t hat nun einer von ihnen Raytracing in Half-Life 1 integriert. Die Bilder zeigen eindrucksvoll, wie stark sich die neue Technik selbst in einem so alten Spiel auf das Erlebnis auswirkt. Neben Lampen und Displays haben auch die Waffen des Protagonisten neue Licht- und Schatteneffekte erhalten.</p> <p>Wer die Mod selbst ausprobieren möchte, benötigt die Originalversion von Half-Life. Nach der Installation über Steam muss im Zielordner erst die Zip-Datei der Mod entpackt werden. Danach die Datei xash3d.exe starten und mit der X-Taste die neuen Render aktivieren. Half Life von 1998 gibt es auf <a href="https://store.steampowered.com/app/70/HalfLife/" target="_blank">Steam</a> aktuell für 8,19 Euro. Die Mod gibt es kostenlos auf der <a href="https://github.com/sultim-t/xash-rt/releases" target="_blank">GitHub-Seite von sultim_t</a>. Dort findet sich auch eine genaue Anleitung zur Installation. </p> <a href="https://www.youtube.com/embed/LQCZTxzW6A0"><img src="http://img.youtube.com/vi/LQCZTxzW6A0/hqdefault.jpg"/></a></div>
|
<p>25 Jahre ist es nun her, dass Physiker Gordon Freeman und seine Brechstange zu den Helden eines bis heute legendären Ego-Shooters wurden. Mit Half-Life hat Valve eines der erfolgreichsten Spiele aller Zeiten produziert. Kein Wunder also, dass es bis heute eine aktive Fan-Base rund um den Titel gibt. </p> <p>Während immer wieder Gerüchte um einen eventuellen dritten Teil der Reihe aufflammen, haben Modder im Laufe der Jahre unzählige Mods und Total Conversions für das Ur-Spiel produziert. Unter dem Nickname sultim_t hat nun einer von ihnen Raytracing in Half-Life 1 integriert. Die Bilder zeigen eindrucksvoll, wie stark sich die neue Technik selbst in einem so alten Spiel auf das Erlebnis auswirkt. Neben Lampen und Displays haben auch die Waffen des Protagonisten neue Licht- und Schatteneffekte erhalten.</p><div data-nav="thumbs" data-width="100%" data-minwidth="320" data-maxheight="510" data-allowfullscreen="true" data-keyboard="true" data-arrows="true" data-click="false" data-swipe="true"> <imageobject><a href="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/grand_branch_i5/valve_half-life_ray-tracing004_300px.jpg"><img width="300" height="168" alt="Quelle: https://www.pcgamer.com/after-playing-half-life-with-this-new-ray-tracing-mod-i-need-a-cold-shower-in-a-dimly-lit-room/" title="Quelle: https://www.pcgamer.com/after-playing-half-life-with-this-new-ray-tracing-mod-i-need-a-cold-shower-in-a-dimly-lit-room/" src="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/grand_branch_i5/valve_half-life_ray-tracing004_300px.jpg"/></a></imageobject><imageobject><a href="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/roomy_segment_9v/valve_half-life_ray-tracing003_300px.jpg"><img width="300" height="168" alt="Quelle: https://www.pcgamer.com/after-playing-half-life-with-this-new-ray-tracing-mod-i-need-a-cold-shower-in-a-dimly-lit-room/" title="Quelle: https://www.pcgamer.com/after-playing-half-life-with-this-new-ray-tracing-mod-i-need-a-cold-shower-in-a-dimly-lit-room/" src="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/roomy_segment_9v/valve_half-life_ray-tracing003_300px.jpg"/></a></imageobject><imageobject><a href="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/noble_sample_tj/valve_half-life_ray-tracing002_300px.jpg"><img width="300" height="168" alt="Quelle: https://www.pcgamer.com/after-playing-half-life-with-this-new-ray-tracing-mod-i-need-a-cold-shower-in-a-dimly-lit-room/" title="Quelle: https://www.pcgamer.com/after-playing-half-life-with-this-new-ray-tracing-mod-i-need-a-cold-shower-in-a-dimly-lit-room/" src="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/noble_sample_tj/valve_half-life_ray-tracing002_300px.jpg"/></a></imageobject><imageobject><a href="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/prompt_tech_bo/valve_half-life_ray-tracing001_300px.jpg"><img width="300" height="168" alt="Quelle: https://www.pcgamer.com/after-playing-half-life-with-this-new-ray-tracing-mod-i-need-a-cold-shower-in-a-dimly-lit-room/" title="Quelle: https://www.pcgamer.com/after-playing-half-life-with-this-new-ray-tracing-mod-i-need-a-cold-shower-in-a-dimly-lit-room/" src="https://www.hardwareluxx.de/images/cdn02/uploads/2023/Apr/prompt_tech_bo/valve_half-life_ray-tracing001_300px.jpg"/></a></imageobject></div> <p>Wer die Mod selbst ausprobieren möchte, benötigt die Originalversion von Half-Life. Nach der Installation über Steam muss im Zielordner erst die Zip-Datei der Mod entpackt werden. Danach die Datei xash3d.exe starten und mit der X-Taste die neuen Render aktivieren. Half Life von 1998 gibt es auf <a href="https://store.steampowered.com/app/70/HalfLife/" target="_blank">Steam</a> aktuell für 8,19 Euro. Die Mod gibt es kostenlos auf der <a href="https://github.com/sultim-t/xash-rt/releases" target="_blank">GitHub-Seite von sultim_t</a>. Dort findet sich auch eine genaue Anleitung zur Installation. </p> <videoobject><h3>Related video</h3><a href="https://www.youtube.com/embed/LQCZTxzW6A0"><img src="http://img.youtube.com/vi/LQCZTxzW6A0/hqdefault.jpg"/></a></videoobject></div>
|
||||||
|
|
||||||
</article></article>
|
</article></article>
|
115
article_scraper/src/image_object.rs
Normal file
115
article_scraper/src/image_object.rs
Normal file
|
@ -0,0 +1,115 @@
|
||||||
|
use crate::{full_text_parser::error::FullTextParserError, util::Util};
|
||||||
|
use libxml::tree::Node;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct ImageObject {
|
||||||
|
width: Option<u32>,
|
||||||
|
height: Option<u32>,
|
||||||
|
url: Option<Url>,
|
||||||
|
description: Option<String>,
|
||||||
|
name: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ImageObject {
|
||||||
|
pub fn parse_node(node: &Node) -> Option<Self> {
|
||||||
|
if node.get_name().to_uppercase() != "DIV" {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let item_prop_image = node
|
||||||
|
.get_attribute("itemprop")
|
||||||
|
.map(|prop| prop == "image")
|
||||||
|
.unwrap_or(false);
|
||||||
|
let item_type_image = node
|
||||||
|
.get_attribute("itemtype")
|
||||||
|
.map(|attr| attr == "https://schema.org/ImageObject")
|
||||||
|
.unwrap_or(false);
|
||||||
|
|
||||||
|
if !item_prop_image && !item_type_image {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let meta_nodes = Util::get_elements_by_tag_name(node, "meta");
|
||||||
|
|
||||||
|
let mut width = None;
|
||||||
|
let mut height = None;
|
||||||
|
let mut url = None;
|
||||||
|
let mut description = None;
|
||||||
|
let mut name = None;
|
||||||
|
|
||||||
|
for meta_node in meta_nodes {
|
||||||
|
let item_prop = meta_node.get_attribute("itemprop");
|
||||||
|
let content_prop = meta_node.get_attribute("content");
|
||||||
|
|
||||||
|
if let (Some(item_prop), Some(content_prop)) = (item_prop, content_prop) {
|
||||||
|
if item_prop == "width" {
|
||||||
|
width = content_prop.parse::<u32>().ok();
|
||||||
|
} else if item_prop == "height" {
|
||||||
|
height = content_prop.parse::<u32>().ok();
|
||||||
|
} else if item_prop == "url" {
|
||||||
|
url = Url::parse(&content_prop).ok();
|
||||||
|
} else if item_prop == "description" {
|
||||||
|
description = Some(content_prop);
|
||||||
|
} else if item_prop == "name" {
|
||||||
|
name = Some(content_prop);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
url.as_ref()?;
|
||||||
|
|
||||||
|
Some(Self {
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
url,
|
||||||
|
description,
|
||||||
|
name,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn replace(&self, node: &mut Node) -> Result<(), FullTextParserError> {
|
||||||
|
let mut parent = node.get_parent().ok_or(FullTextParserError::Xml)?;
|
||||||
|
|
||||||
|
if parent.get_name().to_uppercase() == "A" {
|
||||||
|
return self.replace(&mut parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
node.unlink();
|
||||||
|
|
||||||
|
let mut root = parent
|
||||||
|
.new_child(None, "imageobject")
|
||||||
|
.map_err(|_| FullTextParserError::Xml)?;
|
||||||
|
|
||||||
|
let mut a = root
|
||||||
|
.new_child(None, "a")
|
||||||
|
.map_err(|_| FullTextParserError::Xml)?;
|
||||||
|
|
||||||
|
let mut img = a
|
||||||
|
.new_child(None, "img")
|
||||||
|
.map_err(|_| FullTextParserError::Xml)?;
|
||||||
|
|
||||||
|
if let Some(width) = self.width {
|
||||||
|
_ = img.set_attribute("width", &width.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(height) = self.height {
|
||||||
|
_ = img.set_attribute("height", &height.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(description) = self.description.as_deref() {
|
||||||
|
_ = img.set_attribute("alt", description);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(name) = self.name.as_deref() {
|
||||||
|
_ = img.set_attribute("title", name);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(url) = self.url.as_ref() {
|
||||||
|
_ = a.set_attribute("href", url.as_str());
|
||||||
|
_ = img.set_attribute("src", url.as_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
|
@ -39,6 +39,7 @@ pub mod clean;
|
||||||
mod constants;
|
mod constants;
|
||||||
mod error;
|
mod error;
|
||||||
mod full_text_parser;
|
mod full_text_parser;
|
||||||
|
mod image_object;
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
pub mod images;
|
pub mod images;
|
||||||
mod util;
|
mod util;
|
||||||
|
|
|
@ -13,6 +13,7 @@ use tokio::fs::DirEntry;
|
||||||
use crate::{
|
use crate::{
|
||||||
constants,
|
constants,
|
||||||
full_text_parser::{config::ConfigEntry, error::FullTextParserError},
|
full_text_parser::{config::ConfigEntry, error::FullTextParserError},
|
||||||
|
image_object::ImageObject,
|
||||||
video_object::VideoObject,
|
video_object::VideoObject,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -577,6 +578,8 @@ impl Util {
|
||||||
for mut node in nodes.into_iter().rev() {
|
for mut node in nodes.into_iter().rev() {
|
||||||
if let Some(video_object) = VideoObject::parse_node(&node) {
|
if let Some(video_object) = VideoObject::parse_node(&node) {
|
||||||
_ = video_object.replace(&mut node);
|
_ = video_object.replace(&mut node);
|
||||||
|
} else if let Some(image_object) = ImageObject::parse_node(&node) {
|
||||||
|
_ = image_object.replace(&mut node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -670,6 +673,13 @@ impl Util {
|
||||||
let has_figure_ancestor =
|
let has_figure_ancestor =
|
||||||
Self::has_ancestor_tag(node, "figure", None, None::<fn(&Node) -> bool>);
|
Self::has_ancestor_tag(node, "figure", None, None::<fn(&Node) -> bool>);
|
||||||
|
|
||||||
|
let image_obj_count = Util::get_elements_by_tag_name(node, "imageobject").len();
|
||||||
|
let video_obj_count = Util::get_elements_by_tag_name(node, "videoobject").len();
|
||||||
|
|
||||||
|
if image_obj_count > 0 || video_obj_count > 0 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
let have_to_remove = (img > 1 && (p as f64 / img as f64) < 0.5 && !has_figure_ancestor)
|
let have_to_remove = (img > 1 && (p as f64 / img as f64) < 0.5 && !has_figure_ancestor)
|
||||||
|| (!is_list && li > p as i64)
|
|| (!is_list && li > p as i64)
|
||||||
|| (input as f64 > f64::floor(p as f64 / 3.0))
|
|| (input as f64 > f64::floor(p as f64 / 3.0))
|
||||||
|
|
|
@ -90,7 +90,27 @@ impl VideoObject {
|
||||||
let mut parent = node.get_parent().ok_or(FullTextParserError::Xml)?;
|
let mut parent = node.get_parent().ok_or(FullTextParserError::Xml)?;
|
||||||
node.unlink();
|
node.unlink();
|
||||||
|
|
||||||
let mut a = parent
|
let mut root = parent
|
||||||
|
.new_child(None, "videoobject")
|
||||||
|
.map_err(|_| FullTextParserError::Xml)?;
|
||||||
|
|
||||||
|
if let Some(name) = self.name.as_deref() {
|
||||||
|
let mut title = root
|
||||||
|
.new_child(None, "h3")
|
||||||
|
.map_err(|_| FullTextParserError::Xml)?;
|
||||||
|
_ = title.set_content(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.name != self.description {
|
||||||
|
if let Some(description) = self.description.as_deref() {
|
||||||
|
let mut desc = root
|
||||||
|
.new_child(None, "p")
|
||||||
|
.map_err(|_| FullTextParserError::Xml)?;
|
||||||
|
_ = desc.set_content(description);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut a = root
|
||||||
.new_child(None, "a")
|
.new_child(None, "a")
|
||||||
.map_err(|_| FullTextParserError::Xml)?;
|
.map_err(|_| FullTextParserError::Xml)?;
|
||||||
if let Some(embed_url) = self.embed_url.as_ref() {
|
if let Some(embed_url) = self.embed_url.as_ref() {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue