mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-08 08:30:00 +02:00
move stuff around
This commit is contained in:
parent
90383545e0
commit
c08f5afa5d
6 changed files with 84 additions and 81 deletions
|
@ -1,4 +1,5 @@
|
|||
use chrono::{DateTime, Utc};
|
||||
use libxml::tree::{Document, SaveOptions};
|
||||
use std::fs::File;
|
||||
use std::io::{Error, ErrorKind, Write};
|
||||
use std::path::PathBuf;
|
||||
|
@ -9,13 +10,31 @@ pub struct Article {
|
|||
pub author: Option<String>,
|
||||
pub url: Url,
|
||||
pub date: Option<DateTime<Utc>>,
|
||||
pub html: Option<String>,
|
||||
pub thumbnail_url: Option<String>,
|
||||
pub document: Option<Document>,
|
||||
}
|
||||
|
||||
impl Article {
|
||||
pub fn save_html(&self, path: &PathBuf) -> Result<(), Error> {
|
||||
if let Some(ref html) = self.html {
|
||||
/// Serializes the article's parsed document to a string.
///
/// Returns `None` when `self.document` is `None`. Otherwise returns the
/// output of `Document::to_string_with_options`, called with `as_html`
/// enabled and the remaining flags set as listed below.
pub fn get_content(&self) -> Option<String> {
    // Without a parsed document there is nothing to serialize.
    let document = self.document.as_ref()?;

    // serialize content
    let save_options = SaveOptions {
        format: false,
        no_declaration: false,
        no_empty_tags: true,
        no_xhtml: false,
        xhtml: false,
        as_xml: false,
        as_html: true,
        non_significant_whitespace: false,
    };

    Some(document.to_string_with_options(save_options))
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn save_html(&self, path: &PathBuf) -> Result<(), Error> {
|
||||
if let Some(ref html) = self.get_content() {
|
||||
if let Ok(()) = std::fs::create_dir_all(path) {
|
||||
let mut file_name = match self.title.clone() {
|
||||
Some(file_name) => file_name.replace('/', "_"),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue