1
0
Fork 0
mirror of https://gitlab.com/news-flash/article_scraper.git synced 2025-07-07 08:05:31 +02:00

readability cli

This commit is contained in:
Jan Lukas Gernert 2023-04-06 08:53:19 +02:00
parent a2719c8c7e
commit 063996d62f
5 changed files with 95 additions and 10 deletions

View file

@ -9,4 +9,7 @@ repository = "https://gitlab.com/news-flash/article_scraper"
[dependencies]
article_scraper = { path = "../article_scraper/" }
clap = { version = "4.2", features = [ "derive" ] }
clap = { version = "4.2", features = [ "derive" ] }
simplelog = "0.12"
log = "0.4"
url = "2.3"

View file

@ -6,25 +6,29 @@ use std::path::PathBuf;
// Top-level command line arguments of the CLI; the `#[arg]` / `#[command]`
// attributes below are clap derive attributes.
// Plain `//` comments are used for these notes on purpose: clap's derive turns
// `///` doc comments into help text, so adding those would change CLI output.
// NOTE(review): every field appears twice (private, then `pub`). This looks
// like removed/added line pairs from a diff view rather than one struct body;
// presumably only the `pub` variants are live. Confirm before compiling.
pub struct Args {
/// Turn debug logging on
#[arg(short, long)]
debug: bool,
pub debug: bool,
#[command(subcommand)]
// The first variant makes the subcommand optional, the second requires one.
command: Option<Commands>,
pub command: Commands,
/// Destination of resulting HTML file
#[arg(short, long, value_name = "FILE")]
output: Option<PathBuf>,
pub output: Option<PathBuf>,
}
#[derive(Subcommand)]
enum Commands {
pub enum Commands {
/// Only use the Readability parser
Readability {
/// Source HTML file
#[arg(long, value_name = "FILE")]
html: Option<PathBuf>,
/// Source Url
/// Base to complete relative Url
#[arg(long, value_name = "URL")]
base_url: Option<String>,
/// Source Url to download HTML from
#[arg(long, value_name = "URL")]
source_url: Option<String>,
},

View file

@ -1,8 +1,49 @@
use std::{path::PathBuf, process::exit};
use crate::args::{Args, Commands};
use clap::Parser;
use simplelog::{ColorChoice, Config, LevelFilter, TermLogger, TerminalMode};
use url::Url;
mod args;
/// Program entry point: parses the command line, initialises terminal
/// logging and dispatches to the handler of the selected subcommand.
///
/// # Panics
///
/// Panics if `TermLogger::init` returns an error (the result is unwrapped).
pub fn main() {
// NOTE(review): the next two lines look like the pre-change body kept by the
// diff view this text was taken from (the arguments are parsed again right
// below); confirm before treating them as live code.
let _args = args::Args::parse();
println!("hello world");
let args = Args::parse();
// `--debug` lowers the log threshold from `Info` to `Debug`.
let level = if args.debug {
LevelFilter::Debug
} else {
LevelFilter::Info
};
TermLogger::init(
level,
Config::default(),
TerminalMode::Mixed,
ColorChoice::Auto,
)
.unwrap();
// `Readability` is the only subcommand arm visible here; its fields are
// handed to `extract_readability` in (html, source_url, base_url) order.
match args.command {
Commands::Readability {
html,
base_url,
source_url,
} => extract_readability(html, source_url, base_url),
}
// Logged after the subcommand handler has returned.
log::info!("hello world");
}
fn extract_readability(
html_file: Option<PathBuf>,
source_url: Option<String>,
base_url: Option<String>,
) {
if html_file.is_none() && source_url.is_none() {
log::error!("");
exit(0);
}
let source_url = source_url.map(|url| Url::parse(&url).expect("invalid source url"));
let base_url = base_url.map(|url| Url::parse(&url).expect("invalid base url"));
}