mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
readability cli
This commit is contained in:
parent
a2719c8c7e
commit
063996d62f
5 changed files with 95 additions and 10 deletions
|
@ -9,4 +9,7 @@ repository = "https://gitlab.com/news-flash/article_scraper"
|
|||
|
||||
[dependencies]
|
||||
article_scraper = { path = "../article_scraper/" }
|
||||
clap = { version = "4.2", features = [ "derive" ] }
|
||||
clap = { version = "4.2", features = [ "derive" ] }
|
||||
simplelog = "0.12"
|
||||
log = "0.4"
|
||||
url = "2.3"
|
|
@ -6,25 +6,29 @@ use std::path::PathBuf;
|
|||
pub struct Args {
|
||||
/// Turn debug logging on
|
||||
#[arg(short, long)]
|
||||
debug: bool,
|
||||
pub debug: bool,
|
||||
|
||||
#[command(subcommand)]
|
||||
command: Option<Commands>,
|
||||
pub command: Commands,
|
||||
|
||||
/// Destination of resulting HTML file
|
||||
#[arg(short, long, value_name = "FILE")]
|
||||
output: Option<PathBuf>,
|
||||
pub output: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
pub enum Commands {
|
||||
/// Only use the Readability parser
|
||||
Readability {
|
||||
/// Source HTML file
|
||||
#[arg(long, value_name = "FILE")]
|
||||
html: Option<PathBuf>,
|
||||
|
||||
/// Source Url
|
||||
/// Base to complete relative Url
|
||||
#[arg(long, value_name = "URL")]
|
||||
base_url: Option<String>,
|
||||
|
||||
/// Source Url to download HTML from
|
||||
#[arg(long, value_name = "URL")]
|
||||
source_url: Option<String>,
|
||||
},
|
||||
|
|
|
@ -1,8 +1,49 @@
|
|||
use std::{path::PathBuf, process::exit};
|
||||
|
||||
use crate::args::{Args, Commands};
|
||||
use clap::Parser;
|
||||
use simplelog::{ColorChoice, Config, LevelFilter, TermLogger, TerminalMode};
|
||||
use url::Url;
|
||||
|
||||
mod args;
|
||||
|
||||
pub fn main() {
|
||||
let _args = args::Args::parse();
|
||||
println!("hello world");
|
||||
let args = Args::parse();
|
||||
|
||||
let level = if args.debug {
|
||||
LevelFilter::Debug
|
||||
} else {
|
||||
LevelFilter::Info
|
||||
};
|
||||
TermLogger::init(
|
||||
level,
|
||||
Config::default(),
|
||||
TerminalMode::Mixed,
|
||||
ColorChoice::Auto,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
match args.command {
|
||||
Commands::Readability {
|
||||
html,
|
||||
base_url,
|
||||
source_url,
|
||||
} => extract_readability(html, source_url, base_url),
|
||||
}
|
||||
|
||||
log::info!("hello world");
|
||||
}
|
||||
|
||||
fn extract_readability(
|
||||
html_file: Option<PathBuf>,
|
||||
source_url: Option<String>,
|
||||
base_url: Option<String>,
|
||||
) {
|
||||
if html_file.is_none() && source_url.is_none() {
|
||||
log::error!("");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
let source_url = source_url.map(|url| Url::parse(&url).expect("invalid source url"));
|
||||
let base_url = base_url.map(|url| Url::parse(&url).expect("invalid base url"));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue