//! Command-line entry point of the `lychee` link checker binary.
//!
//! This file parses the command-line options, merges them with configuration
//! files and ignore files, sets up the Tokio runtime, and then hands over to
//! the `commands` module to dump inputs, dump links, or check links.
#![warn(clippy::all, clippy::pedantic)]
#![warn(
absolute_paths_not_starting_with_crate,
rustdoc::invalid_html_tags,
missing_copy_implementations,
missing_debug_implementations,
semicolon_in_expressions_from_macros,
unreachable_pub,
unused_extern_crates,
variant_size_differences,
clippy::missing_const_for_fn
)]
#![deny(anonymous_parameters, macro_use_extern_crate)]
#![deny(missing_docs)]
use std::fs::{self, File};
use std::io::{self, BufRead, BufReader, ErrorKind, IsTerminal, stdin};
use std::num::NonZeroUsize;
use anyhow::{Context, Error, Result};
use clap::{Parser, crate_version};
use commands::{CommandParams, generate};
use formatters::log::init_logging;
use http::HeaderMap;
use log::{error, info, warn};
use lychee_lib::filter::PathExcludes;
use config::HeaderMapExt;
use ring as _;
use lychee_lib::BasicAuthExtractor;
use lychee_lib::Collector;
use lychee_lib::CookieJar;
mod cache;
mod client;
mod commands;
mod config;
mod files_from;
mod formatters;
mod hints;
mod parse;
mod progress;
mod time;
mod verbosity;
use crate::formatters::stats::{OutputStats, output_hints, output_statistics};
use crate::{
cache::Cache,
config::{Config, LYCHEE_CACHE_FILE, LYCHEE_IGNORE_FILE, LycheeOptions},
formatters::duration::Duration,
generate::generate,
};
/// Process exit codes used by this binary.
///
/// Every variant carries an explicit discriminant so that it can be cast with
/// `as i32` and passed to `std::process::exit`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExitCode {
/// The run finished without a failure to report. Also used when the run was
/// cut short by a broken pipe or after generating output via `generate`.
Success = 0,
/// A failure that is not related to link checking, e.g. an output path
/// whose parent directory does not exist.
// NOTE(review): 1 is also the code the Rust runtime uses when `main`
// returns `Err`, so errors bubbled up with `?` presumably end up here too.
#[allow(unused)]
UnexpectedFailure = 1,
/// Not used in this file — presumably returned by the check command when
/// links failed; confirm against the `commands` module.
LinkCheckFailure = 2,
/// Loading or merging the configuration failed.
ConfigFile = 3,
}
/// Prefix that marks a comment line in ignore/exclude files. `read_lines`
/// drops every line that starts with it (leading whitespace is skipped first).
const LYCHEEIGNORE_COMMENT_MARKER: &str = "#";
/// Program entry point.
///
/// Delegates all work to `run_main` and turns the exit code it returns into
/// the exit status of the process. An error from `run_main` is returned to
/// the Rust runtime unchanged.
fn main() -> Result<()> {
    // `std::process::exit` terminates the process without running destructors
    // on the current stack. Everything that owns resources lives inside
    // `run_main` and has already been dropped once it returns.
    match run_main() {
        Ok(code) => std::process::exit(code),
        Err(err) => Err(err),
    }
}
/// Reads `file` line by line and returns the lines worth keeping.
///
/// A line is dropped when it is empty or when, after skipping leading
/// whitespace, it starts with `LYCHEEIGNORE_COMMENT_MARKER`. Kept lines are
/// returned unmodified and in file order.
///
/// # Errors
///
/// Returns the first I/O error encountered while reading a line.
fn read_lines(file: &File) -> Result<Vec<String>> {
    let mut kept = Vec::new();
    for entry in BufReader::new(file).lines() {
        let line = entry?;
        let is_comment = line
            .trim_start()
            .starts_with(LYCHEEIGNORE_COMMENT_MARKER);
        if line.is_empty() || is_comment {
            continue;
        }
        kept.push(line);
    }
    Ok(kept)
}
/// Raises the open-file limit of the process and caps `max_concurrency` so
/// that it fits within that limit.
///
/// The limit returned by `increase_nofile_limit`, minus a fixed baseline, is
/// the highest concurrency allowed (but never less than 1). If the configured
/// `max_concurrency` exceeds it, a warning is logged and the configuration is
/// lowered. If the limit cannot be obtained, the options are left untouched.
fn handle_fd_limits(opts: &mut LycheeOptions) {
    use rlimit::increase_nofile_limit;

    /// Number of descriptors subtracted from the limit before deriving the
    /// concurrency — presumably headroom for descriptors that are not tied to
    /// in-flight requests; confirm the exact rationale with the authors.
    const BASELINE_OVERHEAD: u64 = 20;

    // Best effort: when the limit cannot be queried or raised, keep the
    // user's configuration as is.
    let Ok(soft_limit) = increase_nofile_limit(u64::MAX) else {
        return;
    };

    let available = soft_limit.saturating_sub(BASELINE_OVERHEAD);
    // Saturate instead of truncating: on targets where `usize` is narrower
    // than `u64`, an `as` cast could wrap a huge limit around to a tiny value
    // and needlessly throttle concurrency.
    let available = usize::try_from(available).unwrap_or(usize::MAX);
    let concurrency = NonZeroUsize::new(available).unwrap_or(NonZeroUsize::MIN);

    let requested_concurrency = opts.config.max_concurrency();
    if requested_concurrency > concurrency {
        warn!(
            "System file descriptor limit is {soft_limit} which is too low for the requested \
             concurrency of {requested_concurrency}. Lowering `max_concurrency` to \
             {concurrency} to prevent 'Too many open files' errors.",
        );
        opts.config.set_max_concurrency(concurrency);
    }
}
/// Builds the effective options for this run.
///
/// Steps, in order:
/// 1. Parse the command line and initialise logging.
/// 2. Merge the explicitly given config files (iterated last to first), or
///    the default config file if none were given and one exists.
/// 3. Append the patterns from `LYCHEE_IGNORE_FILE` (if it can be opened) to
///    the exclude list.
/// 4. Warn about the deprecated `--exclude-file` and `--base` options and
///    append the patterns of every `--exclude-file` to the exclude list.
/// 5. Adjust the concurrency to the file descriptor limit.
///
/// # Errors
///
/// Fails if a config file cannot be loaded or merged, or if an ignore or
/// exclude file cannot be opened or read (a `LYCHEE_IGNORE_FILE` that cannot
/// be opened is skipped silently).
fn load_config() -> Result<LycheeOptions> {
    let mut opts = LycheeOptions::parse();
    init_logging(&opts.config.verbose(), &opts.config.mode());

    if !opts.config_files.is_empty() {
        // NOTE(review): iterating in reverse presumably lets later files take
        // precedence, assuming `merge_file` keeps values that are already set.
        for config_file in opts.config_files.iter().rev() {
            opts.config = opts.config.merge_file(config_file)?;
        }
    } else if let Some(fallback) = config::loaders::default_config_file()? {
        opts.config = opts.config.merge(fallback);
    }

    // The ignore file is optional: failing to open it is not an error.
    if let Ok(ignore_file) = File::open(LYCHEE_IGNORE_FILE) {
        opts.config.exclude.extend(read_lines(&ignore_file)?);
    }

    if !opts.config.exclude_file.is_empty() {
        warn!(
            "WARNING: `--exclude-file` is deprecated and will soon be removed; use the `{LYCHEE_IGNORE_FILE}` file to ignore URL patterns instead. To exclude paths of files and directories, use `--exclude-path`."
        );
    }
    if opts.config.base.is_some() {
        warn!(
            "WARNING: `--base` is deprecated and will soon be removed; use `--base-url` instead."
        );
    }

    // Deprecated, but still honoured: patterns listed in `--exclude-file`s.
    for path in &opts.config.exclude_file {
        let patterns = read_lines(&File::open(path)?)?;
        opts.config.exclude.extend(patterns);
    }

    handle_fd_limits(&mut opts);
    Ok(opts)
}
/// Loads the cookie jar configured in `cfg`, if any.
///
/// Returns `Ok(None)` when no cookie jar path is configured.
///
/// # Errors
///
/// Propagates the error from `CookieJar::load` when the jar cannot be loaded.
fn load_cookie_jar(cfg: &Config) -> Result<Option<CookieJar>> {
    let Some(path) = cfg.cookie_jar.as_ref() else {
        return Ok(None);
    };
    let jar = CookieJar::load(path.clone())?;
    Ok(Some(jar))
}
#[must_use]
fn load_cache(cfg: &Config) -> Option<Cache> {
if !cfg.cache() {
return None;
}
let max_cache_age = cfg.max_cache_age();
match fs::metadata(LYCHEE_CACHE_FILE) {
Err(_e) => {
return None;
}
Ok(metadata) => {
let modified = metadata.modified().ok()?;
let elapsed = modified.elapsed().ok()?;
if elapsed > max_cache_age {
warn!(
"Cache is too old (age: {}, max age: {}). Discarding and recreating.",
Duration::from_secs(elapsed.as_secs()),
Duration::from_secs(max_cache_age.as_secs())
);
return None;
}
info!(
"Cache is recent (age: {}, max age: {}). Using.",
Duration::from_secs(elapsed.as_secs()),
Duration::from_secs(max_cache_age.as_secs())
);
}
}
let cache = Cache::load(
LYCHEE_CACHE_FILE,
max_cache_age.as_secs(),
&cfg.cache_exclude_status(),
);
match cache {
Ok(cache) => Some(cache),
Err(e) => {
warn!("Error while loading cache: {e}. Continuing without.");
None
}
}
}
fn run_main() -> Result<i32> {
use std::process::exit;
let opts = match load_config() {
Ok(opts) => opts,
Err(e) => {
error!(
"Error while loading config: {:?}\n\
See: https://github.com/lycheeverse/lychee/blob/lychee-v{}/lychee.example.toml",
e,
crate_version!()
);
exit(ExitCode::ConfigFile as i32);
}
};
if let Some(output) = &opts.config.output {
let parent = output.parent().filter(|p| !p.as_os_str().is_empty());
if let Some(parent) = parent
&& !parent.exists()
{
error!(
"Output path `{}` is not writable: parent directory `{}` does not exist",
output.display(),
parent.display()
);
exit(ExitCode::UnexpectedFailure as i32);
}
}
if let Some(mode) = opts.config.generate {
print!("{}", generate(&mode)?);
exit(ExitCode::Success as i32);
}
let runtime = match opts.config.threads {
Some(threads) => {
tokio::runtime::Builder::new_multi_thread()
.worker_threads(threads.get())
.enable_all()
.build()?
}
None => tokio::runtime::Runtime::new()?,
};
match runtime.block_on(run(&opts)) {
Err(e) if Some(ErrorKind::BrokenPipe) == underlying_io_error_kind(&e) => {
exit(ExitCode::Success as i32);
}
res => res,
}
}
/// Returns the kind of the first `io::Error` found in the cause chain of
/// `error`, or `None` if the chain contains no I/O error.
fn underlying_io_error_kind(error: &Error) -> Option<io::ErrorKind> {
    error
        .chain()
        .find_map(|cause| cause.downcast_ref::<io::Error>())
        .map(io::Error::kind)
}
/// Executes the command selected by `opts` and returns its exit code.
///
/// Depending on the configuration this either
/// - dumps the resolved inputs (`dump_inputs`) and returns,
/// - dumps the collected links (`dump`), or
/// - checks the collected links, prints statistics and hints, and persists
///   the cache and the cookie jar.
///
/// # Errors
///
/// Fails if the inputs, cookie jar, client or collector cannot be set up, or
/// if the selected command, the statistics output, the cache store or the
/// cookie jar save fails.
async fn run(opts: &LycheeOptions) -> Result<i32> {
    let config = &opts.config;
    let inputs = opts.inputs()?;

    // True only when stdin is the single input and it is attached to a
    // terminal.
    let only_stdin = inputs.len() == 1
        && inputs
            .iter()
            .any(|input| matches!(input.source, lychee_lib::InputSource::Stdin));
    let is_stdin_input = only_stdin && stdin().is_terminal();

    // `--base-url` wins over the deprecated `--base` when both are given.
    let legacy_base = config.base.clone();
    let base_url = config.base_url.clone();
    if legacy_base.is_some() && base_url.is_some() {
        warn!(
            "WARNING: Both, `--base` and `--base-url` are set. Using `base-url` and ignoring `--base` (as it's deprecated)."
        );
    }
    let base = base_url.or(legacy_base);

    if config.dump_inputs() {
        let exit_code = commands::dump_inputs(
            inputs,
            config.output.as_ref(),
            &config.exclude_path,
            &config.extensions(),
            !config.hidden(),
            !config.no_ignore(),
        )
        .await?;
        return Ok(exit_code as i32);
    }

    let cache = load_cache(config).unwrap_or_default();
    let cookie_jar = load_cookie_jar(config).with_context(|| {
        let path = config
            .cookie_jar
            .as_ref()
            .map_or_else(|| "<none>".to_string(), |p| p.display().to_string());
        format!("Cannot load cookie jar from path `{}`", path)
    })?;
    let client = client::create(config, cookie_jar.as_deref())?;

    let mut collector = Collector::new(config.root_dir.clone(), base.unwrap_or_default())?
        .skip_missing_inputs(config.skip_missing())
        .skip_hidden(!config.hidden())
        .skip_ignored(!config.no_ignore())
        .include_verbatim(config.include_verbatim())
        .headers(HeaderMap::from_header_pairs(&config.headers())?)
        .excluded_paths(PathExcludes::new(config.exclude_path.clone())?)
        // Enabled only when the environment variable is set to exactly "1".
        .use_html5ever(std::env::var("LYCHEE_USE_HTML5EVER").is_ok_and(|x| x == "1"))
        .include_wikilinks(config.include_wikilinks())
        .preprocessor(config.preprocess.clone())
        .host_pool(client.host_pool());
    if let Some(basic_auth) = &config.basic_auth {
        collector = collector.basic_auth_extractor(BasicAuthExtractor::new(basic_auth)?);
    }

    let requests = collector.collect_links_from_file_types(inputs, config.extensions());
    let params = CommandParams {
        client,
        cache,
        requests,
        cfg: config.clone(),
        is_stdin_input,
    };

    if config.dump() {
        let exit_code = commands::dump(params).await?;
        return Ok(exit_code as i32);
    }

    let (response_stats, cache, exit_code, host_pool) = commands::check(params).await?;
    hints::handle_stats(&response_stats, config);

    let stats = OutputStats {
        response_stats,
        host_stats: config
            .host_stats()
            .then_some(host_pool.all_host_stats()),
    };
    output_statistics(stats, config)?;

    if config.cache() {
        cache.store(LYCHEE_CACHE_FILE)?;
    }
    if let Some(jar) = &cookie_jar {
        info!("Saving cookie jar");
        jar.save().context("Cannot save cookie jar")?;
    }
    output_hints(config);

    Ok(exit_code as i32)
}