use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use anyhow::{Context as _, Result};
use regex::Regex;
/// Returns the shared regex that recognizes a leading `scheme://` prefix.
///
/// The pattern is compiled the first time this function is called and then
/// cached in a function-local [`OnceLock`], so later calls only hand back the
/// reference.
fn url_scheme_re() -> &'static Regex {
    static SCHEME_PREFIX: OnceLock<Regex> = OnceLock::new();
    SCHEME_PREFIX.get_or_init(|| {
        // One ASCII letter, any run of letters / digits / `+` / `-` / `.`,
        // then the literal `://`, anchored at the start of the input.
        let compiled = Regex::new(r"^[A-Za-z][A-Za-z0-9+\-.]*://");
        compiled.expect("static regex")
    })
}
/// Reports whether `s` starts with a URL-style `scheme://` prefix.
///
/// Only the prefix is inspected; whatever follows `://` is not validated here.
pub fn looks_like_url(s: &str) -> bool {
    let scheme_prefix = url_scheme_re();
    scheme_prefix.is_match(s)
}
/// Returns the shared regex that recognizes a `scheme:rest` style argument.
///
/// The pattern is compiled on first use and cached in a function-local
/// [`OnceLock`].
fn custom_scheme_re() -> &'static Regex {
    static SCHEME_COLON: OnceLock<Regex> = OnceLock::new();
    SCHEME_COLON.get_or_init(|| {
        // A letter plus at least one more scheme character (so the scheme is
        // two or more characters long and a single-letter prefix such as `C:`
        // never matches), a colon, then one character that is neither `\`
        // nor `/`.
        let compiled = Regex::new(r"^[A-Za-z][A-Za-z0-9+\-.]+:[^\\/]");
        compiled.expect("static regex")
    })
}
/// Reports whether `s` has the shape `scheme:rest`, e.g. `issue:42` or
/// `gh:owner/repo`.
///
/// The scheme must be at least two characters long and the character right
/// after the colon must not be `/` or `\`.
pub fn looks_like_custom_scheme(s: &str) -> bool {
    let scheme_colon = custom_scheme_re();
    scheme_colon.is_match(s)
}
/// Reports whether `s` contains a character this module treats as invalid in
/// a path.
///
/// That is any of `<`, `>`, `"`, `|`, `?`, `*`, or any character whose code
/// point is below U+0020. The colon and both slash characters are accepted.
pub fn has_invalid_path_chars(s: &str) -> bool {
    const RESERVED: [char; 6] = ['<', '>', '"', '|', '?', '*'];
    // `char` ordering follows code points, so this is the same bound as
    // comparing the numeric value against 0x20 (U+007F is deliberately not
    // included).
    s.chars().any(|ch| ch < '\u{20}' || RESERVED.contains(&ch))
}
/// An argument after classification by [`Input::from_arg`] /
/// [`Input::from_arg_as`].
#[derive(Debug, Clone)]
pub enum Input {
/// A filesystem path. `from_arg_as` stores the canonicalized path when
/// canonicalization succeeds; otherwise it stores the path made absolute,
/// or the text as given if that fails too.
File(PathBuf),
/// A URL that `url::Url::parse` accepted.
Url(url::Url),
/// The argument text kept exactly as it was passed in.
Raw(String),
}
/// Payload-free counterpart of [`Input`]: accepted by [`Input::from_arg_as`]
/// to force a classification and returned by [`Input::kind`].
///
/// Derives `clap::ValueEnum` and `serde::Deserialize`; both derives are
/// configured (`rename_all`) to use the lower-case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum, serde::Deserialize)]
#[clap(rename_all = "lower")]
#[serde(rename_all = "lowercase")]
pub enum InputKind {
// NOTE(review): the variants carry plain `//` comments on purpose. clap's
// `ValueEnum` derive is understood to surface variant doc comments as help
// text for the possible values — confirm before converting these to `///`.
// Corresponds to `Input::File`.
File,
// Corresponds to `Input::Url`.
Url,
// Corresponds to `Input::Raw`.
Raw,
}
impl Input {
#[allow(dead_code)]
pub fn from_arg(raw: &str) -> Result<Self> {
Self::from_arg_as(raw, None)
}
pub fn from_arg_as(raw: &str, force: Option<InputKind>) -> Result<Self> {
match force {
Some(InputKind::Url) => {
let u = url::Url::parse(raw).with_context(|| format!("invalid URL: {raw}"))?;
Ok(Input::Url(u))
}
Some(InputKind::File) => {
let p = PathBuf::from(raw)
.canonicalize()
.or_else(|_| std::path::absolute(raw))
.unwrap_or_else(|_| PathBuf::from(raw));
Ok(Input::File(p))
}
Some(InputKind::Raw) => Ok(Input::Raw(raw.to_string())),
None => {
if looks_like_url(raw) {
let u = url::Url::parse(raw).with_context(|| format!("invalid URL: {raw}"))?;
return Ok(Input::Url(u));
}
if let Ok(p) = PathBuf::from(raw).canonicalize() {
return Ok(Input::File(p));
}
if looks_like_custom_scheme(raw) {
return Ok(Input::Raw(raw.to_string()));
}
if raw.is_empty() || has_invalid_path_chars(raw) {
return Ok(Input::Raw(raw.to_string()));
}
let abs = std::path::absolute(raw).unwrap_or_else(|_| PathBuf::from(raw));
Ok(Input::File(abs))
}
}
}
pub fn kind(&self) -> InputKind {
match self {
Input::File(_) => InputKind::File,
Input::Url(_) => InputKind::Url,
Input::Raw(_) => InputKind::Raw,
}
}
pub fn match_string(&self) -> String {
match self {
Input::File(p) => crate::matcher::normalize_path(p),
Input::Url(u) => u.as_str().to_string(),
Input::Raw(s) => s.clone(),
}
}
pub fn display_string(&self) -> String {
match self {
Input::File(p) => crate::matcher::strip_verbatim(&p.to_string_lossy()),
Input::Url(u) => u.as_str().to_string(),
Input::Raw(s) => s.clone(),
}
}
pub fn as_file(&self) -> Option<&Path> {
match self {
Input::File(p) => Some(p),
_ => None,
}
}
#[allow(dead_code)]
pub fn as_url(&self) -> Option<&url::Url> {
match self {
Input::Url(u) => Some(u),
_ => None,
}
}
#[allow(dead_code)]
pub fn as_raw(&self) -> Option<&str> {
match self {
Input::Raw(s) => Some(s.as_str()),
_ => None,
}
}
pub fn kind_label(&self) -> &'static str {
match self {
Input::File(_) => "file",
Input::Url(_) => "url",
Input::Raw(_) => "raw",
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs automatic classification and panics if it returns an error.
    fn classify(arg: &str) -> Input {
        Input::from_arg(arg).unwrap()
    }

    // `scheme://` prefixes are URLs; drive-letter, absolute and relative
    // paths are not.
    #[test]
    fn url_detection() {
        let accepted = [
            "http://example.com",
            "https://example.com/path",
            "ftp://host",
            "file:///etc/hosts",
        ];
        for candidate in accepted {
            assert!(looks_like_url(candidate), "{candidate} should look like a URL");
        }

        let rejected = ["C:\\Users\\x\\file.txt", "/home/x/file.txt", "./relative"];
        for candidate in rejected {
            assert!(!looks_like_url(candidate), "{candidate} should not look like a URL");
        }
    }

    // `scheme:rest` arguments are kept verbatim as raw input.
    #[test]
    fn custom_scheme_args_classify_as_raw() {
        let issue = classify("issue:1234");
        assert!(matches!(issue, Input::Raw(_)));
        assert_eq!(issue.kind_label(), "raw");
        assert_eq!(issue.match_string(), "issue:1234");

        let repo = classify("gh:owner/repo");
        assert!(matches!(repo, Input::Raw(_)));
    }

    // Path-shaped arguments count as files even when nothing exists there.
    #[test]
    fn nonexistent_path_like_args_classify_as_file() {
        let candidates = [
            "/tmp/does-not-exist-todoke-test.md",
            "newfile.txt",
            "./relative-new.log",
        ];
        for s in candidates {
            let classified = classify(s);
            assert!(matches!(classified, Input::File(_)), "{s} should be File");
        }
    }

    // Bare words without an extension still default to files.
    #[test]
    fn extensionless_bare_words_classify_as_file() {
        let candidates = ["Makefile", "Dockerfile", "HEAD", "main", "some-bare-word"];
        for s in candidates {
            let classified = classify(s);
            assert!(matches!(classified, Input::File(_)), "{s} should be File");
        }
    }

    // Arguments containing an invalid path character fall back to raw.
    #[test]
    fn invalid_path_chars_route_to_raw() {
        let candidates = ["foo|bar", "a?b", "star*arg", "quote\"it", "<tag>"];
        for s in candidates {
            let classified = classify(s);
            assert!(matches!(classified, Input::Raw(_)), "{s} should be Raw");
        }
    }

    #[test]
    fn has_invalid_path_chars_detects_reserved() {
        for flagged in ["foo|bar", "a?b", "a\x01b"] {
            assert!(has_invalid_path_chars(flagged), "{flagged:?} should be flagged");
        }
        for clean in ["C:\\Users\\x", "/abs/path", "Makefile"] {
            assert!(!has_invalid_path_chars(clean), "{clean:?} should be accepted");
        }
    }

    #[test]
    fn looks_like_custom_scheme_cases() {
        for matching in ["issue:42", "gh:owner/repo", "jira:ABC-1"] {
            assert!(
                looks_like_custom_scheme(matching),
                "{matching} should look like a custom scheme"
            );
        }

        let non_matching = [
            "C:\\Users\\x",
            "D:foo",
            "Makefile",
            "HEAD",
            "scheme:/slash",
        ];
        for candidate in non_matching {
            assert!(
                !looks_like_custom_scheme(candidate),
                "{candidate} should not look like a custom scheme"
            );
        }
    }

    // Forcing `File` succeeds even though the path does not exist.
    #[test]
    fn force_as_file_absolutizes_nonexistent() {
        let forced = Some(InputKind::File);
        let classified = Input::from_arg_as("nonexistent-todoke-test.md", forced).unwrap();
        assert!(matches!(classified, Input::File(_)));
    }

    // Forcing `Raw` keeps the text untouched, even for an existing path.
    #[test]
    fn force_as_raw_skips_canonicalize() {
        let forced = Some(InputKind::Raw);
        let classified = Input::from_arg_as(".", forced).unwrap();
        assert!(matches!(classified, Input::Raw(_)));
        assert_eq!(classified.display_string(), ".");
    }

    #[test]
    fn url_still_detected_first() {
        let classified = classify("https://example.com/foo");
        assert!(matches!(classified, Input::Url(_)));
    }
}