use crate::types::FileType;
use crate::{helpers, ErrorKind, Result};
use async_stream::try_stream;
use futures::stream::Stream;
use glob::glob_with;
use jwalk::WalkDir;
use reqwest::Url;
use serde::Serialize;
use shellexpand::tilde;
use std::fmt::Display;
use std::fs;
use std::path::{Path, PathBuf};
use tokio::io::{stdin, AsyncReadExt};
/// Input value that selects standard input as the source (see `Input::new`).
const STDIN: &str = "-";
/// Returns `true` when the file type derived from `p` is Markdown or HTML.
///
/// The classification itself is delegated to `FileType::from`; this helper
/// only decides which of the resulting file types are accepted.
fn valid_extension(p: &Path) -> bool {
    let detected = FileType::from(p);
    matches!(detected, FileType::Markdown | FileType::Html)
}
/// Textual content obtained from one input, together with its origin and
/// file type.
#[derive(Debug)]
pub struct InputContent {
/// Where the content was obtained from.
pub source: InputSource,
/// File type associated with the content.
pub file_type: FileType,
/// The content itself.
pub content: String,
}
impl InputContent {
    /// Creates an [`InputContent`] directly from an in-memory string.
    ///
    /// The string is stored twice: once as the `content` and once inside the
    /// [`InputSource::String`] that records where the content came from.
    #[must_use]
    pub fn from_string(s: &str, file_type: FileType) -> Self {
        let content = s.to_owned();
        let source = InputSource::String(content.clone());
        Self {
            source,
            file_type,
            content,
        }
    }
}
/// Synchronously reads a file into an [`InputContent`].
///
/// The file type is derived from the path. The source is recorded as
/// [`InputSource::String`] holding a copy of the file's text.
// NOTE(review): the async `Input::path_content` records `InputSource::FsPath`
// for the same kind of input — confirm whether the `String` source here is
// intentional before changing it.
impl TryFrom<&PathBuf> for InputContent {
    type Error = crate::ErrorKind;

    /// # Errors
    ///
    /// Returns `ErrorKind::ReadFileInput` (carrying the I/O error and the
    /// path) when the file cannot be read as a string.
    fn try_from(path: &PathBuf) -> std::result::Result<Self, Self::Error> {
        let content = match fs::read_to_string(path) {
            Ok(text) => text,
            Err(e) => return Err(ErrorKind::ReadFileInput(e, path.clone())),
        };
        let source = InputSource::String(content.clone());
        let file_type = FileType::from(path);
        Ok(Self {
            source,
            file_type,
            content,
        })
    }
}
/// The different places input content can come from.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum InputSource {
/// A URL whose content is fetched over the network.
RemoteUrl(Box<Url>),
/// A filesystem glob pattern that is expanded into matching paths.
FsGlob {
/// The glob pattern as given by the user (before tilde expansion).
pattern: String,
/// When `true`, the pattern is matched case-insensitively.
ignore_case: bool,
},
/// A path to a file or directory on the filesystem.
FsPath(PathBuf),
/// Standard input.
Stdin,
/// Content supplied directly as a string.
String(String),
}
/// Serializes an [`InputSource`] as the string produced by its [`Display`]
/// implementation.
impl Serialize for InputSource {
    fn serialize<S: serde::Serializer>(
        &self,
        serializer: S,
    ) -> std::result::Result<S::Ok, S::Error> {
        serializer.collect_str(self)
    }
}
/// Human-readable rendering of an [`InputSource`].
///
/// * `RemoteUrl` prints the URL string.
/// * `FsGlob` prints the glob pattern (the case flag is not shown).
/// * `FsPath` prints the path; bytes that are not valid UTF-8 are replaced
///   with `U+FFFD` instead of collapsing the whole path to an empty string.
/// * `Stdin` prints the literal `stdin`.
/// * `String` prints the string content itself.
impl Display for InputSource {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::RemoteUrl(url) => f.write_str(url.as_str()),
            Self::FsGlob { pattern, .. } => f.write_str(pattern),
            // `to_str()` yields `None` for non-UTF-8 paths, which previously
            // rendered them as "". A lossy conversion keeps them identifiable
            // and is identical for valid UTF-8 paths.
            Self::FsPath(path) => f.write_str(&path.to_string_lossy()),
            Self::Stdin => f.write_str("stdin"),
            Self::String(s) => f.write_str(s),
        }
    }
}
/// A single user-supplied input: its source plus options that influence how
/// its content is read.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Input {
/// Where the content of this input comes from.
pub source: InputSource,
/// File type to assume for stdin and string sources; the default file type
/// is used when this is `None`.
pub file_type_hint: Option<FileType>,
/// Paths that are skipped when reading files, directories and glob matches.
pub excluded_paths: Option<Vec<PathBuf>>,
}
impl Input {
/// Builds an [`Input`] by classifying the raw `value`.
///
/// The checks are applied in this order:
///
/// 1. `-` selects standard input.
/// 2. Anything that parses as a URL becomes a remote URL.
/// 3. Anything that changes when glob-escaped (i.e. contains glob
///    metacharacters) becomes a glob pattern using `glob_ignore_case`.
/// 4. An existing path becomes a filesystem path.
/// 5. A non-existent absolute path is retried as a URL with an `http://`
///    prefix.
///
/// # Errors
///
/// * `ErrorKind::FileNotFound` if `value` is a relative path that does not
///   exist.
/// * `ErrorKind::ParseUrl` if the `http://`-prefixed fallback is not a valid
///   URL either.
pub fn new(
    value: &str,
    file_type_hint: Option<FileType>,
    glob_ignore_case: bool,
    excluded_paths: Option<Vec<PathBuf>>,
) -> Result<Self> {
    let source = if value == STDIN {
        InputSource::Stdin
    } else if let Ok(parsed) = Url::parse(value) {
        InputSource::RemoteUrl(Box::new(parsed))
    } else if glob::Pattern::escape(value) != value {
        // Escaping altered the input, so it contains glob metacharacters.
        InputSource::FsGlob {
            pattern: value.to_owned(),
            ignore_case: glob_ignore_case,
        }
    } else {
        let candidate = PathBuf::from(value);
        match (candidate.exists(), candidate.is_relative()) {
            (true, _) => InputSource::FsPath(candidate),
            (false, true) => return Err(ErrorKind::FileNotFound(candidate)),
            (false, false) => match Url::parse(&format!("http://{value}")) {
                Ok(prefixed) => InputSource::RemoteUrl(Box::new(prefixed)),
                Err(e) => {
                    return Err(ErrorKind::ParseUrl(
                        e,
                        "Input is not a valid URL".to_string(),
                    ))
                }
            },
        }
    };

    Ok(Self {
        source,
        file_type_hint,
        excluded_paths,
    })
}
/// Turns this input into a stream of [`InputContent`] items.
///
/// Consumes `self` and dispatches on `self.source`:
///
/// * `RemoteUrl`: the URL is fetched and yields one item.
/// * `FsGlob`: every item produced by `glob_contents` is forwarded.
/// * `FsPath` pointing at a directory: the directory is walked and every
///   retained file is read and yielded.
/// * `FsPath` pointing at anything else: the single path is read, unless it
///   is excluded, in which case the stream ends without yielding.
/// * `Stdin` / `String`: one item is yielded, using `file_type_hint`.
///
/// `skip_missing` only affects the remote-URL case and the single-path case:
/// when it is `true`, an error while fetching/reading is dropped and nothing
/// is yielded. Errors from glob matches, from the directory walk and from
/// stdin are always propagated with `?`.
pub async fn get_contents(
self,
skip_missing: bool,
) -> impl Stream<Item = Result<InputContent>> {
try_stream! {
match self.source {
InputSource::RemoteUrl(ref url) => {
let content = Self::url_contents(url).await;
match content {
// Fetch failed, but the caller asked to ignore missing inputs.
Err(_) if skip_missing => (),
Err(e) => Err(e)?,
Ok(content) => yield content,
}
},
InputSource::FsGlob {
ref pattern,
ignore_case,
} => {
for await content in self.glob_contents(pattern, ignore_case).await {
let content = content?;
yield content;
}
}
InputSource::FsPath(ref path) => {
if path.is_dir() {
// Walk the directory; `process_read_dir` filters the children of
// each directory before they are handed to the loop below.
for entry in WalkDir::new(path).skip_hidden(true)
.process_read_dir(move |_, _, _, children| {
children.retain(|child| {
let entry = match child.as_ref() {
Ok(x) => x,
// Keep unreadable entries so that the error is propagated
// by `entry?` in the loop body.
Err(_) => return true,
};
if self.is_excluded_path(&entry.path()) {
return false;
}
let file_type = entry.file_type();
// Directories are kept (presumably so the walk can descend
// into them); they are skipped again in the loop body.
if file_type.is_dir() {
return true;
}
if file_type.is_symlink() {
return false;
}
if !file_type.is_file() {
return false;
}
// Regular files are kept only when `valid_extension` accepts them.
return valid_extension(&entry.path());
});
}) {
let entry = entry?;
if entry.file_type().is_dir() {
continue;
}
// NOTE: read errors here are propagated even if `skip_missing` is set.
let content = Self::path_content(entry.path()).await?;
yield content
}
} else {
// An excluded single path ends the stream without yielding anything.
if self.is_excluded_path(path) {
return ();
}
let content = Self::path_content(path).await;
match content {
// Read failed, but the caller asked to ignore missing inputs.
Err(_) if skip_missing => (),
Err(e) => Err(e)?,
Ok(content) => yield content,
};
}
},
InputSource::Stdin => {
let content = Self::stdin_content(self.file_type_hint).await?;
yield content;
},
InputSource::String(ref s) => {
let content = Self::string_content(s, self.file_type_hint);
yield content;
},
}
}
}
/// Fetches `url` and wraps the response body in an [`InputContent`].
///
/// A URL whose path is empty or just `/` is treated as HTML; otherwise the
/// file type is derived from the full URL string.
///
/// # Errors
///
/// * `ErrorKind::NetworkRequest` if the request itself fails.
/// * `ErrorKind::ReadResponseBody` if the body cannot be read as text.
async fn url_contents(url: &Url) -> Result<InputContent> {
    let file_type = match url.path() {
        "" | "/" => FileType::Html,
        _ => FileType::from(url.as_str()),
    };

    let response = match reqwest::get(url.clone()).await {
        Ok(response) => response,
        Err(e) => return Err(ErrorKind::NetworkRequest(e)),
    };
    let content = response
        .text()
        .await
        .map_err(ErrorKind::ReadResponseBody)?;

    Ok(InputContent {
        source: InputSource::RemoteUrl(Box::new(url.clone())),
        file_type,
        content,
    })
}
/// Expands a glob pattern and streams the content of every matching file.
///
/// A leading `~` in `path_glob` is expanded first. Matching is
/// case-insensitive when `ignore_case` is `true`. Directories and excluded
/// paths among the matches are skipped. An error for an individual glob
/// entry is printed to stderr and the entry is skipped; an invalid pattern
/// or a failed file read is propagated through the stream.
async fn glob_contents(
    &self,
    path_glob: &str,
    ignore_case: bool,
) -> impl Stream<Item = Result<InputContent>> + '_ {
    let expanded_pattern = tilde(&path_glob).into_owned();
    let options = glob::MatchOptions {
        case_sensitive: !ignore_case,
        ..glob::MatchOptions::new()
    };

    try_stream! {
        for entry in glob_with(&expanded_pattern, options)? {
            let matched = match entry {
                Ok(matched) => matched,
                Err(e) => {
                    eprintln!("{e:?}");
                    continue;
                }
            };
            if matched.is_dir() || self.is_excluded_path(&matched) {
                continue;
            }
            let content: InputContent = Self::path_content(&matched).await?;
            yield content;
        }
    }
}
/// Returns `true` when `path` is covered by one of this input's
/// `excluded_paths`. Without any configured exclusions nothing is excluded.
fn is_excluded_path(&self, path: &PathBuf) -> bool {
    self.excluded_paths
        .as_deref()
        .map_or(false, |excluded| is_excluded_path(excluded, path))
}
/// Asynchronously reads the file at `path` into an [`InputContent`].
///
/// The file type is derived from the path, and the source is recorded as
/// [`InputSource::FsPath`].
///
/// # Errors
///
/// Returns `ErrorKind::ReadFileInput` (carrying the I/O error and the path)
/// when the file cannot be read as a string.
pub async fn path_content<P: Into<PathBuf> + AsRef<Path> + Clone>(
    path: P,
) -> Result<InputContent> {
    let path: PathBuf = path.into();
    let content = match tokio::fs::read_to_string(&path).await {
        Ok(text) => text,
        Err(e) => return Err(ErrorKind::ReadFileInput(e, path)),
    };
    let file_type = FileType::from(&path);

    Ok(InputContent {
        file_type,
        source: InputSource::FsPath(path),
        content,
    })
}
/// Reads all of standard input into an [`InputContent`].
///
/// The file type is `file_type_hint`, or the default file type when no hint
/// was given.
///
/// # Errors
///
/// Propagates the I/O error if reading standard input as a string fails.
async fn stdin_content(file_type_hint: Option<FileType>) -> Result<InputContent> {
    let mut reader = stdin();
    let mut buffer = String::new();
    reader.read_to_string(&mut buffer).await?;

    Ok(InputContent {
        source: InputSource::Stdin,
        file_type: file_type_hint.unwrap_or_default(),
        content: buffer,
    })
}
/// Wraps an in-memory string in an [`InputContent`], using `file_type_hint`
/// or the default file type when no hint was given.
fn string_content(s: &str, file_type_hint: Option<FileType>) -> InputContent {
    let file_type = file_type_hint.unwrap_or_default();
    InputContent::from_string(s, file_type)
}
}
/// Returns `true` if any entry of `excluded_paths` contains `path` according
/// to `helpers::path::contains`.
///
/// An entry for which the containment check returns an error is treated as
/// not matching.
fn is_excluded_path(excluded_paths: &[PathBuf], path: &PathBuf) -> bool {
    excluded_paths
        .iter()
        .any(|excluded| matches!(helpers::path::contains(excluded, path), Ok(true)))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An existing relative path is classified as a filesystem path.
    #[test]
    fn test_input_handles_real_relative_paths() {
        let relative = "./Cargo.toml";
        assert!(Path::new(relative).exists());
        assert!(Path::new(relative).is_relative());

        let parsed = Input::new(relative, None, false, None);
        assert!(parsed.is_ok());
        assert!(matches!(
            parsed,
            Ok(Input {
                source: InputSource::FsPath(_),
                file_type_hint: None,
                excluded_paths: None
            })
        ));
    }

    /// A relative path that does not exist is rejected with `FileNotFound`.
    #[test]
    fn test_input_handles_nonexistent_relative_paths() {
        let missing = "./nonexistent/relative/path";
        assert!(!Path::new(missing).exists());
        assert!(Path::new(missing).is_relative());

        let parsed = Input::new(missing, None, false, None);
        assert!(parsed.is_err());
        assert!(matches!(parsed, Err(ErrorKind::FileNotFound(_))));
    }

    /// Only Markdown and HTML extensions are accepted.
    #[test]
    fn test_valid_extension() {
        let accepted = [
            "file.md",
            "file.markdown",
            "file.html",
            "file.htm",
            "file.HTM",
        ];
        for name in accepted {
            assert!(valid_extension(Path::new(name)));
        }

        let rejected = ["file.txt", "file"];
        for name in rejected {
            assert!(!valid_extension(Path::new(name)));
        }
    }

    /// With an empty exclusion list nothing is excluded.
    #[test]
    fn test_no_exclusions() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().to_path_buf();
        assert!(!is_excluded_path(&[], &target));
    }

    /// A path is excluded when it is itself in the exclusion list.
    #[test]
    fn test_excluded() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().to_path_buf();
        let exclusions = [target.clone()];
        assert!(is_excluded_path(&exclusions, &target));
    }

    /// A path is excluded when one of its ancestors is in the exclusion list.
    #[test]
    fn test_excluded_subdir() {
        let outer = tempfile::tempdir().unwrap();
        let inner = tempfile::tempdir_in(outer.path()).unwrap();

        let exclusions = [outer.path().to_path_buf()];
        let nested = inner.path().to_path_buf();
        assert!(is_excluded_path(&exclusions, &nested));
    }
}