1use anyhow::Result;
7use std::collections::BTreeSet;
8use std::fs;
9use std::path::{Component, Path, PathBuf};
10use tokei::Languages;
11
12use tokmd_settings::ScanOptions;
13
/// A single file supplied from memory instead of from disk.
///
/// `path` is the caller-supplied logical location of the file and `bytes` is
/// its raw content; neither is validated at construction time.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InMemoryFile {
    /// Logical path of the file as given by the caller.
    pub path: PathBuf,
    /// Raw file content.
    pub bytes: Vec<u8>,
}

impl InMemoryFile {
    /// Builds an [`InMemoryFile`] from anything convertible into a path and a
    /// byte buffer (for example a `&str` path and `&str` or `Vec<u8>` content).
    #[must_use]
    pub fn new(path: impl Into<PathBuf>, bytes: impl Into<Vec<u8>>) -> Self {
        let path: PathBuf = path.into();
        let bytes: Vec<u8> = bytes.into();
        Self { path, bytes }
    }
}
30
31#[derive(Debug)]
41pub struct MaterializedScan {
42 languages: Languages,
43 logical_paths: Vec<PathBuf>,
44 root: tempfile::TempDir,
45}
46
47impl MaterializedScan {
48 #[must_use]
49 pub fn languages(&self) -> &Languages {
50 &self.languages
51 }
52
53 #[must_use]
54 pub fn logical_paths(&self) -> &[PathBuf] {
55 &self.logical_paths
56 }
57
58 #[must_use]
59 pub fn strip_prefix(&self) -> &Path {
60 self.root.path()
61 }
62}
63
64pub fn normalize_in_memory_paths(inputs: &[InMemoryFile]) -> Result<Vec<PathBuf>> {
69 normalize_logical_paths(inputs, true)
70}
71
72pub fn scan_in_memory(inputs: &[InMemoryFile], args: &ScanOptions) -> Result<MaterializedScan> {
73 let root = tempfile::tempdir()?;
74 let logical_paths = normalize_in_memory_paths(inputs)?;
75
76 for (logical_path, input) in logical_paths.iter().zip(inputs) {
77 let full_path = root.path().join(logical_path);
78 if let Some(parent) = full_path.parent() {
79 fs::create_dir_all(parent)?;
80 }
81 fs::write(full_path, &input.bytes)?;
82 }
83
84 let scan_root = vec![root.path().to_path_buf()];
85 let languages = crate::scan(&scan_root, args)?;
86
87 Ok(MaterializedScan {
88 languages,
89 logical_paths,
90 root,
91 })
92}
93
94fn normalize_logical_paths(
95 inputs: &[InMemoryFile],
96 case_insensitive: bool,
97) -> Result<Vec<PathBuf>> {
98 let mut seen = BTreeSet::new();
99 let mut normalized = Vec::with_capacity(inputs.len());
100
101 for input in inputs {
102 let logical_path = normalize_logical_path(&input.path)?;
103 if !seen.insert(logical_path_key(&logical_path, case_insensitive)) {
104 anyhow::bail!("Duplicate in-memory path: {}", logical_path.display());
105 }
106 normalized.push(logical_path);
107 }
108
109 Ok(normalized)
110}
111
/// Renders `path` as the string key used for duplicate detection.
///
/// The path is converted lossily to text; when `case_insensitive` is `true`
/// the text is additionally lowercased so case-only variants share a key.
fn logical_path_key(path: &Path, case_insensitive: bool) -> String {
    match (path.to_string_lossy(), case_insensitive) {
        (text, true) => text.to_lowercase(),
        (text, false) => text.into_owned(),
    }
}
120
121fn normalize_logical_path(path: &Path) -> Result<PathBuf> {
122 if path.as_os_str().is_empty() {
123 anyhow::bail!("In-memory path must not be empty");
124 }
125
126 let mut normalized = PathBuf::new();
127 for component in path.components() {
128 match component {
129 Component::Normal(segment) => normalized.push(segment),
130 Component::CurDir => {}
131 Component::ParentDir => {
132 anyhow::bail!(
133 "In-memory path must not contain parent traversal: {}",
134 path.display()
135 );
136 }
137 Component::RootDir | Component::Prefix(_) => {
138 anyhow::bail!("In-memory path must be relative: {}", path.display());
139 }
140 }
141 }
142
143 if normalized.as_os_str().is_empty() {
144 anyhow::bail!("In-memory path must resolve to a file: {}", path.display());
145 }
146
147 Ok(normalized)
148}
149
#[cfg(test)]
mod tests {
    //! Unit tests for logical-path normalization and duplicate detection.

    use super::*;

    #[test]
    fn normalize_logical_path_strips_dot_segments() -> Result<()> {
        let normalized = normalize_logical_path(Path::new("./src/./lib.rs"))?;
        assert_eq!(normalized, PathBuf::from("src/lib.rs"));
        Ok(())
    }

    #[test]
    fn normalize_logical_path_rejects_empty_path() {
        let err = normalize_logical_path(Path::new("")).unwrap_err();
        assert!(err.to_string().contains("must not be empty"));
    }

    #[test]
    fn normalize_logical_path_rejects_path_without_file_segment() {
        // Only a `.` segment: nothing is left after normalization.
        let err = normalize_logical_path(Path::new(".")).unwrap_err();
        assert!(err.to_string().contains("must resolve to a file"));
    }

    #[test]
    fn normalize_logical_path_rejects_absolute_paths() {
        let err = normalize_logical_path(Path::new("/src/lib.rs")).unwrap_err();
        assert!(err.to_string().contains("must be relative"));
    }

    #[test]
    fn normalize_logical_path_rejects_parent_traversal() {
        let err = normalize_logical_path(Path::new("../src/lib.rs")).unwrap_err();
        assert!(err.to_string().contains("parent traversal"));
    }

    #[test]
    fn normalize_logical_paths_rejects_duplicate_after_normalization() {
        let inputs = vec![
            InMemoryFile::new("./src/lib.rs", "fn main() {}\n"),
            InMemoryFile::new("src/lib.rs", "fn main() {}\n"),
        ];

        let err = normalize_logical_paths(&inputs, false).unwrap_err();
        assert!(err.to_string().contains("Duplicate in-memory path"));
    }

    #[test]
    fn normalize_logical_paths_rejects_case_only_collision_on_case_insensitive_fs() {
        let inputs = vec![
            InMemoryFile::new("src/lib.rs", "fn main() {}\n"),
            InMemoryFile::new("SRC/LIB.rs", "fn main() {}\n"),
        ];

        let err = normalize_logical_paths(&inputs, true).unwrap_err();
        assert!(err.to_string().contains("Duplicate in-memory path"));
    }

    #[test]
    fn normalize_logical_paths_keeps_case_variants_when_case_sensitive() -> Result<()> {
        let inputs = vec![
            InMemoryFile::new("src/lib.rs", "fn main() {}\n"),
            InMemoryFile::new("SRC/LIB.rs", "fn main() {}\n"),
        ];

        // Case-sensitive mode accepts both and preserves input order.
        let normalized = normalize_logical_paths(&inputs, false)?;
        assert_eq!(
            normalized,
            vec![PathBuf::from("src/lib.rs"), PathBuf::from("SRC/LIB.rs")]
        );
        Ok(())
    }
}