subx_cli/cli/input_handler.rs
1use std::collections::HashMap;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use log::warn;
6use tempfile::TempDir;
7
8use crate::core::archive;
9use crate::error::SubXError;
10
11/// Universal input path processing structure for CLI commands.
12///
13/// `InputPathHandler` provides a unified interface for processing file and directory
14/// inputs across different SubX CLI commands. It supports multiple input sources,
15/// recursive directory scanning, and file extension filtering.
16///
17/// This handler is used by commands like `match`, `convert`, `sync`, and `detect-encoding`
18/// to provide consistent `-i` parameter functionality and directory processing behavior.
19///
20/// # Features
21///
22/// - **Multiple Input Sources**: Supports multiple files and directories via `-i` parameter
23/// - **Recursive Processing**: Optional recursive directory scanning with `--recursive` flag
24/// - **File Filtering**: Filter files by extension for command-specific processing
25/// - **Path Validation**: Validates all input paths exist before processing
26/// - **Cross-Platform**: Handles both absolute and relative paths correctly
27/// - **Archive Extraction**: Transparently extracts `.zip` (and `.rar` when built
28/// with the `archive-rar` feature) archives passed directly as inputs into a
29/// temporary directory and processes the extracted files as if they had been
30/// supplied directly. Archives discovered during recursive directory traversal
31/// are NOT extracted. The behaviour can be disabled per command via
32/// `--no-extract` (see [`with_no_extract`](Self::with_no_extract)), in which
33/// case archive files are treated as regular files and filtered by the
34/// command's extension list.
35///
36/// # Return Value
37///
/// [`collect_files`](Self::collect_files) returns a [`CollectedFiles`] handle
/// that dereferences to `Vec<PathBuf>` (and can therefore be used wherever a
/// `&[PathBuf]` is expected). When archive extraction is performed,
40/// `CollectedFiles` owns the underlying [`tempfile::TempDir`] handles; the
41/// extracted directories are removed automatically (RAII) when the
42/// `CollectedFiles` value is dropped. Callers must therefore keep the
43/// `CollectedFiles` value alive for as long as the extracted file paths are
44/// in use. `CollectedFiles` also exposes
45/// [`archive_origin`](CollectedFiles::archive_origin) so callers can map an
46/// extracted file back to the original archive that produced it.
47///
48/// # Examples
49///
50/// ## Basic Usage
51///
52/// ```rust
53/// use subx_cli::cli::InputPathHandler;
54/// use std::path::PathBuf;
55/// # use tempfile::TempDir;
56/// # use std::fs;
57///
58/// # let tmp = TempDir::new().unwrap();
59/// # let test_dir = tmp.path();
60/// # let file1 = test_dir.join("test1.srt");
61/// # let file2 = test_dir.join("test2.ass");
62/// # fs::write(&file1, "test content").unwrap();
63/// # fs::write(&file2, "test content").unwrap();
64///
65/// // Create handler from multiple paths
66/// let paths = vec![file1, file2];
67/// let handler = InputPathHandler::from_args(&paths, false)?
68/// .with_extensions(&["srt", "ass"]);
69///
70/// // Collect all matching files
71/// let files = handler.collect_files()?;
72/// assert_eq!(files.len(), 2);
73/// # Ok::<(), subx_cli::error::SubXError>(())
74/// ```
75///
76/// ## Directory Processing
77///
78/// ```rust
79/// use subx_cli::cli::InputPathHandler;
80/// use std::path::PathBuf;
81/// # use tempfile::TempDir;
82/// # use std::fs;
83///
84/// # let tmp = TempDir::new().unwrap();
85/// # let test_dir = tmp.path();
86/// # let nested_dir = test_dir.join("nested");
87/// # fs::create_dir(&nested_dir).unwrap();
88/// # let file1 = test_dir.join("test1.srt");
89/// # let file2 = nested_dir.join("test2.srt");
90/// # fs::write(&file1, "test content").unwrap();
91/// # fs::write(&file2, "test content").unwrap();
92///
93/// // Flat directory scanning (non-recursive)
94/// let handler_flat = InputPathHandler::from_args(&[test_dir.to_path_buf()], false)?
95/// .with_extensions(&["srt"]);
96/// let files_flat = handler_flat.collect_files()?;
97/// assert_eq!(files_flat.len(), 1); // Only finds file1
98///
99/// // Recursive directory scanning
100/// let handler_recursive = InputPathHandler::from_args(&[test_dir.to_path_buf()], true)?
101/// .with_extensions(&["srt"]);
102/// let files_recursive = handler_recursive.collect_files()?;
103/// assert_eq!(files_recursive.len(), 2); // Finds both file1 and file2
104/// # Ok::<(), subx_cli::error::SubXError>(())
105/// ```
106///
107/// ## Command Integration
108///
109/// ```rust,no_run
110/// use subx_cli::cli::{InputPathHandler, MatchArgs};
111/// # use std::path::PathBuf;
112///
113/// // Example of how commands use InputPathHandler
114/// # let args = MatchArgs {
115/// # path: Some(PathBuf::from("test")),
116/// # input_paths: vec![],
117/// # recursive: false,
118/// # dry_run: false,
119/// # confidence: 80,
120/// # backup: false,
121/// # copy: false,
122/// # move_files: false,
123/// # no_extract: false,
124/// # };
125/// let handler = args.get_input_handler()?;
126/// let files = handler.collect_files()?;
127/// // Process files...
128/// # Ok::<(), subx_cli::error::SubXError>(())
129/// ```
#[derive(Debug, Clone)]
pub struct InputPathHandler {
    /// Input paths (files and directories) to process, stored as supplied
    /// by the caller.
    pub paths: Vec<PathBuf>,
    /// Whether directory inputs are scanned recursively (subdirectories
    /// included) instead of only their top level.
    pub recursive: bool,
    /// File extension filters (lowercase, without dot). An empty list
    /// disables filtering, so every file matches.
    pub file_extensions: Vec<String>,
    /// Whether to skip archive extraction for archive file inputs; when set,
    /// archives go through the normal extension filter like any other file.
    pub no_extract: bool,
}
141
142impl InputPathHandler {
143 /// Merge paths from multiple sources to create a unified path list
144 ///
145 /// This method provides a unified interface for CLI commands to merge
146 /// different types of path parameters into a single PathBuf vector.
147 ///
148 /// # Arguments
149 ///
150 /// * `optional_paths` - Optional path list (e.g., `path`, `input`, `video`, `subtitle`, etc.)
151 /// * `multiple_paths` - Multiple path list (e.g., `input_paths`)
152 /// * `string_paths` - String format path list (e.g., `file_paths`)
153 ///
154 /// # Returns
155 ///
156 /// Returns the merged PathBuf vector, or an error if all inputs are empty
157 ///
158 /// # Examples
159 ///
160 /// ```rust
161 /// use subx_cli::cli::InputPathHandler;
162 /// use std::path::PathBuf;
163 ///
164 /// // Merge paths from different sources
165 /// let optional = vec![Some(PathBuf::from("single.srt"))];
166 /// let multiple = vec![PathBuf::from("dir1"), PathBuf::from("dir2")];
167 /// let strings = vec!["file1.srt".to_string(), "file2.ass".to_string()];
168 ///
169 /// let merged = InputPathHandler::merge_paths_from_multiple_sources(
170 /// &optional,
171 /// &multiple,
172 /// &strings
173 /// )?;
174 ///
175 /// // merged now contains all paths
176 /// assert_eq!(merged.len(), 5);
177 /// # Ok::<(), subx_cli::error::SubXError>(())
178 /// ```
179 pub fn merge_paths_from_multiple_sources(
180 optional_paths: &[Option<PathBuf>],
181 multiple_paths: &[PathBuf],
182 string_paths: &[String],
183 ) -> Result<Vec<PathBuf>, SubXError> {
184 let mut all_paths = Vec::new();
185
186 // Add optional paths (filter out None values)
187 for p in optional_paths.iter().flatten() {
188 all_paths.push(p.clone());
189 }
190
191 // Add multiple paths
192 all_paths.extend(multiple_paths.iter().cloned());
193
194 // Add string paths (convert to PathBuf)
195 for path_str in string_paths {
196 all_paths.push(PathBuf::from(path_str));
197 }
198
199 // Check if any paths were specified
200 if all_paths.is_empty() {
201 return Err(SubXError::NoInputSpecified);
202 }
203
204 Ok(all_paths)
205 }
206
207 /// Create InputPathHandler from command line arguments
208 pub fn from_args(input_args: &[PathBuf], recursive: bool) -> Result<Self, SubXError> {
209 let handler = Self {
210 paths: input_args.to_vec(),
211 recursive,
212 file_extensions: Vec::new(),
213 no_extract: false,
214 };
215 handler.validate()?;
216 Ok(handler)
217 }
218
219 /// Set supported file extensions (without dot)
220 pub fn with_extensions(mut self, extensions: &[&str]) -> Self {
221 self.file_extensions = extensions.iter().map(|s| s.to_lowercase()).collect();
222 self
223 }
224
225 /// Set whether to skip archive extraction.
226 ///
227 /// When `true`, archive files (`.zip`, `.rar`) are treated as regular
228 /// files and subject to the normal extension filter instead of being
229 /// extracted.
230 pub fn with_no_extract(mut self, no_extract: bool) -> Self {
231 self.no_extract = no_extract;
232 self
233 }
234
235 /// Validate that all paths exist
236 pub fn validate(&self) -> Result<(), SubXError> {
237 for path in &self.paths {
238 if !path.exists() {
239 return Err(SubXError::PathNotFound(path.clone()));
240 }
241 }
242 Ok(())
243 }
244
245 /// Get all specified directory paths
246 ///
247 /// This method returns all specified directory paths for commands
248 /// that need to process directories one by one. If the specified path
249 /// contains files, it will return the directory containing that file.
250 ///
251 /// # Returns
252 ///
253 /// Deduplicated list of directory paths
254 ///
255 /// # Examples
256 ///
257 /// ```rust
258 /// use subx_cli::cli::InputPathHandler;
259 /// use std::path::PathBuf;
260 /// # use tempfile::TempDir;
261 /// # use std::fs;
262 ///
263 /// # let tmp = TempDir::new().unwrap();
264 /// # let test_dir = tmp.path();
265 /// # let file1 = test_dir.join("test1.srt");
266 /// # fs::write(&file1, "test content").unwrap();
267 ///
268 /// let paths = vec![file1.clone(), test_dir.to_path_buf()];
269 /// let handler = InputPathHandler::from_args(&paths, false)?;
270 /// let directories = handler.get_directories();
271 ///
272 /// // Should contain test_dir (after deduplication)
273 /// assert_eq!(directories.len(), 1);
274 /// assert_eq!(directories[0], test_dir);
275 /// # Ok::<(), subx_cli::error::SubXError>(())
276 /// ```
277 pub fn get_directories(&self) -> Vec<PathBuf> {
278 let mut directories = std::collections::HashSet::new();
279
280 for path in &self.paths {
281 if path.is_dir() {
282 directories.insert(path.clone());
283 } else if path.is_file() {
284 if let Some(parent) = path.parent() {
285 directories.insert(parent.to_path_buf());
286 }
287 }
288 }
289
290 directories.into_iter().collect()
291 }
292
293 /// Expand files and directories, collecting all files that match the filter conditions.
294 ///
295 /// When archive extraction is enabled (the default), directly-specified
296 /// archive files (`.zip`, `.rar`) are transparently extracted to temporary
297 /// directories and their contents are included in the result instead of
298 /// the archive path itself. Archives found during directory traversal
299 /// are **not** extracted.
300 pub fn collect_files(&self) -> Result<CollectedFiles, SubXError> {
301 let mut files = Vec::new();
302 let mut temp_dirs = Vec::new();
303 let mut archive_origins: HashMap<PathBuf, PathBuf> = HashMap::new();
304
305 for base in &self.paths {
306 if base.is_file() {
307 // Check if this is an archive that should be extracted
308 if !self.no_extract {
309 if let Some(_format) = archive::detect_format(base) {
310 match self.extract_and_collect(base) {
311 Ok((extracted, temp_dir)) => {
312 let temp_root = temp_dir.path().to_path_buf();
313 archive_origins.insert(temp_root, base.clone());
314 files.extend(extracted);
315 temp_dirs.push(temp_dir);
316 continue;
317 }
318 Err(e) => {
319 warn!(
320 "Failed to extract archive {}, skipping: {e}",
321 base.display()
322 );
323 continue;
324 }
325 }
326 }
327 }
328 if self.matches_extension(base) {
329 files.push(base.clone());
330 }
331 } else if base.is_dir() {
332 if self.recursive {
333 files.extend(self.scan_directory_recursive(base)?);
334 } else {
335 files.extend(self.scan_directory_flat(base)?);
336 }
337 } else {
338 return Err(SubXError::InvalidPath(base.clone()));
339 }
340 }
341
342 if temp_dirs.is_empty() {
343 Ok(CollectedFiles::new(files))
344 } else {
345 Ok(CollectedFiles::with_archives(
346 files,
347 temp_dirs,
348 archive_origins,
349 ))
350 }
351 }
352
353 /// Extracts an archive to a temp directory and returns paths matching
354 /// the configured extension filter.
355 fn extract_and_collect(
356 &self,
357 archive_path: &Path,
358 ) -> Result<(Vec<PathBuf>, TempDir), SubXError> {
359 let temp_dir = TempDir::new().map_err(|e| {
360 SubXError::CommandExecution(format!("Failed to create temp directory: {e}"))
361 })?;
362 let extracted = archive::extract_archive(archive_path, temp_dir.path()).map_err(|e| {
363 SubXError::CommandExecution(format!(
364 "Failed to extract {}: {e}",
365 archive_path.display()
366 ))
367 })?;
368
369 let filtered: Vec<PathBuf> = extracted
370 .into_iter()
371 .filter(|p| self.matches_extension(p))
372 .collect();
373
374 Ok((filtered, temp_dir))
375 }
376
377 fn matches_extension(&self, path: &Path) -> bool {
378 if self.file_extensions.is_empty() {
379 return true;
380 }
381 path.extension()
382 .and_then(|e| e.to_str())
383 .map(|s| {
384 self.file_extensions
385 .iter()
386 .any(|ext| ext.eq_ignore_ascii_case(s))
387 })
388 .unwrap_or(false)
389 }
390
391 fn scan_directory_flat(&self, dir: &Path) -> Result<Vec<PathBuf>, SubXError> {
392 let mut result = Vec::new();
393 let rd = fs::read_dir(dir).map_err(|e| SubXError::DirectoryReadError {
394 path: dir.to_path_buf(),
395 source: e,
396 })?;
397 for entry in rd {
398 let entry = entry.map_err(|e| SubXError::DirectoryReadError {
399 path: dir.to_path_buf(),
400 source: e,
401 })?;
402 let ft = entry
403 .file_type()
404 .map_err(|e| SubXError::DirectoryReadError {
405 path: dir.to_path_buf(),
406 source: e,
407 })?;
408 if ft.is_symlink() {
409 log::debug!("Skipping symlink: {}", entry.path().display());
410 continue;
411 }
412 let p = entry.path();
413 if ft.is_file() && self.matches_extension(&p) {
414 result.push(p);
415 }
416 }
417 Ok(result)
418 }
419
420 fn scan_directory_recursive(&self, dir: &Path) -> Result<Vec<PathBuf>, SubXError> {
421 let mut result = Vec::new();
422 let rd = fs::read_dir(dir).map_err(|e| SubXError::DirectoryReadError {
423 path: dir.to_path_buf(),
424 source: e,
425 })?;
426 for entry in rd {
427 let entry = entry.map_err(|e| SubXError::DirectoryReadError {
428 path: dir.to_path_buf(),
429 source: e,
430 })?;
431 let ft = entry
432 .file_type()
433 .map_err(|e| SubXError::DirectoryReadError {
434 path: dir.to_path_buf(),
435 source: e,
436 })?;
437 if ft.is_symlink() {
438 log::debug!("Skipping symlink: {}", entry.path().display());
439 continue;
440 }
441 let p = entry.path();
442 if ft.is_file() {
443 if self.matches_extension(&p) {
444 result.push(p.clone());
445 }
446 } else if ft.is_dir() {
447 result.extend(self.scan_directory_recursive(&p)?);
448 }
449 }
450 Ok(result)
451 }
452}
453
/// Result of collecting files from input paths, including any temporary
/// directories created during archive extraction.
///
/// This struct owns any `TempDir` handles created during archive extraction.
/// The temporary directories are automatically cleaned up when this value
/// is dropped, so paths pointing into them are only usable while this value
/// is alive.
#[derive(Debug)]
pub struct CollectedFiles {
    /// Collected file paths (both regular inputs and extracted archive members)
    paths: Vec<PathBuf>,
    /// Temporary directories from archive extraction (kept alive by ownership);
    /// never read, only held so they are not dropped early
    _temp_dirs: Vec<TempDir>,
    /// Mapping from temp-directory root to original archive file path,
    /// consulted by `archive_origin`
    archive_origins: HashMap<PathBuf, PathBuf>,
}
469
470impl CollectedFiles {
471 /// Creates a new `CollectedFiles` with no archive origins.
472 pub fn new(paths: Vec<PathBuf>) -> Self {
473 Self {
474 paths,
475 _temp_dirs: Vec::new(),
476 archive_origins: HashMap::new(),
477 }
478 }
479
480 /// Creates a new `CollectedFiles` with archive context.
481 pub fn with_archives(
482 paths: Vec<PathBuf>,
483 temp_dirs: Vec<TempDir>,
484 archive_origins: HashMap<PathBuf, PathBuf>,
485 ) -> Self {
486 Self {
487 paths,
488 _temp_dirs: temp_dirs,
489 archive_origins,
490 }
491 }
492
493 /// Returns the archive origin path for a file extracted from an archive.
494 ///
495 /// If the given path starts with a known temp-directory root, returns
496 /// the original archive file path. Returns `None` for non-archive paths.
497 pub fn archive_origin(&self, path: &Path) -> Option<&Path> {
498 for (temp_root, archive_path) in &self.archive_origins {
499 if path.starts_with(temp_root) {
500 return Some(archive_path.as_path());
501 }
502 }
503 None
504 }
505
506 /// Consumes self and returns the collected paths.
507 ///
508 /// **Warning:** This drops the `TempDir` handles, so any paths pointing
509 /// to temporary extraction directories will become invalid.
510 pub fn into_paths(self) -> Vec<PathBuf> {
511 self.paths
512 }
513}
514
515impl std::ops::Deref for CollectedFiles {
516 type Target = Vec<PathBuf>;
517
518 fn deref(&self) -> &Self::Target {
519 &self.paths
520 }
521}
522
523impl AsRef<[PathBuf]> for CollectedFiles {
524 fn as_ref(&self) -> &[PathBuf] {
525 &self.paths
526 }
527}
528
#[cfg(test)]
mod symlink_tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temp directory holding a regular file `real.txt` and a
    /// symlink `link.txt` pointing at it. The `TempDir` is returned so the
    /// directory outlives the assertions.
    #[cfg(unix)]
    fn symlink_fixture() -> (TempDir, std::path::PathBuf, std::path::PathBuf) {
        let tmp = TempDir::new().unwrap();
        let real = tmp.path().join("real.txt");
        fs::write(&real, b"x").unwrap();
        let link = tmp.path().join("link.txt");
        std::os::unix::fs::symlink(&real, &link).unwrap();
        (tmp, real, link)
    }

    /// The recursive scan must report the real file but not the symlink.
    #[cfg(unix)]
    #[test]
    fn test_scan_directory_recursive_skips_symlinks() {
        let (tmp, real, link) = symlink_fixture();

        let handler = InputPathHandler::from_args(&[tmp.path().to_path_buf()], true).unwrap();
        let results = handler.scan_directory_recursive(tmp.path()).unwrap();

        assert!(results.contains(&real));
        assert!(
            !results.contains(&link),
            "symlinked file should have been skipped"
        );
    }

    /// The flat scan must report the real file but not the symlink.
    #[cfg(unix)]
    #[test]
    fn test_scan_directory_flat_skips_symlinks() {
        let (tmp, real, link) = symlink_fixture();

        let handler = InputPathHandler::from_args(&[tmp.path().to_path_buf()], false).unwrap();
        let results = handler.scan_directory_flat(tmp.path()).unwrap();

        assert!(results.contains(&real));
        assert!(!results.contains(&link));
    }
}