Skip to main content

subx_cli/cli/
input_handler.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use log::warn;
6use tempfile::TempDir;
7
8use crate::core::archive;
9use crate::error::SubXError;
10
/// Shared input-path processor used by SubX CLI commands.
///
/// `InputPathHandler` turns the path arguments of a command into a concrete
/// list of files. It accepts any mix of files and directories, can walk
/// directories one level deep or recursively, and can restrict the result to
/// a set of file extensions.
///
/// Commands such as `match`, `convert`, `sync`, and `detect-encoding` rely on
/// this handler so that their `-i` parameter and directory handling behave
/// the same way everywhere.
///
/// # Features
///
/// - **Multiple Input Sources**: any number of files and directories can be
///   supplied through the `-i` parameter
/// - **Recursive Processing**: subdirectories are scanned only when the
///   `--recursive` flag is set
/// - **File Filtering**: results can be limited to the extensions a command
///   cares about
/// - **Path Validation**: every input path must exist; this is checked when
///   the handler is built with [`from_args`](Self::from_args)
/// - **Cross-Platform**: absolute and relative paths are both accepted
/// - **Archive Extraction**: `.zip` archives (and `.rar` archives when built
///   with the `archive-rar` feature) that are passed directly as inputs are
///   extracted into a temporary directory, and the extracted files are
///   processed as if they had been supplied directly. Archives that are only
///   discovered while traversing a directory are NOT extracted. Extraction
///   can be switched off per command via `--no-extract`
///   (see [`with_no_extract`](Self::with_no_extract)); archive files are then
///   handled like any other file and filtered by the command's extension
///   list.
///
/// # Return Value
///
/// [`collect_files`](Self::collect_files) returns a [`CollectedFiles`] handle
/// that dereferences to the collected `Vec<PathBuf>` (and can therefore be
/// used wherever a `&[PathBuf]` is expected). When archives were extracted,
/// `CollectedFiles` owns the underlying [`tempfile::TempDir`] handles, and
/// the extraction directories are removed when the `CollectedFiles` value is
/// dropped. Callers must therefore keep the `CollectedFiles` value alive for
/// as long as the extracted file paths are in use. `CollectedFiles` also
/// exposes [`archive_origin`](CollectedFiles::archive_origin) so callers can
/// map an extracted file back to the archive it came from.
///
/// # Examples
///
/// ## Basic Usage
///
/// ```rust
/// use subx_cli::cli::InputPathHandler;
/// use std::path::PathBuf;
/// # use tempfile::TempDir;
/// # use std::fs;
///
/// # let tmp = TempDir::new().unwrap();
/// # let test_dir = tmp.path();
/// # let file1 = test_dir.join("test1.srt");
/// # let file2 = test_dir.join("test2.ass");
/// # fs::write(&file1, "test content").unwrap();
/// # fs::write(&file2, "test content").unwrap();
///
/// // Create handler from multiple paths
/// let paths = vec![file1, file2];
/// let handler = InputPathHandler::from_args(&paths, false)?
///     .with_extensions(&["srt", "ass"]);
///
/// // Collect all matching files
/// let files = handler.collect_files()?;
/// assert_eq!(files.len(), 2);
/// # Ok::<(), subx_cli::error::SubXError>(())
/// ```
///
/// ## Directory Processing
///
/// ```rust
/// use subx_cli::cli::InputPathHandler;
/// use std::path::PathBuf;
/// # use tempfile::TempDir;
/// # use std::fs;
///
/// # let tmp = TempDir::new().unwrap();
/// # let test_dir = tmp.path();
/// # let nested_dir = test_dir.join("nested");
/// # fs::create_dir(&nested_dir).unwrap();
/// # let file1 = test_dir.join("test1.srt");
/// # let file2 = nested_dir.join("test2.srt");
/// # fs::write(&file1, "test content").unwrap();
/// # fs::write(&file2, "test content").unwrap();
///
/// // Flat directory scanning (non-recursive)
/// let handler_flat = InputPathHandler::from_args(&[test_dir.to_path_buf()], false)?
///     .with_extensions(&["srt"]);
/// let files_flat = handler_flat.collect_files()?;
/// assert_eq!(files_flat.len(), 1); // Only finds file1
///
/// // Recursive directory scanning
/// let handler_recursive = InputPathHandler::from_args(&[test_dir.to_path_buf()], true)?
///     .with_extensions(&["srt"]);
/// let files_recursive = handler_recursive.collect_files()?;
/// assert_eq!(files_recursive.len(), 2); // Finds both file1 and file2
/// # Ok::<(), subx_cli::error::SubXError>(())
/// ```
///
/// ## Command Integration
///
/// ```rust,no_run
/// use subx_cli::cli::{InputPathHandler, MatchArgs};
/// # use std::path::PathBuf;
///
/// // Example of how commands use InputPathHandler
/// # let args = MatchArgs {
/// #     path: Some(PathBuf::from("test")),
/// #     input_paths: vec![],
/// #     recursive: false,
/// #     dry_run: false,
/// #     confidence: 80,
/// #     backup: false,
/// #     copy: false,
/// #     move_files: false,
/// #     no_extract: false,
/// # };
/// let handler = args.get_input_handler()?;
/// let files = handler.collect_files()?;
/// // Process files...
/// # Ok::<(), subx_cli::error::SubXError>(())
/// ```
#[derive(Clone, Debug)]
pub struct InputPathHandler {
    /// Input paths to process; each entry may be a file or a directory.
    pub paths: Vec<PathBuf>,
    /// When `true`, directories are walked recursively instead of one level deep.
    pub recursive: bool,
    /// Accepted file extensions (lowercase, without the leading dot).
    /// An empty list accepts every file.
    pub file_extensions: Vec<String>,
    /// When `true`, archive inputs are not extracted and are treated as regular files.
    pub no_extract: bool,
}
141
142impl InputPathHandler {
143    /// Merge paths from multiple sources to create a unified path list
144    ///
145    /// This method provides a unified interface for CLI commands to merge
146    /// different types of path parameters into a single PathBuf vector.
147    ///
148    /// # Arguments
149    ///
150    /// * `optional_paths` - Optional path list (e.g., `path`, `input`, `video`, `subtitle`, etc.)
151    /// * `multiple_paths` - Multiple path list (e.g., `input_paths`)
152    /// * `string_paths` - String format path list (e.g., `file_paths`)
153    ///
154    /// # Returns
155    ///
156    /// Returns the merged PathBuf vector, or an error if all inputs are empty
157    ///
158    /// # Examples
159    ///
160    /// ```rust
161    /// use subx_cli::cli::InputPathHandler;
162    /// use std::path::PathBuf;
163    ///
164    /// // Merge paths from different sources
165    /// let optional = vec![Some(PathBuf::from("single.srt"))];
166    /// let multiple = vec![PathBuf::from("dir1"), PathBuf::from("dir2")];
167    /// let strings = vec!["file1.srt".to_string(), "file2.ass".to_string()];
168    ///
169    /// let merged = InputPathHandler::merge_paths_from_multiple_sources(
170    ///     &optional,
171    ///     &multiple,
172    ///     &strings
173    /// )?;
174    ///
175    /// // merged now contains all paths
176    /// assert_eq!(merged.len(), 5);
177    /// # Ok::<(), subx_cli::error::SubXError>(())
178    /// ```
179    pub fn merge_paths_from_multiple_sources(
180        optional_paths: &[Option<PathBuf>],
181        multiple_paths: &[PathBuf],
182        string_paths: &[String],
183    ) -> Result<Vec<PathBuf>, SubXError> {
184        let mut all_paths = Vec::new();
185
186        // Add optional paths (filter out None values)
187        for p in optional_paths.iter().flatten() {
188            all_paths.push(p.clone());
189        }
190
191        // Add multiple paths
192        all_paths.extend(multiple_paths.iter().cloned());
193
194        // Add string paths (convert to PathBuf)
195        for path_str in string_paths {
196            all_paths.push(PathBuf::from(path_str));
197        }
198
199        // Check if any paths were specified
200        if all_paths.is_empty() {
201            return Err(SubXError::NoInputSpecified);
202        }
203
204        Ok(all_paths)
205    }
206
207    /// Create InputPathHandler from command line arguments
208    pub fn from_args(input_args: &[PathBuf], recursive: bool) -> Result<Self, SubXError> {
209        let handler = Self {
210            paths: input_args.to_vec(),
211            recursive,
212            file_extensions: Vec::new(),
213            no_extract: false,
214        };
215        handler.validate()?;
216        Ok(handler)
217    }
218
219    /// Set supported file extensions (without dot)
220    pub fn with_extensions(mut self, extensions: &[&str]) -> Self {
221        self.file_extensions = extensions.iter().map(|s| s.to_lowercase()).collect();
222        self
223    }
224
225    /// Set whether to skip archive extraction.
226    ///
227    /// When `true`, archive files (`.zip`, `.rar`) are treated as regular
228    /// files and subject to the normal extension filter instead of being
229    /// extracted.
230    pub fn with_no_extract(mut self, no_extract: bool) -> Self {
231        self.no_extract = no_extract;
232        self
233    }
234
235    /// Validate that all paths exist
236    pub fn validate(&self) -> Result<(), SubXError> {
237        for path in &self.paths {
238            if !path.exists() {
239                return Err(SubXError::PathNotFound(path.clone()));
240            }
241        }
242        Ok(())
243    }
244
245    /// Get all specified directory paths
246    ///
247    /// This method returns all specified directory paths for commands
248    /// that need to process directories one by one. If the specified path
249    /// contains files, it will return the directory containing that file.
250    ///
251    /// # Returns
252    ///
253    /// Deduplicated list of directory paths
254    ///
255    /// # Examples
256    ///
257    /// ```rust
258    /// use subx_cli::cli::InputPathHandler;
259    /// use std::path::PathBuf;
260    /// # use tempfile::TempDir;
261    /// # use std::fs;
262    ///
263    /// # let tmp = TempDir::new().unwrap();
264    /// # let test_dir = tmp.path();
265    /// # let file1 = test_dir.join("test1.srt");
266    /// # fs::write(&file1, "test content").unwrap();
267    ///
268    /// let paths = vec![file1.clone(), test_dir.to_path_buf()];
269    /// let handler = InputPathHandler::from_args(&paths, false)?;
270    /// let directories = handler.get_directories();
271    ///
272    /// // Should contain test_dir (after deduplication)
273    /// assert_eq!(directories.len(), 1);
274    /// assert_eq!(directories[0], test_dir);
275    /// # Ok::<(), subx_cli::error::SubXError>(())
276    /// ```
277    pub fn get_directories(&self) -> Vec<PathBuf> {
278        let mut directories = std::collections::HashSet::new();
279
280        for path in &self.paths {
281            if path.is_dir() {
282                directories.insert(path.clone());
283            } else if path.is_file() {
284                if let Some(parent) = path.parent() {
285                    directories.insert(parent.to_path_buf());
286                }
287            }
288        }
289
290        directories.into_iter().collect()
291    }
292
293    /// Expand files and directories, collecting all files that match the filter conditions.
294    ///
295    /// When archive extraction is enabled (the default), directly-specified
296    /// archive files (`.zip`, `.rar`) are transparently extracted to temporary
297    /// directories and their contents are included in the result instead of
298    /// the archive path itself. Archives found during directory traversal
299    /// are **not** extracted.
300    pub fn collect_files(&self) -> Result<CollectedFiles, SubXError> {
301        let mut files = Vec::new();
302        let mut temp_dirs = Vec::new();
303        let mut archive_origins: HashMap<PathBuf, PathBuf> = HashMap::new();
304
305        for base in &self.paths {
306            if base.is_file() {
307                // Check if this is an archive that should be extracted
308                if !self.no_extract {
309                    if let Some(_format) = archive::detect_format(base) {
310                        match self.extract_and_collect(base) {
311                            Ok((extracted, temp_dir)) => {
312                                let temp_root = temp_dir.path().to_path_buf();
313                                archive_origins.insert(temp_root, base.clone());
314                                files.extend(extracted);
315                                temp_dirs.push(temp_dir);
316                                continue;
317                            }
318                            Err(e) => {
319                                warn!(
320                                    "Failed to extract archive {}, skipping: {e}",
321                                    base.display()
322                                );
323                                continue;
324                            }
325                        }
326                    }
327                }
328                if self.matches_extension(base) {
329                    files.push(base.clone());
330                }
331            } else if base.is_dir() {
332                if self.recursive {
333                    files.extend(self.scan_directory_recursive(base)?);
334                } else {
335                    files.extend(self.scan_directory_flat(base)?);
336                }
337            } else {
338                return Err(SubXError::InvalidPath(base.clone()));
339            }
340        }
341
342        if temp_dirs.is_empty() {
343            Ok(CollectedFiles::new(files))
344        } else {
345            Ok(CollectedFiles::with_archives(
346                files,
347                temp_dirs,
348                archive_origins,
349            ))
350        }
351    }
352
353    /// Extracts an archive to a temp directory and returns paths matching
354    /// the configured extension filter.
355    fn extract_and_collect(
356        &self,
357        archive_path: &Path,
358    ) -> Result<(Vec<PathBuf>, TempDir), SubXError> {
359        let temp_dir = TempDir::new().map_err(|e| {
360            SubXError::CommandExecution(format!("Failed to create temp directory: {e}"))
361        })?;
362        let extracted = archive::extract_archive(archive_path, temp_dir.path()).map_err(|e| {
363            SubXError::CommandExecution(format!(
364                "Failed to extract {}: {e}",
365                archive_path.display()
366            ))
367        })?;
368
369        let filtered: Vec<PathBuf> = extracted
370            .into_iter()
371            .filter(|p| self.matches_extension(p))
372            .collect();
373
374        Ok((filtered, temp_dir))
375    }
376
377    fn matches_extension(&self, path: &Path) -> bool {
378        if self.file_extensions.is_empty() {
379            return true;
380        }
381        path.extension()
382            .and_then(|e| e.to_str())
383            .map(|s| {
384                self.file_extensions
385                    .iter()
386                    .any(|ext| ext.eq_ignore_ascii_case(s))
387            })
388            .unwrap_or(false)
389    }
390
391    fn scan_directory_flat(&self, dir: &Path) -> Result<Vec<PathBuf>, SubXError> {
392        let mut result = Vec::new();
393        let rd = fs::read_dir(dir).map_err(|e| SubXError::DirectoryReadError {
394            path: dir.to_path_buf(),
395            source: e,
396        })?;
397        for entry in rd {
398            let entry = entry.map_err(|e| SubXError::DirectoryReadError {
399                path: dir.to_path_buf(),
400                source: e,
401            })?;
402            let ft = entry
403                .file_type()
404                .map_err(|e| SubXError::DirectoryReadError {
405                    path: dir.to_path_buf(),
406                    source: e,
407                })?;
408            if ft.is_symlink() {
409                log::debug!("Skipping symlink: {}", entry.path().display());
410                continue;
411            }
412            let p = entry.path();
413            if ft.is_file() && self.matches_extension(&p) {
414                result.push(p);
415            }
416        }
417        Ok(result)
418    }
419
420    fn scan_directory_recursive(&self, dir: &Path) -> Result<Vec<PathBuf>, SubXError> {
421        let mut result = Vec::new();
422        let rd = fs::read_dir(dir).map_err(|e| SubXError::DirectoryReadError {
423            path: dir.to_path_buf(),
424            source: e,
425        })?;
426        for entry in rd {
427            let entry = entry.map_err(|e| SubXError::DirectoryReadError {
428                path: dir.to_path_buf(),
429                source: e,
430            })?;
431            let ft = entry
432                .file_type()
433                .map_err(|e| SubXError::DirectoryReadError {
434                    path: dir.to_path_buf(),
435                    source: e,
436                })?;
437            if ft.is_symlink() {
438                log::debug!("Skipping symlink: {}", entry.path().display());
439                continue;
440            }
441            let p = entry.path();
442            if ft.is_file() {
443                if self.matches_extension(&p) {
444                    result.push(p.clone());
445                }
446            } else if ft.is_dir() {
447                result.extend(self.scan_directory_recursive(&p)?);
448            }
449        }
450        Ok(result)
451    }
452}
453
454/// Result of collecting files from input paths, including any temporary
455/// directories created during archive extraction.
456///
457/// This struct owns any `TempDir` handles created during archive extraction.
458/// The temporary directories are automatically cleaned up when this value
459/// is dropped.
460#[derive(Debug)]
461pub struct CollectedFiles {
462    /// Collected file paths
463    paths: Vec<PathBuf>,
464    /// Temporary directories from archive extraction (kept alive by ownership)
465    _temp_dirs: Vec<TempDir>,
466    /// Mapping from temp-directory root to original archive file path
467    archive_origins: HashMap<PathBuf, PathBuf>,
468}
469
470impl CollectedFiles {
471    /// Creates a new `CollectedFiles` with no archive origins.
472    pub fn new(paths: Vec<PathBuf>) -> Self {
473        Self {
474            paths,
475            _temp_dirs: Vec::new(),
476            archive_origins: HashMap::new(),
477        }
478    }
479
480    /// Creates a new `CollectedFiles` with archive context.
481    pub fn with_archives(
482        paths: Vec<PathBuf>,
483        temp_dirs: Vec<TempDir>,
484        archive_origins: HashMap<PathBuf, PathBuf>,
485    ) -> Self {
486        Self {
487            paths,
488            _temp_dirs: temp_dirs,
489            archive_origins,
490        }
491    }
492
493    /// Returns the archive origin path for a file extracted from an archive.
494    ///
495    /// If the given path starts with a known temp-directory root, returns
496    /// the original archive file path. Returns `None` for non-archive paths.
497    pub fn archive_origin(&self, path: &Path) -> Option<&Path> {
498        for (temp_root, archive_path) in &self.archive_origins {
499            if path.starts_with(temp_root) {
500                return Some(archive_path.as_path());
501            }
502        }
503        None
504    }
505
506    /// Consumes self and returns the collected paths.
507    ///
508    /// **Warning:** This drops the `TempDir` handles, so any paths pointing
509    /// to temporary extraction directories will become invalid.
510    pub fn into_paths(self) -> Vec<PathBuf> {
511        self.paths
512    }
513}
514
515impl std::ops::Deref for CollectedFiles {
516    type Target = Vec<PathBuf>;
517
518    fn deref(&self) -> &Self::Target {
519        &self.paths
520    }
521}
522
523impl AsRef<[PathBuf]> for CollectedFiles {
524    fn as_ref(&self) -> &[PathBuf] {
525        &self.paths
526    }
527}
528
#[cfg(test)]
mod symlink_tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Builds a temp directory holding a regular file `real.txt` and a
    /// symlink `link.txt` pointing at it.
    ///
    /// Returns the directory handle (which must stay alive during the test)
    /// plus the paths of the regular file and of the symlink.
    #[cfg(unix)]
    fn file_with_symlink() -> (TempDir, PathBuf, PathBuf) {
        let dir = TempDir::new().unwrap();
        let target = dir.path().join("real.txt");
        let alias = dir.path().join("link.txt");
        fs::write(&target, b"x").unwrap();
        std::os::unix::fs::symlink(&target, &alias).unwrap();
        (dir, target, alias)
    }

    /// The recursive scan reports the regular file but not the symlink to it.
    #[cfg(unix)]
    #[test]
    fn test_scan_directory_recursive_skips_symlinks() {
        let (dir, target, alias) = file_with_symlink();

        let handler = InputPathHandler::from_args(&[dir.path().to_path_buf()], true).unwrap();
        let found = handler.scan_directory_recursive(dir.path()).unwrap();

        assert!(found.contains(&target));
        assert!(
            !found.contains(&alias),
            "symlinked file should have been skipped"
        );
    }

    /// The flat scan reports the regular file but not the symlink to it.
    #[cfg(unix)]
    #[test]
    fn test_scan_directory_flat_skips_symlinks() {
        let (dir, target, alias) = file_with_symlink();

        let handler = InputPathHandler::from_args(&[dir.path().to_path_buf()], false).unwrap();
        let found = handler.scan_directory_flat(dir.path()).unwrap();

        assert!(found.contains(&target));
        assert!(!found.contains(&alias));
    }
}