subx_cli/cli/
detect_encoding_args.rs

//! File encoding detection command-line arguments and options.
//!
//! This module defines the command-line interface for the `detect-encoding` subcommand,
//! which analyzes text files to determine their character encoding. This is particularly
//! useful for subtitle files that may be encoded in various character sets, especially
//! legacy encodings or region-specific formats.
//!
//! # Supported Detection
//!
//! The encoding detection can identify a wide range of character encodings including:
//! - UTF-8, UTF-16LE, UTF-16BE (Unicode variants)
//! - Windows-1252, ISO-8859-1 (Western European)
//! - GBK, GB2312, Big5 (Chinese variants)
//! - Shift_JIS, EUC-JP (Japanese)
//! - KOI8-R, Windows-1251 (Cyrillic)
//! - And many more regional encodings
//!
//! # Examples
//!
//! ```bash
//! # Detect encoding of a single file
//! subx detect-encoding subtitle.srt
//!
//! # Detect encoding of multiple files with verbose output
//! subx detect-encoding --verbose *.srt *.sub
//!
//! # Batch detect all subtitle files in current directory
//! subx detect-encoding *.srt *.ass *.vtt *.sub
//! ```
30
31use crate::cli::InputPathHandler;
32use crate::error::SubXError;
33use clap::Args;
34use std::path::PathBuf;
35
36/// Command-line arguments for file encoding detection.
37#[derive(Args, Debug)]
38pub struct DetectEncodingArgs {
39    /// Display detailed sample text and confidence information
40    #[arg(short, long)]
41    pub verbose: bool,
42
43    /// Specify file or directory paths to process (new parameter, mutually exclusive with file_paths)
44    #[arg(
45        short = 'i',
46        long = "input",
47        value_name = "PATH",
48        conflicts_with = "file_paths"
49    )]
50    pub input_paths: Vec<PathBuf>,
51
52    /// Recursively process subdirectories (new parameter)
53    #[arg(short, long)]
54    pub recursive: bool,
55
56    /// File paths to analyze for encoding detection
57    #[arg(required = true, conflicts_with = "input_paths")]
58    pub file_paths: Vec<String>,
59}
60
#[cfg(test)]
mod tests {
    use crate::cli::{Cli, Commands};
    use clap::Parser;
    use std::path::PathBuf;

    /// Runs `argv` through the top-level parser and returns the arguments of
    /// the `detect-encoding` subcommand.
    ///
    /// Panics if parsing fails or if a different subcommand was selected.
    fn parse_detect_encoding(argv: &[&str]) -> super::DetectEncodingArgs {
        match Cli::try_parse_from(argv.iter().copied()).unwrap().command {
            Commands::DetectEncoding(parsed) => parsed,
            _ => panic!("Expected DetectEncoding command"),
        }
    }

    /// Positional paths populate `file_paths` and leave the other inputs at
    /// their defaults.
    #[test]
    fn test_detect_encoding_args_file_paths() {
        let parsed = parse_detect_encoding(&["subx-cli", "detect-encoding", "a.srt", "b.ass"]);

        assert!(parsed.input_paths.is_empty());
        assert_eq!(parsed.file_paths, ["a.srt", "b.ass"]);
        assert!(!parsed.recursive);
    }

    /// Repeated `-i` options populate `input_paths`; no positional path is
    /// needed in that case, and both boolean switches are honoured.
    #[test]
    fn test_detect_encoding_args_input_paths() {
        let argv = [
            "subx-cli",
            "detect-encoding",
            "-i",
            "dir1",
            "-i",
            "dir2",
            "--recursive",
            "--verbose",
        ];
        let parsed = parse_detect_encoding(&argv);

        assert!(parsed.file_paths.is_empty());
        assert_eq!(
            parsed.input_paths,
            [PathBuf::from("dir1"), PathBuf::from("dir2")]
        );
        assert!(parsed.recursive);
        assert!(parsed.verbose);
    }

    /// Mixing a positional path with `-i` must be rejected by the parser.
    #[test]
    fn test_detect_encoding_args_conflict_file_and_input() {
        let argv = ["subx-cli", "detect-encoding", "file.srt", "-i", "dir"];
        let outcome = Cli::try_parse_from(argv);
        assert!(outcome.is_err());
    }
}
114
115impl DetectEncodingArgs {
116    /// Get all input paths, combining file_paths and input_paths parameters
117    pub fn get_input_handler(&self) -> Result<InputPathHandler, SubXError> {
118        let merged_paths = InputPathHandler::merge_paths_from_multiple_sources(
119            &[],
120            &self.input_paths,
121            &self.file_paths,
122        )?;
123
124        Ok(InputPathHandler::from_args(&merged_paths, self.recursive)?
125            .with_extensions(&["srt", "ass", "vtt", "ssa", "sub", "txt"]))
126    }
127
128    /// Get all file paths to process
129    pub fn get_file_paths(&self) -> Result<Vec<PathBuf>, SubXError> {
130        if !self.input_paths.is_empty() {
131            let handler = InputPathHandler::from_args(&self.input_paths, self.recursive)?
132                .with_extensions(&["srt", "ass", "vtt", "ssa", "sub", "txt"]);
133            return handler.collect_files();
134        }
135        if !self.file_paths.is_empty() {
136            return Ok(self.file_paths.iter().map(PathBuf::from).collect());
137        }
138        Err(SubXError::NoInputSpecified)
139    }
140}