Skip to main content

subx_cli/cli/
detect_encoding_args.rs

1//! File encoding detection command-line arguments and options.
2//!
3//! This module defines the command-line interface for the `detect-encoding` subcommand,
4//! which analyzes text files to determine their character encoding. This is particularly
5//! useful for subtitle files that may be encoded in various character sets, especially
6//! legacy encodings or region-specific formats.
7//!
8//! # Supported Detection
9//!
10//! The encoding detection can identify a wide range of character encodings including:
11//! - UTF-8, UTF-16LE, UTF-16BE (Unicode variants)
12//! - Windows-1252, ISO-8859-1 (Western European)
13//! - GBK, GB2312, Big5 (Chinese variants)
14//! - Shift_JIS, EUC-JP (Japanese)
15//! - KOI8-R, Windows-1251 (Cyrillic)
16//! - And many more regional encodings
17//!
18//! # Examples
19//!
20//! ```bash
21//! # Detect encoding of a single file
22//! subx detect-encoding subtitle.srt
23//!
24//! # Detect encoding of multiple files with verbose output
25//! subx detect-encoding --verbose *.srt *.sub
26//!
27//! # Batch detect all subtitle files in current directory
28//! subx detect-encoding *.srt *.ass *.vtt *.sub
29//! ```
30
31use crate::cli::InputPathHandler;
32use crate::error::SubXError;
33use clap::Args;
34use std::path::PathBuf;
35
36/// Command-line arguments for file encoding detection.
37#[derive(Args, Debug)]
38pub struct DetectEncodingArgs {
39    /// Display detailed sample text and confidence information
40    #[arg(short, long)]
41    pub verbose: bool,
42
43    /// Specify file or directory paths to process (new parameter, mutually exclusive with file_paths)
44    #[arg(
45        short = 'i',
46        long = "input",
47        value_name = "PATH",
48        conflicts_with = "file_paths"
49    )]
50    pub input_paths: Vec<PathBuf>,
51
52    /// Recursively process subdirectories (new parameter)
53    #[arg(short, long)]
54    pub recursive: bool,
55
56    /// File paths to analyze for encoding detection
57    #[arg(required = true, conflicts_with = "input_paths")]
58    pub file_paths: Vec<String>,
59
60    /// Disable automatic archive extraction for `-i` inputs
61    #[arg(long, default_value_t = false)]
62    pub no_extract: bool,
63}
64
#[cfg(test)]
mod tests {
    use crate::cli::{Cli, Commands};
    use clap::Parser;
    use std::path::PathBuf;

    /// Parses `argv` through the top-level CLI and returns the `detect-encoding`
    /// arguments. Panics when parsing fails or another subcommand was selected.
    fn parse_detect_encoding(argv: &[&str]) -> super::DetectEncodingArgs {
        let cli = Cli::try_parse_from(argv.iter().copied()).unwrap();
        match cli.command {
            Commands::DetectEncoding(parsed) => parsed,
            _ => panic!("Expected DetectEncoding command"),
        }
    }

    /// Positional arguments populate `file_paths` and leave the `-i` inputs empty.
    #[test]
    fn test_detect_encoding_args_file_paths() {
        let parsed = parse_detect_encoding(&["subx-cli", "detect-encoding", "a.srt", "b.ass"]);

        assert!(parsed.input_paths.is_empty());
        assert_eq!(parsed.file_paths, ["a.srt", "b.ass"]);
        assert!(!parsed.recursive);
    }

    /// Repeated `-i` options accumulate into `input_paths`; the boolean flags are honoured.
    #[test]
    fn test_detect_encoding_args_input_paths() {
        let argv = [
            "subx-cli",
            "detect-encoding",
            "-i",
            "dir1",
            "-i",
            "dir2",
            "--recursive",
            "--verbose",
        ];
        let parsed = parse_detect_encoding(&argv);

        assert!(parsed.file_paths.is_empty());
        assert_eq!(
            parsed.input_paths,
            [PathBuf::from("dir1"), PathBuf::from("dir2")]
        );
        assert!(parsed.recursive);
        assert!(parsed.verbose);
    }

    /// Mixing a positional path with `-i` must be rejected by the parser.
    #[test]
    fn test_detect_encoding_args_conflict_file_and_input() {
        let argv = ["subx-cli", "detect-encoding", "file.srt", "-i", "dir"];
        assert!(Cli::try_parse_from(argv).is_err());
    }
}
118
119impl DetectEncodingArgs {
120    /// Get all input paths, combining file_paths and input_paths parameters
121    pub fn get_input_handler(&self) -> Result<InputPathHandler, SubXError> {
122        let merged_paths = InputPathHandler::merge_paths_from_multiple_sources(
123            &[],
124            &self.input_paths,
125            &self.file_paths,
126        )?;
127
128        Ok(InputPathHandler::from_args(&merged_paths, self.recursive)?
129            .with_extensions(&["srt", "ass", "vtt", "ssa", "sub", "txt"])
130            .with_no_extract(self.no_extract))
131    }
132
133    /// Get all file paths to process
134    pub fn get_file_paths(&self) -> Result<Vec<PathBuf>, SubXError> {
135        if !self.input_paths.is_empty() {
136            let handler = InputPathHandler::from_args(&self.input_paths, self.recursive)?
137                .with_extensions(&["srt", "ass", "vtt", "ssa", "sub", "txt"]);
138            return handler.collect_files().map(|cf| cf.into_paths());
139        }
140        if !self.file_paths.is_empty() {
141            return Ok(self.file_paths.iter().map(PathBuf::from).collect());
142        }
143        Err(SubXError::NoInputSpecified)
144    }
145}