subx_cli/cli/
detect_encoding_args.rs

//! File encoding detection command-line arguments and options.
//!
//! This module defines the command-line interface for the `detect-encoding` subcommand,
//! which analyzes text files to determine their character encoding. This is particularly
//! useful for subtitle files that may be encoded in various character sets, especially
//! legacy encodings or region-specific formats.
//!
//! # Supported Encodings
//!
//! Encoding detection can identify a wide range of character encodings, including:
//! - UTF-8, UTF-16LE, UTF-16BE (Unicode variants)
//! - Windows-1252, ISO-8859-1 (Western European)
//! - GBK, GB2312, Big5 (Chinese variants)
//! - Shift_JIS, EUC-JP (Japanese)
//! - KOI8-R, Windows-1251 (Cyrillic)
//! - And many more regional encodings
//!
//! # Examples
//!
//! ```bash
//! # Detect encoding of a single file
//! subx detect-encoding subtitle.srt
//!
//! # Detect encoding of multiple files with verbose output
//! subx detect-encoding --verbose *.srt *.sub
//!
//! # Batch detect all subtitle files in current directory
//! subx detect-encoding *.srt *.ass *.vtt *.sub
//! ```
30
31use clap::Args;
32
33/// Command-line arguments for file encoding detection.
34///
35/// The detect-encoding command analyzes the byte patterns and character
36/// distributions in text files to determine their most likely character
37/// encoding. This is essential for processing subtitle files that may
38/// have been created with different encodings.
39///
40/// # Detection Algorithm
41///
42/// The detection process uses multiple approaches:
43/// 1. **BOM (Byte Order Mark) detection** for Unicode files
44/// 2. **Statistical analysis** of byte patterns
45/// 3. **Character frequency analysis** for specific languages
46/// 4. **Heuristic rules** based on encoding characteristics
47///
48/// # Examples
49///
50/// ```rust
51/// use subx_cli::cli::DetectEncodingArgs;
52///
53/// let args = DetectEncodingArgs {
54///     verbose: true,
55///     file_paths: vec![
56///         "subtitle1.srt".to_string(),
57///         "subtitle2.ass".to_string(),
58///     ],
59/// };
60/// ```
61#[derive(Args, Debug)]
62pub struct DetectEncodingArgs {
63    /// Display detailed sample text and confidence information.
64    ///
65    /// When enabled, shows additional information about the detection process:
66    /// - Confidence percentage for the detected encoding
67    /// - Sample text decoded with the detected encoding
68    /// - Alternative encoding candidates with their confidence scores
69    /// - Detected language hints (if available)
70    ///
71    /// This is useful for verifying detection accuracy and troubleshooting
72    /// encoding issues with problematic files.
73    ///
74    /// # Examples
75    ///
76    /// ```bash
77    /// # Basic detection
78    /// subx detect-encoding file.srt
79    /// # Output: file.srt: UTF-8
80    ///
81    /// # Verbose detection with details
82    /// subx detect-encoding --verbose file.srt
83    /// # Output:
84    /// # file.srt: UTF-8 (99.5% confidence)
85    /// # Sample: "1\n00:00:01,000 --> 00:00:03,000\nHello World"
86    /// # Alternatives: ISO-8859-1 (15.2%), Windows-1252 (12.8%)
87    /// ```
88    #[arg(short, long)]
89    pub verbose: bool,
90
91    /// File paths to analyze for encoding detection.
92    ///
93    /// Accepts multiple file paths or glob patterns. All specified files
94    /// will be analyzed and their detected encodings reported. The command
95    /// supports both absolute and relative paths.
96    ///
97    /// # Supported File Types
98    ///
99    /// While primarily designed for subtitle files, the detection works
100    /// with any text-based file:
101    /// - Subtitle files: .srt, .ass, .vtt, .sub, .ssa, .smi
102    /// - Text files: .txt, .md, .csv, .json, .xml
103    /// - Script files: .py, .js, .html, .css
104    ///
105    /// # Examples
106    ///
107    /// ```bash
108    /// # Single file
109    /// subx detect-encoding subtitle.srt
110    ///
111    /// # Multiple specific files
112    /// subx detect-encoding file1.srt file2.ass file3.vtt
113    ///
114    /// # Glob patterns (shell expansion)
115    /// subx detect-encoding *.srt
116    /// subx detect-encoding subtitles/*.{srt,ass}
117    ///
118    /// # Mixed paths
119    /// subx detect-encoding /absolute/path/file.srt ./relative/file.ass
120    /// ```
121    ///
122    /// # Error Handling
123    ///
124    /// If a file cannot be read or analyzed:
125    /// - The error is reported for that specific file
126    /// - Processing continues with remaining files
127    /// - Non-text files are skipped with a warning
128    /// - Permission errors are clearly indicated
129    #[arg(required = true)]
130    pub file_paths: Vec<String>,
131}