gh_docs_download/
cli.rs

1//! Command-line interface for the GitHub documentation downloader.
2//!
3//! This module provides the CLI argument parsing and main application logic.
4
5use crate::error::{GitHubDocsError, Result};
6use crate::types::{RepoName, RepoOwner, RepoSpec};
7use clap::Parser;
8use url::Url;
9
10/// A CLI tool to download documentation files from GitHub repositories.
11#[derive(Parser, Debug)]
12#[command(author, version, about, long_about = None)]
13pub struct Args {
14    /// GitHub tree URL (e.g., "<https://github.com/owner/repo/tree/branch/path>")
15    #[arg(short = 'r', long)]
16    pub repo: String,
17
18    /// Output directory for downloaded files
19    #[arg(short = 'o', long, default_value = "downloads")]
20    pub output: String,
21
22    /// Only list files without downloading
23    #[arg(long)]
24    pub list_only: bool,
25
26    /// Include subdirectories recursively
27    #[arg(long, default_value = "true")]
28    pub recursive: bool,
29}
30
31impl Args {
32    /// Parse GitHub tree URL into repository spec and documentation path.
33    ///
34    /// Expected format: `https://github.com/owner/repo/tree/branch/path`
35    ///
36    /// # Returns
37    ///
38    /// Returns `(RepoSpec, String)` where the second element is the documentation path.
39    ///
40    /// # Errors
41    ///
42    /// Returns `GitHubDocsError::InvalidRepoFormat` if the URL is not a valid GitHub tree URL.
43    pub fn parse_repo_spec(&self) -> Result<(RepoSpec, String)> {
44        let url = Url::parse(&self.repo)?;
45
46        // Verify it's a GitHub URL
47        if url.host_str() != Some("github.com") {
48            return Err(GitHubDocsError::InvalidRepoFormat {
49                input: self.repo.clone(),
50            });
51        }
52
53        let path_segments: Vec<&str> = url
54            .path_segments()
55            .ok_or_else(|| GitHubDocsError::InvalidRepoFormat {
56                input: self.repo.clone(),
57            })?
58            .collect();
59
60        // Must be: /owner/repo/tree/branch/path...
61        if path_segments.len() < 5 || path_segments[2] != "tree" {
62            return Err(GitHubDocsError::InvalidRepoFormat {
63                input: format!("Expected GitHub tree URL format: https://github.com/owner/repo/tree/branch/path, got: {}", self.repo),
64            });
65        }
66
67        let owner = RepoOwner::new(path_segments[0])?;
68        let repo_name = RepoName::new(path_segments[1])?;
69        let repo_spec = RepoSpec::new(owner, repo_name);
70
71        // Extract path after /tree/branch/
72        let doc_path = path_segments[4..].join("/");
73
74        Ok((repo_spec, doc_path))
75    }
76
77    /// Validate the arguments and return any validation errors.
78    ///
79    /// # Errors
80    ///
81    /// Returns `GitHubDocsError::InvalidRepoFormat` if the repository URL format is invalid.
82    pub fn validate(&self) -> Result<()> {
83        // Validate repository format
84        let _ = self.parse_repo_spec()?;
85
86        // Validate output directory path
87        if self.output.is_empty() {
88            return Err(GitHubDocsError::InvalidRepoFormat {
89                input: "Output directory cannot be empty".to_string(),
90            });
91        }
92
93        Ok(())
94    }
95}
96
97/// CLI application runner.
98pub struct CliApp {
99    args: Args,
100}
101
102impl CliApp {
103    /// Create a new CLI application with the given arguments.
104    #[must_use]
105    pub fn new(args: Args) -> Self {
106        Self { args }
107    }
108
109    /// Run the CLI application.
110    ///
111    /// This is the main entry point that orchestrates the entire operation:
112    /// 1. Validate arguments
113    /// 2. Create downloader
114    /// 3. Discover documentation directories
115    /// 4. Collect documentation files
116    /// 5. Download or list files
117    ///
118    /// # Errors
119    ///
120    /// Returns `GitHubDocsError` if any step of the process fails.
121    pub fn run(&self) -> Result<()> {
122        // Validate arguments
123        self.args.validate()?;
124
125        // Parse repository specification and extract path from tree URL
126        let (repo_spec, doc_path) = self.args.parse_repo_spec()?;
127
128        // Create download configuration
129        let config = crate::downloader::DownloadConfig {
130            output_dir: self.args.output.clone(),
131            list_only: self.args.list_only,
132            recursive: self.args.recursive,
133            target_path: doc_path,
134        };
135
136        // Create downloader
137        let downloader = crate::downloader::GitHubDocsDownloader::new(repo_spec.clone(), config);
138
139        println!(
140            "Searching for documentation directories in {}...",
141            repo_spec.full_name()
142        );
143
144        // Discover documentation directories
145        let docs_dirs = downloader.find_docs_directories()?;
146
147        if docs_dirs.is_empty() {
148            return Err(GitHubDocsError::no_documentation_found(
149                repo_spec.owner.as_str(),
150                repo_spec.name.as_str(),
151            ));
152        }
153
154        println!("Found {} documentation directories:", docs_dirs.len());
155        for dir in &docs_dirs {
156            println!("  - {dir}");
157        }
158
159        // Collect all documentation files
160        let all_doc_files = downloader.get_all_documentation_files(&docs_dirs)?;
161
162        if all_doc_files.is_empty() {
163            println!("No documentation files found in the discovered directories.");
164            return Ok(());
165        }
166
167        // Download or list files
168        downloader.download_files(&all_doc_files)?;
169
170        Ok(())
171    }
172
173    /// Get the parsed arguments.
174    #[must_use]
175    pub fn args(&self) -> &Args {
176        &self.args
177    }
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// Build `Args` for `repo`, with the other fields set to the values every test shares.
    fn args_for(repo: &str) -> Args {
        Args {
            repo: repo.to_string(),
            output: "test".to_string(),
            list_only: false,
            recursive: true,
        }
    }

    /// A single-segment path after the branch is returned as-is.
    #[test]
    fn test_parse_repo_spec_tree_url() {
        let (repo_spec, path) = args_for("https://github.com/rust-lang/rust/tree/main/docs")
            .parse_repo_spec()
            .unwrap();

        assert_eq!(repo_spec.owner.as_str(), "rust-lang");
        assert_eq!(repo_spec.name.as_str(), "rust");
        assert_eq!(path, "docs");
    }

    /// Multiple segments after the branch are joined back together with `/`.
    #[test]
    fn test_parse_repo_spec_tree_url_nested_path() {
        let (repo_spec, path) =
            args_for("https://github.com/TanStack/router/tree/main/docs/router/eslint")
                .parse_repo_spec()
                .unwrap();

        assert_eq!(repo_spec.owner.as_str(), "TanStack");
        assert_eq!(repo_spec.name.as_str(), "router");
        assert_eq!(path, "docs/router/eslint");
    }

    /// A well-formed tree path on a non-GitHub host is rejected.
    #[test]
    fn test_parse_repo_spec_invalid_url() {
        let parsed = args_for("https://notgithub.com/owner/repo/tree/main/docs").parse_repo_spec();
        assert!(parsed.is_err());
    }

    /// A bare repository URL without `/tree/branch/path` is rejected.
    #[test]
    fn test_parse_repo_spec_missing_tree_structure() {
        let parsed = args_for("https://github.com/owner/repo").parse_repo_spec();
        assert!(parsed.is_err());
    }

    /// Input that is not a URL at all is rejected.
    #[test]
    fn test_parse_repo_spec_invalid_format() {
        let parsed = args_for("invalid-repo-format").parse_repo_spec();
        assert!(parsed.is_err());
    }
}