1use crate::url::Url;
2use anyhow::Result;
3use futures::stream::{self, StreamExt};
4use std::path::PathBuf;
5use std::sync::Arc;
6use std::thread;
7
8pub mod cli;
9mod error;
10mod html;
11mod markdown;
12pub mod url;
13
14pub use cli::Cli;
15pub use error::Error;
16
17include!(concat!(env!("OUT_DIR"), "/built.rs"));
18
19pub fn version() -> String {
21 format!(
22 "{}\nBuild Time: {}\nTarget: {}\nProfile: {}",
23 env!("CARGO_PKG_VERSION"),
24 BUILT_TIME_UTC,
25 TARGET,
26 PROFILE
27 )
28}
29
/// Crate-wide `User-Agent` value that mimics Firefox 123 on macOS.
///
/// Presumably sent with outgoing HTTP requests by the fetching code in other
/// modules — verify at the use sites.
pub(crate) const USER_AGENT_STRING: &str =
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0";
33
/// Runtime options consumed by [`process_urls`].
#[derive(Clone, Debug)]
pub struct Config {
    /// When `true`, diagnostic messages (for example `Processing: <url>`) are
    /// printed to stderr; the flag is also forwarded to the per-URL helpers.
    pub verbose: bool,
    /// Forwarded to the per-URL processing helpers; presumably the maximum
    /// number of retry attempts per URL — confirm in the `url` module.
    pub max_retries: u32,
    /// Output location. It is used directly as the output path when
    /// `single_file` and `has_output` are both set and it is not an existing
    /// directory; otherwise it is the base handed to `url::create_output_path`.
    pub output_base: PathBuf,
    /// Part of the condition that lets `output_base` be used as the output
    /// file itself (presumably set when exactly one URL is processed — confirm
    /// against the CLI).
    pub single_file: bool,
    /// Part of the same condition as `single_file` (presumably set when the
    /// user supplied an explicit output path — confirm against the CLI).
    pub has_output: bool,
    /// When `Some`, the content of every successfully processed URL is also
    /// combined into this single "packed" file.
    pub pack_file: Option<PathBuf>,
}
44
45pub async fn process_urls(
47 urls: Vec<String>,
48 config: Config,
49) -> Result<Vec<(String, anyhow::Error)>> {
50 use indicatif::{ProgressBar, ProgressStyle};
51 use tokio::io::AsyncWriteExt;
52
53 let pb = if urls.len() > 1 {
54 let pb = ProgressBar::new(urls.len() as u64);
55 pb.set_style(
56 ProgressStyle::default_bar()
57 .template(
58 "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta})",
59 )
60 .unwrap()
61 .progress_chars("#>-"),
62 );
63 Some(pb)
64 } else {
65 None
66 };
67
68 let pb = Arc::new(pb);
69 let concurrency_limit = thread::available_parallelism()
71 .map(|n| n.get() * 2) .unwrap_or(10);
73
74 let should_pack = config.pack_file.is_some();
76 let pack_path = config.pack_file.clone();
77 let packed_content = if should_pack {
78 Arc::new(tokio::sync::Mutex::new(Vec::with_capacity(urls.len())))
79 } else {
80 Arc::new(tokio::sync::Mutex::new(Vec::new()))
81 };
82
83 let urls_for_ordering = urls.clone();
85
86 let results = stream::iter(urls.into_iter().map(|url| {
87 let pb = Arc::clone(&pb);
88 let config = config.clone();
89 let packed_content = Arc::clone(&packed_content);
90 async move {
91 if config.verbose {
92 eprintln!("Processing: {}", url);
93 }
94 match Url::parse(&url) {
95 Ok(url_parsed) => {
96 let out_path = if config.single_file
97 && config.has_output
98 && !config.output_base.is_dir()
99 {
100 Some(config.output_base)
101 } else {
102 url::create_output_path(&url_parsed, &config.output_base).ok()
103 };
104
105 let result = if should_pack {
106 match url::process_url_with_content(
108 &url,
109 out_path,
110 config.verbose,
111 config.max_retries,
112 )
113 .await
114 {
115 Ok(content) => {
116 if let Some(md_content) = content {
117 let mut content_vec = packed_content.lock().await;
118 content_vec.push((url.clone(), md_content));
119 }
120 Ok(())
121 }
122 Err(e) => Err(e),
123 }
124 } else {
125 url::process_url_with_retry(
127 &url,
128 out_path,
129 config.verbose,
130 config.max_retries,
131 )
132 .await
133 };
134
135 if let Some(pb) = &*pb {
136 pb.inc(1);
137 }
138 result
139 }
140 Err(e) => {
141 if let Some(pb) = &*pb {
142 pb.inc(1);
143 }
144 Err((url, e.into()))
145 }
146 }
147 }
148 }))
149 .buffer_unordered(concurrency_limit)
150 .collect::<Vec<_>>()
151 .await;
152
153 if let Some(pb) = &*pb {
154 pb.finish_with_message("Done!");
155 }
156
157 if let Some(pack_path) = pack_path {
159 if config.verbose {
160 eprintln!("Writing packed content to {}", pack_path.display());
161 }
162
163 if let Some(parent) = pack_path.parent() {
164 if let Err(e) = tokio::fs::create_dir_all(parent).await {
165 eprintln!(
166 "Warning: Failed to create directory {}: {}",
167 parent.display(),
168 e
169 );
170 }
171 }
172
173 let mut packed_file = match tokio::fs::File::create(&pack_path).await {
174 Ok(file) => file,
175 Err(e) => {
176 eprintln!("Error creating packed file: {}", e);
177 return Ok(results.into_iter().filter_map(|r| r.err()).collect());
178 }
179 };
180
181 let mut content_to_write = packed_content.lock().await;
183
184 let mut url_to_index = std::collections::HashMap::new();
186 for (i, url) in urls_for_ordering.iter().enumerate() {
187 url_to_index.insert(url.clone(), i);
188 }
189
190 content_to_write.sort_by(|a, b| {
191 let a_idx = url_to_index.get(&a.0).unwrap_or(&usize::MAX);
192 let b_idx = url_to_index.get(&b.0).unwrap_or(&usize::MAX);
193 a_idx.cmp(b_idx)
194 });
195
196 for (url, content) in content_to_write.iter() {
197 if let Err(e) = packed_file
198 .write_all(format!("# {}\n\n{}\n\n---\n\n", url, content).as_bytes())
199 .await
200 {
201 eprintln!("Error writing to packed file: {}", e);
202 }
203 }
204 }
205
206 let mut errors = Vec::new();
208 for r in results {
209 match r {
210 Ok(()) => {}
211 Err(e) => {
212 eprintln!("Warning: Failed to process {}: {}", e.0, e.1);
213 errors.push(e);
214 }
215 }
216 }
217
218 Ok(errors)
219}