1pub mod ar;
4pub mod gzip;
5pub mod javadoc;
6pub mod pyc;
7pub mod zip;
8
9use anyhow::{bail, Context, Result};
10use log::{log, debug, info, warn, Level};
11use std::ascii::escape_default;
12use std::collections::HashMap;
13use std::fmt::Write;
14use std::ffi::OsStr;
15use std::fs;
16use std::fs::{File, Metadata};
17use std::io::{self, Seek};
18use std::os::unix::fs as unix_fs;
19use std::os::unix::fs::MetadataExt as _;
20use std::path::Path;
21use std::sync::Arc;
22use tempfile::NamedTempFile;
23use thiserror::Error;
24
25#[cfg(target_os = "linux")]
26use std::os::linux::fs::MetadataExt as _;
27#[cfg(target_os = "macos")]
28use std::os::macos::fs::MetadataExt as _;
29
30use super::config;
31
/// Format-level errors raised while parsing an input file.
///
/// `ProcessResult::convert_and_warn` downcasts failures to this type: a
/// failure that is one of these variants is reported as
/// `ProcessResult::BadFormat`, anything else as `ProcessResult::Error`.
#[derive(Error, Debug)]
pub enum Error {
    /// The input ended early. Field 0 is the offset (printed in hex) and
    /// field 1 the number of bytes that could not be taken there.
    #[error("unexpected EOF, cannot take {1} bytes at offset 0x{0:x}")]
    UnexpectedEOF(u64, usize),

    /// The magic bytes did not match. Field 0 is the offset, field 1 the
    /// bytes that were found ("have") and field 2 the expected bytes
    /// ("exp."); both byte strings are rendered through `asciify`.
    #[error("wrong magic at offset {0}\n (have \"{}\", exp. \"{}\")",
            asciify(.1), asciify(.2))]
    BadMagic(u64, Vec<u8>, &'static [u8]),

    /// Any other format problem; the message is displayed verbatim.
    #[error("{0}")]
    Other(String),
}
44
/// Renders arbitrary bytes as printable ASCII text.
///
/// Each byte is passed through [`std::ascii::escape_default`], so printable
/// ASCII is kept as is, quotes and backslashes are backslash-escaped,
/// `\t`/`\r`/`\n` use their short escapes and every other byte becomes
/// `\xNN`.
pub fn asciify<B: AsRef<[u8]>>(buf: B) -> String {
    let mut text = String::new();

    for &byte in buf.as_ref() {
        // `escape_default` only yields ASCII bytes, so widening each of them
        // to a `char` produces exactly the same text as decoding as UTF-8.
        text.extend(escape_default(byte).map(char::from));
    }

    text
}
54
/// Outcome of looking at one file.
///
/// The declaration order is significant: `PartialOrd` is derived, and
/// `extend_and_warn` keeps the greater of two results, so later variants
/// take precedence over earlier ones.
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
pub enum ProcessResult {
    /// Nothing looked at the entry; `Stats::add_one` does not count it.
    Ignored,
    /// The file was processed and no modification was needed.
    Noop,
    /// The file was replaced by a new file (rename of a temporary copy).
    Replaced,
    /// The contents of the existing file were overwritten in place.
    Rewritten,
    /// Processing failed with one of the format errors in `Error`.
    BadFormat,
    /// Processing failed with any other kind of error.
    Error,
}
64
65impl ProcessResult {
66 pub fn convert_and_warn(input_path: &Path, result: Result<ProcessResult>) -> ProcessResult {
67 match result {
68 Err(err) => {
69 warn!("{}: failed to process: {}", input_path.display(), err);
70
71 if err.downcast_ref::<Error>().is_some() {
72 ProcessResult::BadFormat
73 } else {
74 ProcessResult::Error
75 }
76 }
77 Ok(res) => res
78 }
79 }
80
81 pub fn extend_and_warn(&mut self, input_path: &Path, result: Result<ProcessResult>) {
82 let converted = ProcessResult::convert_and_warn(input_path, result);
83
84 if (*self == ProcessResult::Replaced && converted == ProcessResult::Rewritten) ||
85 (*self == ProcessResult::Rewritten && converted == ProcessResult::Replaced) {
86 warn!("{}: different process result, hardlink count modified externally?",
87 input_path.display());
88 }
89
90 if *self < converted {
91 *self = converted;
92 }
93 }
94}
95
/// Interface implemented by every file handler.
pub trait Processor {
    /// Returns the handler's name, as used in log messages.
    fn name(&self) -> &str;

    /// One-time setup hook, called by `make_handlers` right after the
    /// handler is constructed. The default implementation does nothing and
    /// succeeds.
    fn initialize(&mut self) -> Result<()> {
        Ok(())
    }

    /// Reports whether this handler wants to process `path`.
    fn filter(&self, path: &Path) -> Result<bool>;

    /// Processes `path` and reports what happened to it.
    fn process(&self, path: &Path) -> Result<ProcessResult>;
}
110
/// Counters gathered while walking and processing inputs.
#[derive(Debug, Default, PartialEq)]
pub struct Stats {
    /// Number of directories encountered.
    pub directories: u64,

    /// Number of non-directory entries encountered, whether or not they are
    /// regular files.
    pub files: u64,

    /// Number of results that were anything other than
    /// `ProcessResult::Ignored`.
    pub inodes_processed: u64,

    /// Number of `ProcessResult::Replaced` results.
    pub inodes_replaced: u64,
    /// Number of `ProcessResult::Rewritten` results.
    pub inodes_rewritten: u64,

    /// Number of `ProcessResult::BadFormat` results.
    pub misunderstood: u64,

    /// Number of `ProcessResult::Error` results, plus directory-walk
    /// failures.
    pub errors: u64,
}
142
143impl Stats {
144 pub fn new() -> Self { Default::default() }
145
146 pub fn add_one(&mut self, result: ProcessResult) {
147 match result {
148 ProcessResult::Ignored => { return; }
149 ProcessResult::Noop => {}
150 ProcessResult::Replaced => { self.inodes_replaced += 1; }
151 ProcessResult::Rewritten => { self.inodes_rewritten += 1; }
152 ProcessResult::BadFormat => { self.misunderstood += 1; }
153 ProcessResult::Error => { self.errors += 1; }
154 }
155
156 self.inodes_processed += 1;
157 }
158
159 pub fn add(&mut self, other: &Stats) {
160 self.directories += other.directories;
161 self.files += other.files;
162 self.inodes_processed += other.inodes_processed;
163 self.inodes_replaced += other.inodes_replaced;
164 self.inodes_rewritten += other.inodes_rewritten;
165 self.misunderstood += other.misunderstood;
166 self.errors += other.errors;
167 }
168
169 pub fn summarize(&self) {
170 info!("Scanned {} directories and {} files,\n \
171 processed {} inodes,\n \
172 {} modified ({} replaced + {} rewritten),\n \
173 {} unsupported format, {} errors\
174 ",
175 self.directories, self.files,
176 self.inodes_processed,
177 self.inodes_replaced + self.inodes_rewritten,
178 self.inodes_replaced, self.inodes_rewritten,
179 self.misunderstood, self.errors);
180 }
181}
182
/// Signature of the constructor functions stored in `HANDLERS`: given the
/// shared configuration, build a boxed, thread-safe handler.
pub type HandlerBoxed = fn(&Arc<config::Config>) -> Box<dyn Processor + Send + Sync>;
184
/// Table of all known handlers as `(name, flag, constructor)`.
///
/// `make_handlers` instantiates entries in this order, and the resulting
/// position of a handler is its bit in the per-inode "already seen" mask.
///
/// NOTE(review): the boolean is not read anywhere in this file; presumably
/// it marks handlers that are enabled by default — confirm against the
/// configuration code.
pub const HANDLERS: &[(&str, bool, HandlerBoxed)] = &[
    ("ar", true, ar::Ar::boxed),
    ("jar", true, zip::Zip::boxed_jar),
    ("javadoc", true, javadoc::Javadoc::boxed),
    ("gzip", true, gzip::Gzip::boxed),
    ("pyc", true, pyc::Pyc::boxed),
    ("zip", true, zip::Zip::boxed_zip),
    ("pyc-zero-mtime", false, pyc::PycZeroMtime::boxed),
];
194
195pub fn handler_names() -> Vec<&'static str> {
196 HANDLERS.iter()
197 .map(|(name, _, _)| *name)
198 .collect()
199}
200
201pub fn make_handlers(config: &Arc<config::Config>) -> Result<Vec<Box<dyn Processor + Send + Sync>>> {
202 let mut handlers: Vec<Box<dyn Processor + Send + Sync>> = vec![];
203
204 for (name, _, func) in HANDLERS {
205 if config.handler_names.contains(name) {
206 let mut handler = func(config);
207 match handler.initialize() {
208 Err(e) => {
209 if config.strict_handlers {
210 bail!("Cannot initialize handler {}: {}", handler.name(), e);
211 }
212 warn!("Handler {} skipped: {}", handler.name(), e);
213 }
214 Ok(()) => {
215 debug!("Initialized handler {}.", handler.name());
216 handlers.push(handler);
217 }
218 }
219 }
220 }
221
222 Ok(handlers)
223}
224
/// Creates the empty inode table used while walking inputs.
///
/// Keys are inode numbers; each value is a bitmask with one bit per handler
/// position that has already been selected for that inode.
pub fn inodes_seen() -> HashMap<u64, u8> {
    let seen: HashMap<u64, u8> = HashMap::default();
    seen
}
228
229pub fn do_print(config: &Arc<config::Config>) -> Result<()> {
230 let handler = pyc::Pyc::new(config);
231 let mut w = String::new();
232
233 for (n, input_path) in config.inputs.iter().enumerate() {
234 if n > 0 {
235 writeln!(w)?; }
237 handler.pretty_print(&mut w, input_path)?;
238 }
239
240 print!("{w}");
241
242 Ok(())
243}
244
245pub fn do_normal_work(config: &Arc<config::Config>) -> Result<Stats> {
246 let handlers = make_handlers(config)?;
247 let mut inodes_seen = inodes_seen();
248 let mut total = Stats::new();
249
250 for input_path in &config.inputs {
251 let stats = process_file_or_dir(&handlers, &mut inodes_seen, input_path, None);
252 total.add(&stats);
253 }
254
255 Ok(total)
256}
257
/// Optional callback that takes over the actual processing of a file.
///
/// When it is `Some`, `process_file` does not call `Processor::process`
/// itself; it calls the callback once with the bitmask of handlers whose
/// filter matched and the path of the file.
pub type ProcessWrapper<'a> = Option<&'a dyn Fn(u8, &Path) -> Result<()>>;
259
260fn process_file(
261 handlers: &[Box<dyn Processor + Send + Sync>],
262 already_seen: &mut u8,
263 input_path: &Path,
264 process_wrapper: ProcessWrapper,
265) -> Result<ProcessResult> {
266
267 let mut entry_mod = ProcessResult::Ignored;
271
272 let mut selected_handlers = 0;
273
274 for (n_processor, processor) in handlers.iter().enumerate() {
275 if *already_seen & (1 << n_processor) > 0 {
279 debug!("{}: already seen by {} handler",
280 input_path.display(), processor.name());
281 continue;
282 }
283
284 let cond = processor.filter(input_path)?;
285 if cond {
286 debug!("{}: matched by handler {}", input_path.display(), processor.name());
287
288 selected_handlers |= 1 << n_processor;
289
290 if process_wrapper.is_none() {
291 let res = processor.process(input_path);
292 entry_mod.extend_and_warn(input_path, res);
293 }
294 }
295
296 *already_seen |= selected_handlers;
297 }
298
299 if selected_handlers > 0 {
300 if let Some(func) = process_wrapper {
301 assert!(entry_mod == ProcessResult::Ignored);
302 func(selected_handlers, input_path)?;
303 }
304 }
305
306 Ok(entry_mod)
307}
308
309fn process_entry(
310 handlers: &[Box<dyn Processor + Send + Sync>],
311 inodes_seen: &mut HashMap<u64, u8>,
312 process_wrapper: ProcessWrapper,
313 stats: &mut Stats,
314 entry: &walkdir::DirEntry,
315) -> Result<ProcessResult> {
316
317 debug!("Looking at {}…", entry.path().display());
318
319 let name = unwrap_os_string(entry.file_name())?;
320 if name.starts_with(".#.") && name.ends_with(".tmp") {
321 return Ok(ProcessResult::Ignored);
323 }
324
325 let metadata = entry.metadata()?;
326 if metadata.is_dir() {
327 stats.directories += 1;
328 return Ok(ProcessResult::Ignored);
329 }
330
331 stats.files += 1;
332 if !metadata.is_file() {
333 debug!("{}: not a file", entry.path().display());
334 return Ok(ProcessResult::Ignored);
335 }
336
337 let inode = metadata.ino();
338 let mut already_seen = *inodes_seen.get(&inode).unwrap_or(&0);
339
340 let entry_mod = process_file(
341 handlers,
342 &mut already_seen,
343 entry.path(),
344 process_wrapper)?;
345
346 inodes_seen.insert(inode, already_seen); if entry_mod != ProcessResult::Noop {
348 let metadata = entry.metadata()?;
350 let inode2 = metadata.ino();
351 if inode2 != inode {
352 inodes_seen.insert(inode2, already_seen);
356 }
357 }
358
359 Ok(entry_mod)
360}
361
362pub fn process_file_or_dir(
363 handlers: &[Box<dyn Processor + Send + Sync>],
364 inodes_seen: &mut HashMap<u64, u8>,
365 input_path: &Path,
366 process_wrapper: ProcessWrapper,
367) -> Stats {
368
369 let mut stats = Stats::new();
370
371 for entry in walkdir::WalkDir::new(input_path)
372 .follow_links(false)
373 .into_iter() {
374 let entry = match entry {
375 Err(e) => {
376 warn!("Failed to process: {e}");
377 stats.errors += 1;
378 continue;
379 }
380 Ok(entry) => entry
381 };
382
383 let res = process_entry(handlers, inodes_seen, process_wrapper, &mut stats, &entry);
384 stats.add_one(ProcessResult::convert_and_warn(entry.path(), res));
385 }
386
387 stats
388}
389
390fn unwrap_os_string(filename: &OsStr) -> Result<&str> {
391 match filename.to_str() {
392 Some(s) => Ok(s),
393 None => {
394 bail!("Invalid file name {:?}", filename);
395 }
396 }
397}
398
/// Bookkeeping for rewriting one input file through a temporary copy.
///
/// Dropping the helper while `output` is still set discards the temporary
/// file.
pub struct InputOutputHelper<'a> {
    /// Path of the file being processed.
    pub input_path: &'a Path,
    /// Metadata of the input, captured when it was opened.
    pub input_metadata: Metadata,

    /// Temporary output file; `None` until `open_output` has been called and
    /// again after the file has replaced the input.
    pub output: Option<NamedTempFile>,

    /// When set, `finalize` only reports what would happen and leaves the
    /// input untouched.
    pub check: bool,
    /// When set, modifications are logged at info instead of debug level.
    pub verbose: bool,
}
409
410impl Drop for InputOutputHelper<'_> {
411 fn drop(&mut self) {
412 if let Some(f) = self.output.take() {
413 debug!("{}: discarding temporary copy", f.path().display());
414 if let Err(e) = f.close() {
415 if e.kind() != io::ErrorKind::NotFound {
416 warn!("Failed to remove tempfile for {}: {}", self.input_path.display(), e);
417 }
418 }
419 }
420 }
421}
422
423impl<'a> InputOutputHelper<'a> {
424 pub fn open(
425 input_path: &'a Path,
426 check: bool,
427 verbose: bool,
428 ) -> Result<(Self, io::BufReader<File>)> {
429
430 let input = File::open(input_path)
431 .with_context(|| format!("Cannot open {input_path:?}"))?;
432
433 let input_metadata = input.metadata()?;
434 let input = io::BufReader::new(input);
435
436 let io = InputOutputHelper {
437 input_path,
438 input_metadata,
439 output: None,
440 check,
441 verbose,
442 };
443
444 Ok((io, input))
445 }
446
447 pub fn open_output(&mut self, need_real_file_for_check: bool) -> Result<()> {
448 assert!(self.output.is_none());
449
450 let tmpfile = if self.check && !need_real_file_for_check {
451 tempfile::Builder::new()
452 .disable_cleanup(true)
453 .make(|_| File::options()
454 .read(true)
455 .write(true)
456 .open("/dev/null"))?
457 } else {
458 let prefix = format!(
459 ".#.{}",
460 self.input_path.file_name().and_then(|s| s.to_str()).unwrap_or("tmp")
461 );
462
463 if self.check {
464 NamedTempFile::with_prefix(prefix)?
465 } else {
466 NamedTempFile::with_prefix_in(prefix, self.input_path.parent().unwrap())?
469 }
470 };
471
472 self.output = Some(tmpfile);
473
474 Ok(())
475 }
476
477 pub fn finalize(&mut self, have_mod: bool) -> Result<ProcessResult> {
478 let meta = &self.input_metadata;
479
480 if !have_mod {
481 Ok(ProcessResult::Noop)
482
483 } else if self.check {
484 Ok(
486 if meta.nlink() == 1 {
487 ProcessResult::Replaced
488 } else {
489 ProcessResult::Rewritten
490 }
491 )
492
493 } else {
494 let output = self.output.as_mut().unwrap();
495
496 if meta.nlink() == 1 {
500 log!(if self.verbose { Level::Info } else { Level::Debug },
501 "{}: replacing with normalized version", self.input_path.display());
502
503 output.disable_cleanup(true);
504
505 output.as_file_mut().set_permissions(meta.permissions())?;
506 output.as_file_mut().set_modified(meta.modified()?)?;
507
508 if let Err(e) = unix_fs::lchown(output.path(), Some(meta.st_uid()), Some(meta.st_gid())) {
509 if e.kind() == io::ErrorKind::PermissionDenied {
510 warn!("{}: cannot change file ownership, ignoring", output.path().display());
511 } else {
512 bail!("{}: cannot change file ownership: {}", output.path().display(), e);
513 }
514 }
515
516 fs::rename(output.path(), self.input_path)?;
517 self.output.take(); Ok(ProcessResult::Replaced)
520
521 } else {
522 log!(if self.verbose { Level::Info } else { Level::Debug },
523 "{}: rewriting with normalized contents", self.input_path.display());
524
525 let file = output.as_file_mut();
526
527 file.seek(io::SeekFrom::Start(0))?;
528
529 let mut input_writer = File::options().write(true).open(self.input_path)?;
530 let len = io::copy(file, &mut input_writer)?;
531 input_writer.set_len(len)?;
533 input_writer.set_modified(meta.modified()?)?;
534
535 Ok(ProcessResult::Rewritten)
536 }
537 }
538 }
539}
540
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `asciify` on plain ASCII, quotes, control characters,
    /// multi-byte UTF-8 input and NUL bytes.
    #[test]
    fn filter_asciify() {
        assert_eq!(asciify("asdf"), "asdf");
        assert_eq!(asciify("\"\""), "\\\"\\\"");
        assert_eq!(asciify("\n\t\r"), "\\n\\t\\r");
        assert_eq!(asciify("zębina"), "z\\xc4\\x99bina");
        assert_eq!(asciify([0; 4]), "\\x00\\x00\\x00\\x00");
    }
}