add_determinism/add_det/handlers/
javadoc.rs

1/* SPDX-License-Identifier: GPL-3.0-or-later */
2
3use anyhow::Result;
4use log::{debug, info};
5use regex::{Regex, RegexBuilder};
6use std::io;
7use std::io::{BufRead, BufWriter, Write};
8use std::path::Path;
9use std::sync::Arc;
10
11use super::{config, InputOutputHelper};
12
/// How many lines from the top of a file are inspected before giving up on
/// finding a line that needs to be rewritten.
const HEADER_LINES_TO_CHECK: i32 = 15;
14
15pub struct Javadoc {
16    config: Arc<config::Config>,
17}
18
19impl Javadoc {
20    pub fn boxed(config: &Arc<config::Config>) -> Box<dyn super::Processor + Send + Sync> {
21        Box::new(Self { config: config.clone() })
22    }
23
24    fn process_line(&self, line: &str) -> Result<Option<String>> {
25        // javadoc files have the date in two places in the header:
26        //   <!-- Generated by javadoc (21) on Sat Mar 02 16:07:41 UTC 2024 -->
27        //   <meta name="dc.created" content="2024-03-02">
28        //
29        // We strip the javadoc version and date in the first line, based on the
30        // assumption that this is just a freeform comment and the date is not
31        // parsed by anything. The information that this was generated by javadoc is
32        // retained to that is useful information (and because people sometimes
33        // modify generated files by hand, wasting their time).
34        //
35        // In the second line, we parse the date as %Y-%m-%d, compare is with
36        // $SOURCE_DATE_EPOCH, and replace if newer. This means that we'll not
37        // rewrite this line in pages that were generated a long time ago.
38
39        let re = Regex::new(r"(.*<!-- Generated by javadoc) .+ (-->.*)")?;
40        if let Some(caps) = re.captures(line) {
41            return Ok(Some(format!("{} {}", &caps[1], &caps[2])));
42        }
43
44        let epoch = self.config.source_date_epoch
45            .map(|v| chrono::DateTime::from_timestamp(v, 0).unwrap());
46
47        if let Some(epoch) = epoch {
48            let re = RegexBuilder::new(r#"<(meta name="(date|dc\.created)" content=)"([^"]+)">"#)
49                .case_insensitive(true)
50                .build()?;
51
52            if let Some(caps) = re.captures(line) {
53                match chrono::NaiveDate::parse_from_str(&caps[3], "%Y-%m-%d") {
54                    Err(_) => {
55                        debug!("Failed to parse naive date: {:?}", &caps[3]);
56                    }
57                    Ok(date) => {
58                        debug!("Matched meta {} date {} → {:?}", &caps[2], &caps[3], date);
59                        if epoch.date_naive() < date {
60                            let ts = epoch.format("%Y-%m-%d");
61                            return Ok(Some(format!("<{}\"{}\">", &caps[1], ts)));
62                        }
63                    }
64                }
65            }
66        }
67
68        Ok(None)
69    }
70}
71
72impl super::Processor for Javadoc {
73    fn name(&self) -> &str {
74        "javadoc"
75    }
76
77    fn filter(&self, path: &Path) -> Result<bool> {
78        Ok(
79            self.config.ignore_extension ||
80                path.extension().is_some_and(|x| x == "html")
81            // && path.to_str().is_some_and(|x| x.contains("/usr/share/javadoc/"))
82        )
83    }
84
85    fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
86        let mut have_mod = false;
87        let mut after_header = false;
88
89        let (mut io, input) = InputOutputHelper::open(input_path, self.config.check, true)?;
90
91        io.open_output(false)?;
92        let mut output = BufWriter::new(io.output.as_mut().unwrap().as_file_mut());
93
94        let head_end_re = RegexBuilder::new(r"</head>")
95            .case_insensitive(true)
96            .build()?;
97
98        let mut num = 0;
99        for line in input.lines() {
100            let line = match line {
101                Err(e) => {
102                    if e.kind() == io::ErrorKind::InvalidData {
103                        info!("{}:{}: {}, ignoring.", input_path.display(), num + 1, e);
104                        return Ok(super::ProcessResult::Noop);
105                    } else {
106                        return Err(e.into());
107                    }
108                }
109                Ok(line) => line
110            };
111
112            num += 1;
113
114            let line2 = if !after_header { self.process_line(&line)? } else { None };
115
116            if line2.is_some() && !have_mod {
117                debug!("{}:{}: found first line to replace: {:?}", input_path.display(), num, line);
118                have_mod = true;
119            }
120
121            if !after_header && (num >= HEADER_LINES_TO_CHECK || head_end_re.find(&line).is_some()) {
122                if !have_mod {
123                    let why = if num >= HEADER_LINES_TO_CHECK {
124                        format!("first {HEADER_LINES_TO_CHECK} lines")
125                    } else {
126                        String::from("until header end")
127                    };
128
129                    debug!("{}:{}: found nothing to replace {}", input_path.display(), num, why);
130                    return Ok(super::ProcessResult::Noop);
131                }
132
133                after_header = true;
134            }
135
136            writeln!(output, "{}", line2.unwrap_or(line))?;
137        }
138
139        output.flush()?;
140        drop(output);
141
142        io.finalize(have_mod)
143    }
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Only paths whose extension is exactly `html` pass the filter.
    #[test]
    fn test_filter_html() {
        let cfg = Arc::new(config::Config::empty(1704106800, false));
        let handler = Javadoc::boxed(&cfg);

        assert!(handler.filter(Path::new("/some/path/page.html")).unwrap());

        for rejected in [
            "/some/path/page.htmll",
            "/some/path/page.html.jpg",
            "/some/path/page",
            "/some/path/html",
            "/some/path/html_html",
            "/",
        ] {
            assert!(
                !handler.filter(Path::new(rejected)).unwrap(),
                "{rejected} should not be accepted"
            );
        }
    }

    /// Exercises both kinds of header line that `process_line` rewrites.
    #[test]
    fn test_process_line() {
        let handler = Javadoc {
            config: Arc::new(config::Config::empty(1704106800, false)),
        };
        let rewrite = |input: &str| handler.process_line(input).unwrap();

        assert_eq!(
            rewrite("<!-- Generated by javadoc (21) on Sat Mar 02 16:07:41 UTC 2024 -->").as_deref(),
            Some("<!-- Generated by javadoc -->"),
        );

        // Dates newer than the epoch are clamped; tag case is preserved.
        assert_eq!(
            rewrite(r#"<meta name="dc.created" content="2024-03-02">"#).as_deref(),
            Some(r#"<meta name="dc.created" content="2024-01-01">"#),
        );
        assert_eq!(
            rewrite(r#"<META NAME="dc.created" CONTENT="2024-03-02">"#).as_deref(),
            Some(r#"<META NAME="dc.created" CONTENT="2024-01-01">"#),
        );

        let untouched = [
            // If we're running on an already processed file, don't report this as a match
            "<!-- Generated by javadoc -->",
            // Too old
            r#"<META NAME="dc.created" CONTENT="2023-09-09">"#,
            // Misformatted
            r#"<META NAME="dc.created" CONTENT="2025">"#,
            r#"<META NAME="dc.created" CONTENT="2025-13-01">"#,
            r#"<META NAME="dc.created" CONTENT="2025-01-40">"#,
        ];
        for input in untouched {
            assert!(rewrite(input).is_none(), "{input} should be left alone");
        }
    }
}