ci_manager/
util.rs

//! Utility functions for parsing and working with GitHub CLI output, plus other general-purpose helpers.
2use super::*;
3
4/// Parse a path from a string
5/// # Example
6/// ```
7/// # use ci_manager::util::first_path_from_str;
8/// use std::path::PathBuf;
9///
10/// let haystack = r#"multi line
11/// test string with/path/file.txt is
12/// valid"#;
13/// let path = first_path_from_str(haystack).unwrap();
14/// assert_eq!(path, PathBuf::from("with/path/file.txt"));
15///
16/// // No path in string is an error
17/// let haystack = "Random string with no path";
18/// assert!(first_path_from_str(haystack).is_err());
19///
20/// // Path with no leading '/' and no file extension is OK
21/// let haystack = "foo app/3-_2/t/3 bar";
22/// let path = first_path_from_str(haystack).unwrap();
23/// assert_eq!(path, PathBuf::from("app/3-_2/t/3"));
24///
25/// // More realistic example
26/// let haystack = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
27/// let path = first_path_from_str(haystack).unwrap();
28/// assert_eq!(
29///   path,
30///  PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
31/// );
32/// ```
33/// # Errors
34/// This function returns an error if no valid path is found in the string
35pub fn first_path_from_str(s: &str) -> Result<PathBuf> {
36    static RE: Lazy<Regex> =
37        Lazy::new(|| Regex::new(r"[a-zA-Z0-9-_.\/]+\/[a-zA-Z0-9-_.]+").unwrap());
38
39    let path_str = RE.find(s).context("No path found in string")?.as_str();
40    Ok(PathBuf::from(path_str))
41}
42
43/// Take the lines with failed jobs from the output of `gh run view`
44pub fn take_lines_with_failed_jobs(output: String) -> Vec<String> {
45    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"X.*ID [0-9]*\)").unwrap());
46
47    RE.find_iter(&output)
48        .map(|m| m.as_str().to_owned())
49        .collect()
50}
51
52/// Extract the job IDs from the lines with job information
53pub fn id_from_job_lines(lines: &[String]) -> Vec<String> {
54    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"ID (?<JOB_ID>[0-9]*)").unwrap());
55
56    lines
57        .iter()
58        .map(|line| {
59            RE.captures(line)
60                .unwrap_or_else(|| {
61                    panic!("Expected a line with a Job ID, but no ID found in line: {line}")
62                })
63                .name("JOB_ID")
64                .expect("Expected a Job ID")
65                .as_str()
66                .to_owned()
67        })
68        .collect()
69}
70
71/// Parse text for timestamps and IDs and remove them, returning the modified text without making a copy.
72///
73/// Some compromises are made to be able to remove timestamps in between other symbols e.g. '/83421321/'.
74/// but still avoid removing commit SHAs. That means that these symbols are also removed (any non-letter character
75/// preceding and following an ID).
76///
77/// # Example
78/// ```
79/// # use ci_manager::util::remove_timestamps_and_ids;
80/// # use pretty_assertions::assert_eq;
81/// let test_str = r"ID 21442749267 ";
82/// let modified = remove_timestamps_and_ids(test_str);
83/// assert_eq!(modified, "ID"); // Note that the space is removed
84///
85///
86/// let test_str = r#"ID 21442749267
87/// date: 2024-02-28 00:03:46
88/// other text"#;
89/// let modified = remove_timestamps_and_ids(test_str);
90/// assert_eq!(modified, "IDdate: \nother text");
91/// ```
92pub fn remove_timestamps_and_ids(text: &str) -> borrow::Cow<str> {
93    static RE: Lazy<Regex> = Lazy::new(|| {
94        Regex::new(
95            r"(?x)
96            # Timestamps like YYYY-MM-DD HH:MM:SS
97            ([0-9]{4}-[0-9]{2}-[0-9]{2}\x20[0-9]{2}:[0-9]{2}:[0-9]{2})
98            |
99            # IDs like 21442749267 but only if they are preceded and followed by non-letter characters
100            (?:[^[a-zA-Z]])([0-9]{10,11})(?:[^[a-zA-Z]])
101        ",
102        )
103        .unwrap()
104    });
105
106    RE.replace_all(text, "")
107}
108
109/// Remove non-ASCII characters from a string
110/// # Example
111/// ```
112/// # use ci_manager::util::remove_non_ascii;
113/// # use pretty_assertions::assert_eq;
114/// let test_str = "stríng wøth nön-æscii chåråcters";
115/// let modified = remove_non_ascii(test_str);
116/// assert_eq!(modified, "strng wth nn-scii chrcters");
117/// ```
118pub fn remove_non_ascii(text: &str) -> borrow::Cow<str> {
119    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^\x00-\x7F]+").unwrap());
120
121    RE.replace_all(text, "")
122}
123
124/// Remove ANSI codes from a string
125/// # Example
126/// ```
127/// # use ci_manager::util::remove_ansi_codes;
128/// # use pretty_assertions::assert_eq;
129/// let test_str = r#"[1;31mERROR:[0m Logfile of failure stored in"#;
130/// let modified = remove_ansi_codes(test_str);
131/// assert_eq!(modified, "ERROR: Logfile of failure stored in");
132/// ```
133pub fn remove_ansi_codes(text: &str) -> borrow::Cow<str> {
134    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\x1b\[[;\d]*[A-Za-z]").unwrap());
135
136    RE.replace_all(text, "")
137}
138
139/// Parse a log and remove line-prefixed timestamps in the format `YYYY-MM-DDTHH:MM:SS.0000000Z` (ISO 8601).
140/// # Example
141/// ```
142/// # use ci_manager::util::remove_timestamp_prefixes;
143/// # use pretty_assertions::assert_eq;
144/// let test_str = "2024-02-28T00:03:46.0000000Z [INFO] This is a log message";
145/// let modified = remove_timestamp_prefixes(test_str);
146/// assert_eq!(modified, "[INFO] This is a log message");
147/// ```
148/// ## Multiple lines
149/// ```
150/// # use ci_manager::util::remove_timestamp_prefixes;
151/// # use pretty_assertions::assert_eq;
152/// let test_str = "\
153/// 2024-02-28T00:03:46.0000000Z [INFO] This is a log message
154/// 2024-03-15T20:35:48.9824182Z [ERROR] This is another log message";
155/// let modified = remove_timestamp_prefixes(test_str);
156/// assert_eq!(modified, "\
157/// [INFO] This is a log message
158/// [ERROR] This is another log message");
159///
160pub fn remove_timestamp_prefixes(log: &str) -> borrow::Cow<str> {
161    // The fist group matches 0 or more newlines, and uses that group to replace the timestamp
162    // this way the newlines are preserved (making it agnostic to the type of newline used in the log)
163    static RE: Lazy<Regex> =
164        Lazy::new(|| Regex::new(r"([\r\n]*)\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{7}Z\s").unwrap());
165
166    RE.replace_all(log, "$1")
167}
168
169/// Parse an absolute path from a string. This assumes that the the first '/' found in the string is the start
170/// of the path.
171/// # Example
172/// ```
173/// # use ci_manager::util::first_abs_path_from_str;
174/// use std::path::PathBuf;
175///
176/// let test_str = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
177/// let path = first_abs_path_from_str(test_str).unwrap();
178/// assert_eq!(
179///    path,
180///   PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
181/// );
182/// ```
183///
184/// # Errors
185/// Returns an error if no '/' is found in the string or
186/// if the path is not a valid path.
187pub fn first_abs_path_from_str(s: &str) -> Result<PathBuf> {
188    let start = s.find('/').context("Path not found, no '/' in string")?;
189    let path = PathBuf::from(&s[start..]);
190    Ok(path)
191}
192
/// Make sure a URL starts with `https://`.
///
/// * A URL that already starts with `https://` is left untouched.
/// * A URL that starts with `http://` has its scheme replaced by `https://`.
/// * Any other URL gets `https://` prepended.
///
/// # Example
/// ```
/// # use ci_manager::util::ensure_https_prefix;
/// # use pretty_assertions::assert_eq;
/// // If the URL does not have the https prefix, it is added
/// let mut url = String::from("github.com/docker/buildx/issues");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, "https://github.com/docker/buildx/issues");
///
/// // If the URL already has the https prefix, it is not modified
/// let mut url = String::from("https://gitlab.com/foo-org/foo-repo");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, String::from("https://gitlab.com/foo-org/foo-repo"));
///
/// // A plain http prefix is upgraded instead of being prefixed a second time
/// let mut url = String::from("http://gitlab.com/foo-org/foo-repo");
/// ensure_https_prefix(&mut url);
/// assert_eq!(url, "https://gitlab.com/foo-org/foo-repo");
/// ```
pub fn ensure_https_prefix(url: &mut String) {
    const HTTPS: &str = "https://";
    const HTTP: &str = "http://";

    if url.starts_with(HTTPS) {
        return;
    }
    if url.starts_with(HTTP) {
        // Swap the scheme in place; prepending would produce `https://http://...`
        url.replace_range(..HTTP.len(), HTTPS);
    } else {
        url.insert_str(0, HTTPS);
    }
}
214
/// Canonicalize a repository URL to the form `https://{host}/{repo}`
///
/// A leading `https://` or `http://` and a leading `{host}/` are stripped from `repo`
/// (only at the very start, never further inside the string), and the remainder is appended
/// to `https://{host}/`.
///
/// # Arguments
/// * `repo` - The repository URL e.g. `user1/user1-repo`
/// * `host` - The host for the repository e.g. `github.com`. If it contains no `.`,
///   `.com` is appended, so `github` is treated as `github.com`.
///
/// # Example
/// ```
/// # use ci_manager::util::canonicalize_repo_url;
/// let repo = "bob/bobbys-repo";
/// let canonicalized = canonicalize_repo_url(repo, "github");
/// assert_eq!(canonicalized, "https://github.com/bob/bobbys-repo");
///
/// // If the host is already in the URL, only the protocol is added
/// let repo = "github.com/lisa/lisas-repo";
/// let canonicalized = canonicalize_repo_url(repo, "github.com");
/// assert_eq!(canonicalized, "https://github.com/lisa/lisas-repo");
///
/// // If the URL is already in the canonical form, it is returned as is
/// let repo = "https://gitlab.com/foo-org/foo-repo";
/// let canonicalized = canonicalize_repo_url(repo, "gitlab.com");
/// assert_eq!(canonicalized, repo);
///
/// // A plain http URL is upgraded to https
/// let canonicalized = canonicalize_repo_url("http://gitlab.com/foo-org/foo-repo", "gitlab.com");
/// assert_eq!(canonicalized, "https://gitlab.com/foo-org/foo-repo");
/// ```
pub fn canonicalize_repo_url(repo: &str, host: &str) -> String {
    // Check if the host argument has a top-level domain and add `.com` if it doesn't
    let host = if host.contains('.') {
        host.to_owned()
    } else {
        format!("{host}.com")
    };

    // Only strip the scheme and host where they appear as a prefix; a global
    // search-and-replace would also rewrite occurrences inside the path.
    let without_scheme = repo
        .strip_prefix("https://")
        .or_else(|| repo.strip_prefix("http://"))
        .unwrap_or(repo);
    let host_prefix = format!("{host}/");
    let path = without_scheme
        .strip_prefix(host_prefix.as_str())
        .unwrap_or(without_scheme);

    format!("https://{host}/{path}")
}
258
259/// Parse a repository URL/identifier to owner and repo fragments
260/// # Example
261/// ```
262/// # use pretty_assertions::assert_eq;
263/// # use ci_manager::util::repo_to_owner_repo_fragments;
264/// let repo_url = "github.com/luftkode/distro-template";
265/// let (owner, repo) = repo_to_owner_repo_fragments(repo_url).unwrap();
266/// assert_eq!((owner.as_str(), repo.as_str()), ("luftkode", "distro-template"));
267///
268/// let repo_url = "luftkode/bifrost-app";
269/// let (owner, repo) = repo_to_owner_repo_fragments(repo_url).unwrap();
270/// assert_eq!((owner.as_str(), repo.as_str()), ("luftkode", "bifrost-app"));
271/// ```
272///
273/// # Errors
274/// Returns an error if the URL cannot be parsed
275/// # Example
276/// ```
277/// # use ci_manager::util::repo_to_owner_repo_fragments;
278/// let repo_url = "github.com/luftkode";
279/// let result = repo_to_owner_repo_fragments(repo_url);
280/// assert!(result.is_err());
281/// ```
282pub fn repo_to_owner_repo_fragments(repo_url: &str) -> Result<(String, String)> {
283    let parts: Vec<&str> = repo_url.split('/').collect();
284    // reverse the order of the parts and take the first two
285    let repo_and_owner = parts.into_iter().rev().take(2).collect::<Vec<&str>>();
286    // Check that there are 2 parts and that neither are empty or contain spaces or dots
287    if repo_and_owner.len() != 2
288        || repo_and_owner
289            .iter()
290            .any(|s| s.is_empty() || s.contains(' ') || s.contains('.'))
291    {
292        bail!("Could not parse owner and repo from URL: {repo_url}");
293    }
294    let (repo, owner) = (repo_and_owner[0], repo_and_owner[1]);
295    Ok((owner.to_string(), repo.to_string()))
296}
297
298/// Calculate the smallest levenshtein distance between an issue body and other issue bodies
299pub fn issue_text_similarity(issue_body: &str, other_issues: &[String]) -> usize {
300    let issue_body_without_timestamps = remove_timestamps_and_ids(issue_body);
301
302    let smallest_distance = other_issues
303        .iter()
304        .map(|other_issue_body| {
305            distance::levenshtein(
306                &issue_body_without_timestamps,
307                &remove_timestamps_and_ids(other_issue_body),
308            )
309        })
310        .min()
311        .unwrap_or(usize::MAX);
312
313    smallest_distance
314}
315
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// The path starts at the first '/' of the log line.
    #[test]
    fn test_absolute_path_from_str() {
        let log_line = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
        let parsed = first_abs_path_from_str(log_line).unwrap();
        assert_eq!(
            parsed,
            PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
        );
    }

    /// An `owner/repo` identifier gets the scheme and host prepended.
    #[test]
    pub fn test_canonicalize_repo_url() {
        let canonical_url = canonicalize_repo_url("luftkode/distro-template", "github.com");
        assert_eq!(canonical_url, "https://github.com/luftkode/distro-template");
    }

    /// A 10-digit ID is removed together with the spaces around it.
    #[test]
    pub fn test_remove_timestamps_and_ids() {
        let stripped = remove_timestamps_and_ids("ID 8072883145 ");
        assert_eq!(stripped, "ID");
    }

    /// Run and job IDs are removed from an issue body, including the symbols right next to them.
    #[test]
    pub fn test_remove_timestamps_and_ids_log_text() {
        const LOG_TEXT: &str = r#"**Run ID**: 8072883145 [LINK TO RUN](https://github.com/luftkode/distro-template/actions/runs/8072883145)

        **1 job failed:**
        - **`Test template xilinx`**

        ### `Test template xilinx` (ID 22055505284)
        **Step failed:** `📦 Build yocto image`
        \
        **Log:** https://github.com/luftkode/distro-template/actions/runs/8072883145/job/22055505284
        "#;

        const EXPECTED_MODIFIED: &str = r#"**Run ID**:[LINK TO RUN](https://github.com/luftkode/distro-template/actions/runs

        **1 job failed:**
        - **`Test template xilinx`**

        ### `Test template xilinx` (ID
        **Step failed:** `📦 Build yocto image`
        \
        **Log:** https://github.com/luftkode/distro-template/actions/runsjob        "#;

        let modified = remove_timestamps_and_ids(LOG_TEXT);
        assert_eq!(
            modified, EXPECTED_MODIFIED,
            "Expected: {EXPECTED_MODIFIED}\nGot: {modified}"
        );
    }
}
ci_manager/util.rs

ci_manager/
util.rs