gh_workflow_parser/
util.rs

//! Utility functions for parsing GitHub CLI output, plus assorted helpers for paths, version checks, compression and repository URLs.
2use std::{error::Error, path::PathBuf, process::Command};
3
4use crate::gh::gh_cli;
5use bzip2::Compression;
6use once_cell::sync::Lazy;
7use regex::Regex;
8use std::io::prelude::*;
9
10/// Parse a path from a string
11/// # Example
12/// ```
13/// # use gh_workflow_parser::util::first_path_from_str;
14/// use std::path::PathBuf;
15///
16/// let haystack = r#"multi line
17/// test string with/path/file.txt is
18/// valid"#;
19/// let path = first_path_from_str(haystack).unwrap();
20/// assert_eq!(path, PathBuf::from("with/path/file.txt"));
21///
22/// // No path in string is an error
23/// let haystack = "Random string with no path";
24/// assert!(first_path_from_str(haystack).is_err());
25///
26/// // Path with no leading '/' and no file extension is OK
27/// let haystack = "foo app/3-_2/t/3 bar";
28/// let path = first_path_from_str(haystack).unwrap();
29/// assert_eq!(path, PathBuf::from("app/3-_2/t/3"));
30///
31/// // More realistic example
32/// let haystack = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
33/// let path = first_path_from_str(haystack).unwrap();
34/// assert_eq!(
35///   path,
36///  PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
37/// );
38/// ```
39/// # Errors
40/// This function returns an error if no valid path is found in the string
41pub fn first_path_from_str(s: &str) -> Result<PathBuf, Box<dyn std::error::Error>> {
42    static RE: Lazy<Regex> =
43        Lazy::new(|| Regex::new(r"[a-zA-Z0-9-_.\/]+\/[a-zA-Z0-9-_.]+").unwrap());
44
45    let path_str = RE.find(s).ok_or("No path found in string")?.as_str();
46    Ok(PathBuf::from(path_str))
47}
48
49/// Take the lines with failed jobs from the output of `gh run view`
50pub fn take_lines_with_failed_jobs(output: String) -> Vec<String> {
51    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"X.*ID [0-9]*\)").unwrap());
52
53    RE.find_iter(&output)
54        .map(|m| m.as_str().to_owned())
55        .collect()
56}
57
58/// Extract the job IDs from the lines with job information
59pub fn id_from_job_lines(lines: &[String]) -> Vec<String> {
60    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"ID (?<JOB_ID>[0-9]*)").unwrap());
61
62    lines
63        .iter()
64        .map(|line| {
65            RE.captures(line)
66                .unwrap_or_else(|| {
67                    panic!("Expected a line with a Job ID, but no ID found in line: {line}")
68                })
69                .name("JOB_ID")
70                .expect("Expected a Job ID")
71                .as_str()
72                .to_owned()
73        })
74        .collect()
75}
76
77/// Parse text for timestamps and IDs and remove them, returning the modified text without making a copy.
78///
79/// Some compromises are made to be able to remove timestamps in between other symbols e.g. '/83421321/'.
80/// but still avoid removing commit SHAs. That means that these symbols are also removed (any non-letter character
81/// preceding and following an ID).
82///
83/// # Example
84/// ```
85/// # use gh_workflow_parser::util::remove_timestamps;
86/// # use pretty_assertions::assert_eq;
87/// let test_str = r"ID 21442749267 ";
88/// let modified = remove_timestamps(test_str);
89/// assert_eq!(modified, "ID"); // Note that the space is removed
90///
91///
92/// let test_str = r#"ID 21442749267
93/// date: 2024-02-28 00:03:46
94/// other text"#;
95/// let modified = remove_timestamps(test_str);
96/// assert_eq!(modified, "IDdate: \nother text");
97/// ```
98pub fn remove_timestamps(text: &str) -> std::borrow::Cow<str> {
99    static RE: Lazy<Regex> = Lazy::new(|| {
100        Regex::new(
101            r"(?x)
102            # Timestamps like YYYY-MM-DD HH:MM:SS
103            ([0-9]{4}-[0-9]{2}-[0-9]{2}\x20[0-9]{2}:[0-9]{2}:[0-9]{2})
104            |
105            # IDs like 21442749267 but only if they are preceded and followed by non-letter characters
106            (?:[^[a-zA-Z]])([0-9]{10,11})(?:[^[a-zA-Z]])
107        ",
108        )
109        .unwrap()
110    });
111
112    RE.replace_all(text, "")
113}
114
/// Parse an absolute path from a string. This assumes that the first '/' found in the string is
/// the start of the path and that the path runs to the end of the string.
/// # Example
/// ```
/// # use gh_workflow_parser::util::first_abs_path_from_str;
/// use std::path::PathBuf;
///
/// let test_str = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
/// let path = first_abs_path_from_str(test_str).unwrap();
/// assert_eq!(
///    path,
///   PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
/// );
///
/// // A string without any '/' is an error
/// assert!(first_abs_path_from_str("no path here").is_err());
/// ```
///
/// # Errors
/// This function returns an error if no '/' is found in the string.
pub fn first_abs_path_from_str(s: &str) -> Result<PathBuf, Box<dyn Error>> {
    // Report a missing '/' to the caller instead of panicking, as the documentation promises.
    let start = s
        .find('/')
        .ok_or_else(|| format!("Expected a path in the string, but no '/' found in string: {s}"))?;
    // `find` returns the byte offset of the ASCII '/', so slicing from it is always valid.
    Ok(PathBuf::from(&s[start..]))
}
140
141/// Retrieve the GitHub CLI version from the GitHub CLI binary and check that it meets version requirements.
142pub fn check_gh_cli_version(min_required: semver::Version) -> Result<(), Box<dyn Error>> {
143    let gh_cli_version = Command::new(gh_cli()).arg("--version").output()?;
144    let version_str = String::from_utf8(gh_cli_version.stdout)?;
145    check_gh_cli_version_str(min_required, &version_str)
146}
147
148/// Check that the GitHub CLI version meets version requirements from the string output of `gh --version`
149///
150/// # Example
151/// ```
152/// # use gh_workflow_parser::util::check_gh_cli_version_str;
153/// let version_str = "gh version 2.43.1 (2024-01-31)";
154/// let min_required = semver::Version::new(2, 43, 1);
155/// let version = check_gh_cli_version_str(min_required, version_str);
156/// assert!(version.is_ok());
157/// ```
158///
159/// # Errors
160/// Returns an error if the version string cannot be parsed as a semver version or
161/// if the version is less than the minimum required version.
162pub fn check_gh_cli_version_str(
163    min_required: semver::Version,
164    version_str: &str,
165) -> Result<(), Box<dyn Error>> {
166    static GH_CLI_VER_RE: Lazy<Regex> =
167        Lazy::new(|| Regex::new(r"gh version (?P<version>[0-9]+\.[0-9]+\.[0-9]+)").unwrap());
168
169    let version = GH_CLI_VER_RE
170        .captures(version_str)
171        .unwrap()
172        .name("version")
173        .unwrap()
174        .as_str();
175
176    let version = semver::Version::parse(version)?;
177    if version < min_required {
178        return Err(format!("GitHub CLI version {version} is not supported. Please install version {min_required} or higher")
179        .into());
180    }
181    Ok(())
182}
183
/// Set the file permissions for a file on Linux
///
/// # Arguments
/// * `file` - Path of the file to change
/// * `mode` - The permission bits to apply, e.g. `0o755`
///
/// # Errors
/// Returns an error if the metadata of the file cannot be read (e.g. it does not exist)
/// or if the new permissions cannot be applied.
#[cfg(target_os = "linux")]
pub fn set_linux_file_permissions(file: &std::path::Path, mode: u32) -> Result<(), Box<dyn Error>> {
    // Start from the current permissions and propagate I/O failures instead of panicking.
    let mut perms = std::fs::metadata(file)?.permissions();
    std::os::unix::fs::PermissionsExt::set_mode(&mut perms, mode);
    std::fs::set_permissions(file, perms)?;
    Ok(())
}
193
194pub fn bzip2_decompress(input: &[u8]) -> Result<Vec<u8>, Box<dyn Error>> {
195    let mut d = bzip2::bufread::BzDecoder::new(input);
196    let mut out = Vec::new();
197    d.read_to_end(&mut out)?;
198    Ok(out)
199}
200
201pub fn bzip2_compress(input: &[u8]) -> Result<Vec<u8>, Box<dyn Error>> {
202    let mut e = bzip2::bufread::BzEncoder::new(input, Compression::new(9));
203    let mut out = Vec::new();
204    e.read_to_end(&mut out)?;
205    Ok(out)
206}
207
/// Canonicalize a repository URL to the form `https://{host}/{repo}`
///
/// Only a leading `https://` or `{host}/` is rewritten; later occurrences of either inside the
/// repository path are left untouched.
///
/// # Arguments
/// * `repo` - The repository URL e.g. `user1/user1-repo`
/// * `host` - The host for the repository e.g. `github.com`
///
/// # Example
/// ```
/// # use gh_workflow_parser::util::canonicalize_repo_url;
/// let repo = "bob/bobbys-repo";
/// let canonicalized = canonicalize_repo_url(repo, "github.com");
/// assert_eq!(canonicalized, "https://github.com/bob/bobbys-repo");
///
/// // If the host is already in the URL, only the protocol is added
/// let repo = "github.com/lisa/lisas-repo";
/// let canonicalized = canonicalize_repo_url(repo, "github.com");
/// assert_eq!(canonicalized, "https://github.com/lisa/lisas-repo");
///
/// // If the URL is already in the canonical form, it is returned as is
/// let repo = "https://gitlab.com/foo-org/foo-repo";
/// let canonicalized = canonicalize_repo_url(repo, "gitlab.com");
/// assert_eq!(canonicalized, repo);
///
/// // Only the leading host is treated as a prefix
/// let repo = "github.com/org/github.com/mirror";
/// let canonicalized = canonicalize_repo_url(repo, "github.com");
/// assert_eq!(canonicalized, "https://github.com/org/github.com/mirror");
/// ```
pub fn canonicalize_repo_url(repo: &str, host: &str) -> String {
    let canonical_prefix = format!("https://{host}/");
    if repo.starts_with(&canonical_prefix) {
        // Already canonical.
        return repo.to_string();
    }

    // Strip at most one leading prefix. `str::replace` would rewrite every occurrence and
    // corrupt repositories whose path happens to contain the host or the protocol again.
    let host_prefix = format!("{host}/");
    let path = repo
        .strip_prefix("https://")
        .or_else(|| repo.strip_prefix(host_prefix.as_str()))
        .unwrap_or(repo);

    format!("{canonical_prefix}{path}")
}
245
/// Unit tests for the utility functions.
///
/// The compression tests read the GitHub CLI binary from `gh_cli/gh`, relative to the directory
/// the tests are run from.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::GH_CLI_MIN_VERSION;
    use pretty_assertions::assert_eq;
    use temp_dir::TempDir;

    // Output from `gh run --repo=github.com/luftkode/distro-template view 7858139663`
    const TEST_OUTPUT_VIEW_RUN: &str = r#"
    X master Use template and build image · 7858139663
    Triggered via schedule about 10 hours ago

    JOBS
    ✓ enable-ssh-agent in 5s (ID 21442747661)
    ✓ Test template raspberry in 19m20s (ID 21442749166)
    X Test template xilinx in 5m41s (ID 21442749267)
      ✓ Set up job
      ✓ Log in to the Container registry
      ✓ Cleanup build folder before start
      ✓ Run actions/checkout@v4
      ✓ Setup Rust and Just
      ✓ 🗻 Make a templated project
      ✓ ⚙️ Run new project setup steps
      ✓ ⚒️ Build docker image
      X 📦 Build yocto image
      - 📩 Deploy image artifacts
      ✓ Docker down
      ✓ Cleanup build folder after done
      ✓ Create issue on failure
      ✓ Post Run actions/checkout@v4
      ✓ Post Log in to the Container registry
      ✓ Complete job

    ANNOTATIONS
    X Process completed with exit code 2.
    Test template xilinx: .github#3839


    To see what failed, try: gh run view 7858139663 --log-failed
    View this run on GitHub: https://github.com/luftkode/distro-template/actions/runs/7858139663
"#;

    #[test]
    fn test_take_lines_with_failed_jobs() {
        let failed_jobs = take_lines_with_failed_jobs(TEST_OUTPUT_VIEW_RUN.to_string());
        assert_eq!(failed_jobs.len(), 1, "Failed jobs: {failed_jobs:?}");
        assert_eq!(
            failed_jobs[0],
            "X Test template xilinx in 5m41s (ID 21442749267)"
        );
    }

    #[test]
    fn test_id_from_job_lines() {
        let job_lines = vec![
            "✓ Test template raspberry in 19m20s (ID 21442749166)".to_string(),
            "X Test template xilinx in 5m41s (ID 21442749267)".to_string(),
            "X Test template other in 5m1s (ID 01449267)".to_string(),
        ];
        let ids = id_from_job_lines(&job_lines);
        assert_eq!(ids.len(), 3, "Job IDs: {ids:?}");
        assert_eq!(ids[0], "21442749166");
        assert_eq!(ids[1], "21442749267");
        assert_eq!(ids[2], "01449267");
    }

    #[test]
    fn test_absolute_path_from_str() {
        let test_str = r#" ERROR: Logfile of failure stored in: /app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616"#;
        let path = first_abs_path_from_str(test_str).unwrap();
        assert_eq!(
            path,
            PathBuf::from("/app/yocto/build/tmp/work/x86_64-linux/sqlite3-native/3.43.2/temp/log.do_fetch.21616")
        );
    }

    const GH_CLI_VERSION_OK_STR: &str = r#"gh version 2.43.1 (2024-01-31)
https://github.com/cli/cli/releases/tag/v2.43.1"#;
    const GH_CLI_VERSION_BAD_STR: &str = r#"gh version 2.4.0 (2021-11-21)
https://github.com/cli/cli/releases/tag/v2.4.0"#;

    #[test]
    fn test_check_gh_cli_version_is_ok() {
        let version = check_gh_cli_version_str(GH_CLI_MIN_VERSION, GH_CLI_VERSION_OK_STR);
        assert!(version.is_ok());
    }

    #[test]
    fn test_check_gh_cli_version_bad() {
        let version = check_gh_cli_version_str(GH_CLI_MIN_VERSION, GH_CLI_VERSION_BAD_STR);
        assert!(version.is_err());
    }

    const GH_CLI_PATH: &str = "gh_cli/gh";

    #[test]
    fn test_compress_gh_cli_bz2() {
        /// Max upload size for crates.io is 10 MiB
        const MAX_CRATES_IO_UPLOAD_SIZE: usize = 1024 * 1024 * 10;
        let gh_cli_bytes = std::fs::read(GH_CLI_PATH).unwrap();
        let compressed = bzip2_compress(&gh_cli_bytes).unwrap();
        assert!(compressed.len() < gh_cli_bytes.len());
        // The compressed binary must fit within the crates.io upload limit
        assert!(compressed.len() < MAX_CRATES_IO_UPLOAD_SIZE);
    }

    #[test]
    fn test_decompress_gh_cli_bz2() {
        let gh_cli_bytes = std::fs::read(GH_CLI_PATH).unwrap();
        let compressed = bzip2_compress(&gh_cli_bytes).unwrap();
        let decompressed = bzip2_decompress(&compressed).unwrap();
        assert_eq!(gh_cli_bytes, decompressed);
    }

    #[test]
    fn test_compress_decompress_is_executable() {
        let gh_cli_bytes = std::fs::read(GH_CLI_PATH).unwrap();
        let compressed = bzip2_compress(&gh_cli_bytes).unwrap();
        let decompressed = bzip2_decompress(&compressed).unwrap();
        let temp_dir = TempDir::new().unwrap();
        let file = temp_dir.path().join("gh_cli");
        std::fs::write(&file, decompressed).unwrap();
        // `set_linux_file_permissions` only exists on Linux, so the call is gated at compile
        // time; a runtime `cfg!` check would still require it to compile on every target.
        #[cfg(target_os = "linux")]
        set_linux_file_permissions(&file, 0o755).unwrap();
        let output = std::process::Command::new(&file)
            .arg("--version")
            .output()
            .unwrap();
        assert!(output.status.success());
        println!("Output: {}", String::from_utf8_lossy(&output.stdout));
    }

    #[test]
    fn test_canonicalize_repo_url() {
        let repo = "luftkode/distro-template";
        let canonicalized = canonicalize_repo_url(repo, "github.com");
        assert_eq!(canonicalized, "https://github.com/luftkode/distro-template");

        // Host already present: only the protocol is added
        let canonicalized = canonicalize_repo_url("github.com/luftkode/distro-template", "github.com");
        assert_eq!(canonicalized, "https://github.com/luftkode/distro-template");

        // Already canonical: returned unchanged
        let repo = "https://github.com/luftkode/distro-template";
        let canonicalized = canonicalize_repo_url(repo, "github.com");
        assert_eq!(canonicalized, repo);
    }

    #[test]
    fn test_remove_timestamps() {
        let test_str = "ID 8072883145 ";
        let modified = remove_timestamps(test_str);
        assert_eq!(modified, "ID");
    }

    #[test]
    fn test_remove_timestamps_log_text() {
        const LOG_TEXT: &str = r#"**Run ID**: 8072883145 [LINK TO RUN](https://github.com/luftkode/distro-template/actions/runs/8072883145)

        **1 job failed:**
        - **`Test template xilinx`**

        ### `Test template xilinx` (ID 22055505284)
        **Step failed:** `📦 Build yocto image`
        \
        **Log:** https://github.com/luftkode/distro-template/actions/runs/8072883145/job/22055505284
        "#;

        const EXPECTED_MODIFIED: &str = r#"**Run ID**:[LINK TO RUN](https://github.com/luftkode/distro-template/actions/runs

        **1 job failed:**
        - **`Test template xilinx`**

        ### `Test template xilinx` (ID
        **Step failed:** `📦 Build yocto image`
        \
        **Log:** https://github.com/luftkode/distro-template/actions/runsjob        "#;

        let modified = remove_timestamps(LOG_TEXT);
        assert_eq!(
            modified, EXPECTED_MODIFIED,
            "Expected: {EXPECTED_MODIFIED}\nGot: {modified}"
        );
    }
}