diffutilslib/
utils.rs

// This file is part of the uutils diffutils package.
//
// For the full copyright and license information, please view the LICENSE-*
// files that were distributed with this source code.
5
6use regex::Regex;
7use std::{ffi::OsString, io::Write};
8use unicode_width::UnicodeWidthStr;
9
10/// Replace tabs by spaces in the input line.
11/// Correctly handle multi-bytes characters.
12/// This assumes that line does not contain any line breaks (if it does, the result is undefined).
13#[must_use]
14pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec<u8> {
15    let tab = b'\t';
16    let ntabs = line.iter().filter(|c| **c == tab).count();
17    if ntabs == 0 {
18        return line.to_vec();
19    }
20    let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1));
21    let mut offset = 0;
22
23    let mut iter = line.split(|c| *c == tab).peekable();
24    while let Some(chunk) = iter.next() {
25        match String::from_utf8(chunk.to_vec()) {
26            Ok(s) => offset += UnicodeWidthStr::width(s.as_str()),
27            Err(_) => offset += chunk.len(),
28        }
29        result.extend_from_slice(chunk);
30        if iter.peek().is_some() {
31            result.resize(result.len() + tabsize - offset % tabsize, b' ');
32            offset = 0;
33        }
34    }
35
36    result
37}
38
39/// Write a single line to an output stream, expanding tabs to space if necessary.
40/// This assumes that line does not contain any line breaks
41/// (if it does and tabs are to be expanded to spaces, the result is undefined).
42pub fn do_write_line(
43    output: &mut Vec<u8>,
44    line: &[u8],
45    expand_tabs: bool,
46    tabsize: usize,
47) -> std::io::Result<()> {
48    if expand_tabs {
49        output.write_all(do_expand_tabs(line, tabsize).as_slice())
50    } else {
51        output.write_all(line)
52    }
53}
54
55/// Retrieves the modification time of the input file specified by file path
56/// If an error occurs, it returns the current system time
57pub fn get_modification_time(file_path: &str) -> String {
58    use chrono::{DateTime, Local};
59    use std::fs;
60    use std::time::SystemTime;
61
62    let modification_time: SystemTime = fs::metadata(file_path)
63        .and_then(|m| m.modified())
64        .unwrap_or(SystemTime::now());
65
66    let modification_time: DateTime<Local> = modification_time.into();
67    let modification_time: String = modification_time
68        .format("%Y-%m-%d %H:%M:%S%.9f %z")
69        .to_string();
70
71    modification_time
72}
73
74pub fn format_failure_to_read_input_file(
75    executable: &OsString,
76    filepath: &OsString,
77    error: &std::io::Error,
78) -> String {
79    // std::io::Error's display trait outputs "{detail} (os error {code})"
80    // but we want only the {detail} (error string) part
81    let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap();
82    format!(
83        "{}: {}: {}",
84        executable.to_string_lossy(),
85        filepath.to_string_lossy(),
86        error_code_re.replace(error.to_string().as_str(), ""),
87    )
88}
89
90pub fn report_failure_to_read_input_file(
91    executable: &OsString,
92    filepath: &OsString,
93    error: &std::io::Error,
94) {
95    eprintln!(
96        "{}",
97        format_failure_to_read_input_file(executable, filepath, error)
98    );
99}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// Tests for `do_expand_tabs`.
    mod expand_tabs {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Asserts that expanding the tabs of `line` with the given `tabsize` yields `expected`.
        fn assert_tab_expansion(line: &str, tabsize: usize, expected: &str) {
            assert_eq!(
                do_expand_tabs(line.as_bytes(), tabsize),
                expected.as_bytes()
            );
        }

        #[test]
        fn basics() {
            assert_tab_expansion("foo barr   baz", 8, "foo barr   baz");
            assert_tab_expansion("foo\tbarr\tbaz", 8, "foo     barr    baz");
            assert_tab_expansion("foo\tbarr\tbaz", 5, "foo  barr baz");
            assert_tab_expansion("foo\tbarr\tbaz", 2, "foo barr  baz");
        }

        #[test]
        fn multibyte_chars() {
            assert_tab_expansion("foo\tépée\tbaz", 8, "foo     épée    baz");
            assert_tab_expansion("foo\t😉\tbaz", 5, "foo  😉   baz");

            // Note: The Woman Scientist emoji (👩‍🔬) is a ZWJ sequence combining
            // the Woman emoji (👩) and the Microscope emoji (🔬). On supported platforms
            // it is displayed as a single emoji and has a print size of 2 columns.
            // Terminal emulators tend to not support this, and display the two emojis
            // side by side, thus accounting for a print size of 4 columns, but the
            // unicode_width crate reports a correct size of 2.
            // The sequence is spelled with escapes so that the invisible zero-width
            // joiner (U+200D) cannot be lost silently when the file is edited.
            assert_tab_expansion(
                "foo\t\u{1F469}\u{200D}\u{1F52C}\tbaz",
                6,
                "foo   \u{1F469}\u{200D}\u{1F52C}    baz",
            );
        }

        #[test]
        fn invalid_utf8() {
            // [240, 240, 152, 137] is an invalid UTF-8 sequence, so it is handled as 4 bytes
            assert_eq!(
                do_expand_tabs(&[240, 240, 152, 137, 9, 102, 111, 111], 8),
                &[240, 240, 152, 137, 32, 32, 32, 32, 102, 111, 111]
            );
        }
    }

    /// Tests for `do_write_line`.
    mod write_line {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Asserts that writing `line` into an empty buffer succeeds and produces `expected`.
        fn assert_line_written(line: &str, expand_tabs: bool, tabsize: usize, expected: &str) {
            let mut output: Vec<u8> = Vec::new();
            assert!(do_write_line(&mut output, line.as_bytes(), expand_tabs, tabsize).is_ok());
            assert_eq!(output, expected.as_bytes());
        }

        #[test]
        fn basics() {
            assert_line_written("foo bar baz", false, 8, "foo bar baz");
            assert_line_written("foo bar\tbaz", false, 8, "foo bar\tbaz");
            assert_line_written("foo bar\tbaz", true, 8, "foo bar baz");
        }
    }

    /// Tests for `get_modification_time`.
    mod modification_time {
        use super::*;

        #[test]
        fn set_time() {
            use chrono::{DateTime, Local};
            use std::time::SystemTime;
            use tempfile::NamedTempFile;

            let temp = NamedTempFile::new().unwrap();
            // set file modification time equal to current time;
            // fail right here if the precondition cannot be established, rather than
            // through a confusing timestamp mismatch in the assertion below
            let current = SystemTime::now();
            temp.as_file()
                .set_modified(current)
                .expect("failed to set the modification time of the temporary file");

            // format current time
            let current: DateTime<Local> = current.into();
            let current: String = current.format("%Y-%m-%d %H:%M:%S%.9f %z").to_string();

            // verify
            assert_eq!(
                current,
                get_modification_time(&temp.path().to_string_lossy())
            );
        }

        #[test]
        fn invalid_file() {
            use chrono::{DateTime, Local};
            use std::time::SystemTime;

            let invalid_file = "target/utils/invalid-file";

            // store current time before calling `get_modification_time`
            // Because the file is invalid, it will return SystemTime::now()
            // which cannot be earlier than the previously saved time
            let current_time: DateTime<Local> = SystemTime::now().into();
            let m_time: DateTime<Local> = get_modification_time(invalid_file).parse().unwrap();

            // `>=` rather than `>`: two consecutive clock reads may return the same
            // value when the system clock has a coarse resolution.
            assert!(m_time >= current_time);
        }
    }
}