diffutilslib/
utils.rs

1// This file is part of the uutils diffutils package.
2//
3// For the full copyright and license information, please view the LICENSE-*
4// files that was distributed with this source code.
5
6use std::io::Write;
7
8use unicode_width::UnicodeWidthStr;
9
10/// Replace tabs by spaces in the input line.
11/// Correctly handle multi-bytes characters.
12/// This assumes that line does not contain any line breaks (if it does, the result is undefined).
13#[must_use]
14pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec<u8> {
15    let tab = b'\t';
16    let ntabs = line.iter().filter(|c| **c == tab).count();
17    if ntabs == 0 {
18        return line.to_vec();
19    }
20    let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1));
21    let mut offset = 0;
22
23    let mut iter = line.split(|c| *c == tab).peekable();
24    while let Some(chunk) = iter.next() {
25        match String::from_utf8(chunk.to_vec()) {
26            Ok(s) => offset += UnicodeWidthStr::width(s.as_str()),
27            Err(_) => offset += chunk.len(),
28        }
29        result.extend_from_slice(chunk);
30        if iter.peek().is_some() {
31            result.resize(result.len() + tabsize - offset % tabsize, b' ');
32            offset = 0;
33        }
34    }
35
36    result
37}
38
39/// Write a single line to an output stream, expanding tabs to space if necessary.
40/// This assumes that line does not contain any line breaks
41/// (if it does and tabs are to be expanded to spaces, the result is undefined).
42pub fn do_write_line(
43    output: &mut Vec<u8>,
44    line: &[u8],
45    expand_tabs: bool,
46    tabsize: usize,
47) -> std::io::Result<()> {
48    if expand_tabs {
49        output.write_all(do_expand_tabs(line, tabsize).as_slice())
50    } else {
51        output.write_all(line)
52    }
53}
54
55/// Retrieves the modification time of the input file specified by file path
56/// If an error occurs, it returns the current system time
57pub fn get_modification_time(file_path: &str) -> String {
58    use chrono::{DateTime, Local};
59    use std::fs;
60    use std::time::SystemTime;
61
62    let modification_time: SystemTime = fs::metadata(file_path)
63        .and_then(|m| m.modified())
64        .unwrap_or(SystemTime::now());
65
66    let modification_time: DateTime<Local> = modification_time.into();
67    let modification_time: String = modification_time
68        .format("%Y-%m-%d %H:%M:%S%.9f %z")
69        .to_string();
70
71    modification_time
72}
73
#[cfg(test)]
mod tests {
    use super::*;

    /// Tests for `do_expand_tabs`.
    mod expand_tabs {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Asserts that expanding the tabs of `line` with `tabsize` yields `expected`.
        fn assert_tab_expansion(line: &str, tabsize: usize, expected: &str) {
            assert_eq!(
                do_expand_tabs(line.as_bytes(), tabsize),
                expected.as_bytes()
            );
        }

        #[test]
        fn basics() {
            assert_tab_expansion("foo barr   baz", 8, "foo barr   baz");
            assert_tab_expansion("foo\tbarr\tbaz", 8, "foo     barr    baz");
            assert_tab_expansion("foo\tbarr\tbaz", 5, "foo  barr baz");
            assert_tab_expansion("foo\tbarr\tbaz", 2, "foo barr  baz");
        }

        #[test]
        fn multibyte_chars() {
            assert_tab_expansion("foo\tépée\tbaz", 8, "foo     épée    baz");
            assert_tab_expansion("foo\t😉\tbaz", 5, "foo  😉   baz");

            // Note: The Woman Scientist emoji (👩‍🔬) is a ZWJ sequence combining
            // the Woman emoji (👩) and the Microscope emoji (🔬). On supported platforms
            // it is displayed as a single emoji and should have a print size of 2 columns,
            // but terminal emulators tend to not support this, and display the two emojis
            // side by side, thus accounting for a print size of 4 columns.
            assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo   👩‍🔬  baz");
        }

        #[test]
        fn invalid_utf8() {
            // [240, 240, 152, 137] is an invalid UTF-8 sequence, so it is handled as 4 bytes
            assert_eq!(
                do_expand_tabs(&[240, 240, 152, 137, 9, 102, 111, 111], 8),
                &[240, 240, 152, 137, 32, 32, 32, 32, 102, 111, 111]
            );
        }
    }

    /// Tests for `do_write_line`.
    mod write_line {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Asserts that writing `line` with the given options produces exactly `expected`.
        fn assert_line_written(line: &str, expand_tabs: bool, tabsize: usize, expected: &str) {
            let mut output: Vec<u8> = Vec::new();
            assert!(do_write_line(&mut output, line.as_bytes(), expand_tabs, tabsize).is_ok());
            assert_eq!(output, expected.as_bytes());
        }

        #[test]
        fn basics() {
            assert_line_written("foo bar baz", false, 8, "foo bar baz");
            assert_line_written("foo bar\tbaz", false, 8, "foo bar\tbaz");
            assert_line_written("foo bar\tbaz", true, 8, "foo bar baz");
        }
    }

    /// Tests for `get_modification_time`.
    mod modification_time {
        use super::*;

        #[test]
        fn set_time() {
            use chrono::{DateTime, Local};
            use std::time::SystemTime;
            use tempfile::NamedTempFile;

            let temp = NamedTempFile::new().unwrap();
            // set file modification time equal to current time
            let current = SystemTime::now();
            // The comparison below is meaningless if this fails, so fail loudly right here
            // instead of with a confusing timestamp mismatch.
            temp.as_file()
                .set_modified(current)
                .expect("failed to set the modification time of the temporary file");

            // format current time
            let current: DateTime<Local> = current.into();
            let current: String = current.format("%Y-%m-%d %H:%M:%S%.9f %z").to_string();

            // verify
            assert_eq!(
                current,
                get_modification_time(&temp.path().to_string_lossy())
            );
        }

        #[test]
        fn invalid_file() {
            use chrono::{DateTime, Local};
            use std::time::SystemTime;

            let invalid_file = "target/utils/invalid-file";

            // store current time before calling `get_modification_time`
            // Because the file is invalid, it will return SystemTime::now()
            // which cannot be earlier than the previously saved time
            let current_time: DateTime<Local> = SystemTime::now().into();
            let m_time: DateTime<Local> = get_modification_time(invalid_file).parse().unwrap();

            // `>=` rather than `>`: with a coarse clock both readings may be identical.
            assert!(m_time >= current_time);
        }
    }
}