1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3
/// Result of parsing the output of `ls -l`.
///
/// Obtained through the [`std::str::FromStr`] implementation, which fills
/// both lists in ascending name order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LsOutput {
    /// Entries whose name did not end with `/`.
    pub files: Vec<LsOutputFile>,
    /// Names of entries whose name ended with `/`, with the slash removed.
    pub folders: Vec<String>,
}

/// A single non-directory entry of an `ls -l` listing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LsOutputFile {
    /// Entry name with surrounding quotes (if any) removed.
    pub name: String,
    /// Value of the size column of the listing.
    pub size_bytes: i64,
}
19
20#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct Error {
23 pub kind: ErrorKind,
25 pub line: String,
27}
28
/// Reason a listing line could not be parsed.
///
/// The `Missing*` variants are named after the whitespace-separated column
/// that was absent from the line.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// The file mode column is absent.
    MissingFileMode,
    /// The link count column is absent.
    MissingLinkCount,
    /// The owner column is absent.
    MissingOwner,
    /// The group column is absent.
    MissingGroup,
    /// The size column is absent.
    MissingSize,
    /// The size column is present but is not a valid integer.
    InvalidSize {
        /// The text found in the size column.
        token: String,
    },
    /// The month column of the timestamp is absent.
    MissingMonth,
    /// The day column of the timestamp is absent.
    MissingDay,
    /// The time-or-year column of the timestamp is absent.
    MissingTimestamp,
    /// Nothing follows the timestamp columns.
    MissingName,
    /// The name is a pair of quotes with nothing between them.
    EmptyQuotedName,
    /// A double-quoted name ends with a lone backslash.
    InvalidEscapeSequence,
}
60
61impl std::str::FromStr for LsOutput {
62 type Err = Error;
63
64 fn from_str(s: &str) -> Result<Self, Self::Err> {
65 let mut files = Vec::new();
66 let mut folders = Vec::new();
67 let input = s
68 .strip_prefix("\\\r\n")
69 .or_else(|| s.strip_prefix("\\\n"))
70 .unwrap_or(s);
71
72 for raw_line in input.lines() {
73 let line = raw_line.trim();
74
75 let parsed = parse_line(line).map_err(|kind| Error::new(kind, line.to_string()))?;
76
77 if let Some(parsed) = parsed {
78 match parsed {
79 ParsedLine::File(file) => files.push(file),
80 ParsedLine::Folder(folder) => folders.push(folder),
81 }
82 }
83 }
84
85 files.sort_by(|a, b| a.name.cmp(&b.name));
86 folders.sort();
87
88 Ok(Self { files, folders })
89 }
90}
91
92fn unescape_double_quoted(input: &str) -> Result<String, ErrorKind> {
93 let mut result = String::with_capacity(input.len());
94 let mut chars = input.chars();
95
96 while let Some(ch) = chars.next() {
97 if ch == '\\' {
98 let escaped = chars.next().ok_or(ErrorKind::InvalidEscapeSequence)?;
99 result.push(match escaped {
100 'n' => '\n',
101 'r' => '\r',
102 't' => '\t',
103 other => other,
104 });
105 } else {
106 result.push(ch);
107 }
108 }
109
110 Ok(result)
111}
112
113fn parse_name(raw: &str) -> Result<String, ErrorKind> {
114 if raw.is_empty() {
115 return Err(ErrorKind::MissingName);
116 }
117
118 if raw.len() >= 2 {
119 let bytes = raw.as_bytes();
120 if bytes[0] == b'"' && bytes[raw.len() - 1] == b'"' {
121 let value = unescape_double_quoted(&raw[1..raw.len() - 1])?;
122 if value.is_empty() {
123 return Err(ErrorKind::EmptyQuotedName);
124 }
125 return Ok(value);
126 }
127
128 if bytes[0] == b'\'' && bytes[raw.len() - 1] == b'\'' {
129 let value = &raw[1..raw.len() - 1];
130 if value.is_empty() {
131 return Err(ErrorKind::EmptyQuotedName);
132 }
133 return Ok(value.to_string());
134 }
135 }
136
137 Ok(raw.to_string())
138}
139
140enum ParsedLine {
141 File(LsOutputFile),
142 Folder(String),
143}
144
145fn parse_line(line: &str) -> Result<Option<ParsedLine>, ErrorKind> {
146 if line.is_empty() || line.starts_with("total ") {
147 return Ok(None);
148 }
149
150 let mut parts = line.split_whitespace();
151 let file_mode = parts.next().ok_or(ErrorKind::MissingFileMode)?;
152 if file_mode.len() == 10 {
153 match file_mode.as_bytes()[0] {
154 b'l' => return Ok(None), b'b' => return Ok(None), b'c' => return Ok(None), _ => {}
158 }
159 }
160
161 parts.next().ok_or(ErrorKind::MissingLinkCount)?;
163 parts.next().ok_or(ErrorKind::MissingOwner)?;
164 parts.next().ok_or(ErrorKind::MissingGroup)?;
165
166 let size_token = parts.next().ok_or(ErrorKind::MissingSize)?;
167 let size: i64 = size_token.parse().map_err(|_| ErrorKind::InvalidSize {
168 token: size_token.to_string(),
169 })?;
170
171 parts.next().ok_or(ErrorKind::MissingMonth)?;
173 parts.next().ok_or(ErrorKind::MissingDay)?;
174 parts.next().ok_or(ErrorKind::MissingTimestamp)?;
175
176 let mut raw_name = parts.collect::<Vec<_>>().join(" ");
177 if raw_name.is_empty() {
178 return Err(ErrorKind::MissingName);
179 }
180
181 let is_directory = raw_name.ends_with('/');
182 if is_directory {
183 while raw_name.ends_with('/') {
184 raw_name.pop();
185 }
186 }
187
188 let name = parse_name(&raw_name)?;
189
190 if name == "." || name == ".." {
191 return Ok(None);
192 }
193
194 if is_directory {
195 if name.is_empty() {
196 return Ok(None);
197 }
198
199 Ok(Some(ParsedLine::Folder(name)))
200 } else {
201 Ok(Some(ParsedLine::File(LsOutputFile {
202 name,
203 size_bytes: size,
204 })))
205 }
206}
207
208impl Error {
209 fn new(kind: ErrorKind, line: String) -> Self {
210 Self { kind, line }
211 }
212}
213
214impl std::fmt::Display for ErrorKind {
215 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216 match self {
217 Self::MissingFileMode => write!(f, "missing file mode field"),
218 Self::MissingLinkCount => write!(f, "missing link count field"),
219 Self::MissingOwner => write!(f, "missing owner field"),
220 Self::MissingGroup => write!(f, "missing group field"),
221 Self::MissingSize => write!(f, "missing size field"),
222 Self::InvalidSize { token } => write!(f, "invalid size value `{token}`"),
223 Self::MissingMonth => write!(f, "missing timestamp month field"),
224 Self::MissingDay => write!(f, "missing timestamp day field"),
225 Self::MissingTimestamp => write!(f, "missing timestamp time or year field"),
226 Self::MissingName => write!(f, "missing file name"),
227 Self::EmptyQuotedName => write!(f, "empty quoted file name"),
228 Self::InvalidEscapeSequence => write!(f, "unterminated escape sequence in file name"),
229 }
230 }
231}
232
233impl std::fmt::Display for Error {
234 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
235 write!(f, "{} in line `{}`", self.kind, self.line)
236 }
237}
238
239impl std::error::Error for Error {}
240
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use super::*;

    /// Expected `(name, size)` pairs for the edge-case listings, in the
    /// name-sorted order the parser returns them.
    const EDGE_CASE_FILE_ENTRIES: [(&str, i64); 42] = [
        (r" -space-dash-", 13),
        (r" multiple consecutive spaces ", 34),
        (r"!exclamation!mark!", 18),
        (r#""double"quote""#, 14),
        (r"#hash#tag#", 10),
        (r"$dollar$sign$", 13),
        (r"%percent%value%", 15),
        (r"&ersand&symbol&", 18),
        (r"'$'\n''newline'$'\n''line", 17),
        (r"'$'\r''return'$'\r''carriage'$'\r", 20),
        (r"'$'\t''tab'$'\t''indent'$'\t", 15),
        (r"'single'quote'", 14),
        (r"(paren(open(", 12),
        (r")paren)close)", 13),
        (r"*asterisk*star*", 15),
        (r"+plus+sign+", 11),
        (r",comma,list,", 12),
        (r"---dash---triple---", 19),
        (r"-hyphen-entry-", 14),
        (r"..double..dot..", 15),
        (r".hidden. with spaces.", 21),
        (r":colon:case:", 12),
        (r";semicolon;case;", 16),
        (r"<less<than<", 11),
        (r"=equals=case=", 13),
        (r">greater>than>", 14),
        (r"?question?mark?", 15),
        (r"@at@symbol@", 11),
        (r"[bracket[left[", 14),
        (r"\backslash\path\", 19),
        (r"\x20space\x20pad\x20", 20),
        (r"]bracket]right]", 15),
        (r"^caret^symbol^", 14),
        (r"_underscore_label_", 18),
        (r"`backtick`quote`", 16),
        (r"{brace{left{", 12),
        (r"|pipe|vertical|", 15),
        (r"}brace}right}", 13),
        (r"~tilde~wave~", 12),
        (r"файл", 8),
        (r"文件", 6),
        (r"🚀rocket🚀ship🚀", 22),
    ];

    /// Directories are collected, sorted, and `.` / `..` are dropped.
    #[test]
    fn folders() {
        let listing = "\
total 16
drwxr-xr-x 5 user user 4096 Jan 1 12:00 ./
drwxr-xr-x 2 user user 4096 Jan 1 12:01 ../
drwxr-xr-x 4 user user 4096 Jan 1 12:02 zeta/
drwxr-xr-x 4 user user 4096 Jan 1 12:02 alpha/
";

        let parsed = LsOutput::from_str(listing).unwrap();

        assert!(parsed.files.is_empty());
        assert_eq!(parsed.folders.len(), 2);
        assert_eq!(parsed.folders, vec!["alpha", "zeta"]);
    }

    /// Regular files are collected with their sizes and sorted by name.
    #[test]
    fn files() {
        let listing = "\
total 12
drwxr-xr-x 5 root root 4096 Jan 1 00:00 ./
drwxr-xr-x 5 root root 4096 Jan 1 00:00 ../
-rw-r--r-- 1 root root 16 Jan 1 00:01 arrow -> name
-rw-r--r-- 1 root root 16 Jan 1 00:01 notes.txt
-rw-r--r-- 1 root root 8 Jan 1 00:02 .hidden
";

        let parsed = LsOutput::from_str(listing).unwrap();

        assert!(parsed.folders.is_empty());
        assert_eq!(parsed.files.len(), 3);

        let mut names_and_sizes: Vec<(&str, i64)> = Vec::new();
        for entry in &parsed.files {
            names_and_sizes.push((entry.name.as_str(), entry.size_bytes));
        }
        assert_eq!(
            names_and_sizes,
            vec![(".hidden", 8), ("arrow -> name", 16), ("notes.txt", 16)]
        );
    }

    /// Symlink lines produce neither a file nor a folder.
    #[test]
    fn ignores_symlinks() {
        let listing = "\
lrwxrwxrwx 1 user user 6 Jan 1 12:04 link -> target
";

        let parsed: LsOutput = listing.parse().unwrap();
        assert!(parsed.folders.is_empty());
        assert!(parsed.files.is_empty());
    }

    /// Block and character device lines produce neither a file nor a folder.
    #[test]
    fn ignores_device_files() {
        let listing = "\
brw-rw---- 1 root disk 8, 0 Jan 1 12:00 sda
crw-rw---- 1 root disk 8, 1 Jan 1 12:00 sda1
";

        let parsed: LsOutput = listing.parse().unwrap();
        assert!(parsed.folders.is_empty());
        assert!(parsed.files.is_empty());
    }

    /// Non-ASCII names survive parsing unchanged.
    #[test]
    fn unicode_names() {
        let listing = "\
drwxrwxr-x 2 imbolc imbolc 4096 Oct 14 10:43 пора/
-rw-rw-r-- 1 imbolc imbolc 0 Oct 14 10:43 спать
";

        let parsed: LsOutput = listing.parse().unwrap();

        assert_eq!(parsed.folders.len(), 1);
        assert_eq!(parsed.files.len(), 1);
        assert_eq!(parsed.folders[0], "пора");
        assert_eq!(parsed.files[0].name, "спать");
    }

    /// Quoted names containing spaces are unquoted.
    #[test]
    fn spaces() {
        let listing = r#"\
drwxrwxr-x 2 imbolc imbolc 4096 Oct 14 10:49 "let's play"/
-rw-rw-r-- 1 imbolc imbolc 0 Oct 14 10:50 'давай играть'
"#;

        let parsed: LsOutput = listing.parse().unwrap();

        assert_eq!(parsed.folders.len(), 1);
        assert_eq!(parsed.files.len(), 1);
        assert_eq!(parsed.folders[0], "let's play");
        assert_eq!(parsed.files[0].name, "давай играть");
    }

    /// The error carries the line it failed on, and shows it when displayed.
    #[test]
    fn error_includes_offending_line() {
        let failure = "broken line"
            .parse::<LsOutput>()
            .err()
            .expect("expected error");

        assert_eq!(failure.line, "broken line");
        assert!(failure.to_string().contains("broken line"));
    }

    /// A line without the expected columns is an error.
    #[test]
    fn rejects_malformed_line() {
        let outcome = "broken line".parse::<LsOutput>();
        assert!(outcome.is_err());
    }

    /// File names made of punctuation, quotes, escapes and non-ASCII text.
    #[test]
    fn edge_case_files() {
        let listing = r#"\
total 176
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ./
drwxrwxr-x 4 imbolc imbolc 4096 Oct 15 12:05 ../
-rw-rw-r-- 1 imbolc imbolc 13 Oct 15 12:05 '$dollar$sign$'
-rw-rw-r-- 1 imbolc imbolc 18 Oct 15 12:05 '&ersand&symbol&'
-rw-rw-r-- 1 imbolc imbolc 15 Oct 15 12:05 '*asterisk*star*'
-rw-rw-r-- 1 imbolc imbolc 11 Oct 15 12:05 @at@symbol@
-rw-rw-r-- 1 imbolc imbolc 19 Oct 15 12:05 '\backslash\path\'
-rw-rw-r-- 1 imbolc imbolc 16 Oct 15 12:05 '`backtick`quote`'
-rw-rw-r-- 1 imbolc imbolc 12 Oct 15 12:05 {brace{left{
-rw-rw-r-- 1 imbolc imbolc 13 Oct 15 12:05 }brace}right}
-rw-rw-r-- 1 imbolc imbolc 14 Oct 15 12:05 '[bracket[left['
-rw-rw-r-- 1 imbolc imbolc 15 Oct 15 12:05 ]bracket]right]
-rw-rw-r-- 1 imbolc imbolc 14 Oct 15 12:05 '^caret^symbol^'
-rw-rw-r-- 1 imbolc imbolc 12 Oct 15 12:05 :colon:case:
-rw-rw-r-- 1 imbolc imbolc 12 Oct 15 12:05 ,comma,list,
-rw-rw-r-- 1 imbolc imbolc 19 Oct 15 12:05 ---dash---triple---
-rw-rw-r-- 1 imbolc imbolc 15 Oct 15 12:05 ..double..dot..
-rw-rw-r-- 1 imbolc imbolc 14 Oct 15 12:05 '"double"quote"'
-rw-rw-r-- 1 imbolc imbolc 13 Oct 15 12:05 '=equals=case='
-rw-rw-r-- 1 imbolc imbolc 18 Oct 15 12:05 '!exclamation!mark!'
-rw-rw-r-- 1 imbolc imbolc 14 Oct 15 12:05 '>greater>than>'
-rw-rw-r-- 1 imbolc imbolc 10 Oct 15 12:05 '#hash#tag#'
-rw-rw-r-- 1 imbolc imbolc 21 Oct 15 12:05 '.hidden. with spaces.'
-rw-rw-r-- 1 imbolc imbolc 14 Oct 15 12:05 -hyphen-entry-
-rw-rw-r-- 1 imbolc imbolc 11 Oct 15 12:05 '<less<than<'
-rw-rw-r-- 1 imbolc imbolc 34 Oct 15 12:05 ' multiple consecutive spaces '
-rw-rw-r-- 1 imbolc imbolc 17 Oct 15 12:05 ''$'\n''newline'$'\n''line'
-rw-rw-r-- 1 imbolc imbolc 13 Oct 15 12:05 ')paren)close)'
-rw-rw-r-- 1 imbolc imbolc 12 Oct 15 12:05 '(paren(open('
-rw-rw-r-- 1 imbolc imbolc 15 Oct 15 12:05 %percent%value%
-rw-rw-r-- 1 imbolc imbolc 15 Oct 15 12:05 '|pipe|vertical|'
-rw-rw-r-- 1 imbolc imbolc 11 Oct 15 12:05 +plus+sign+
-rw-rw-r-- 1 imbolc imbolc 15 Oct 15 12:05 '?question?mark?'
-rw-rw-r-- 1 imbolc imbolc 20 Oct 15 12:05 ''$'\r''return'$'\r''carriage'$'\r'
-rw-rw-r-- 1 imbolc imbolc 22 Oct 15 12:05 🚀rocket🚀ship🚀
-rw-rw-r-- 1 imbolc imbolc 16 Oct 15 12:05 ';semicolon;case;'
-rw-rw-r-- 1 imbolc imbolc 14 Oct 15 12:05 "'single'quote'"
-rw-rw-r-- 1 imbolc imbolc 13 Oct 15 12:05 ' -space-dash-'
-rw-rw-r-- 1 imbolc imbolc 15 Oct 15 12:05 ''$'\t''tab'$'\t''indent'$'\t'
-rw-rw-r-- 1 imbolc imbolc 12 Oct 15 12:05 '~tilde~wave~'
-rw-rw-r-- 1 imbolc imbolc 18 Oct 15 12:05 _underscore_label_
-rw-rw-r-- 1 imbolc imbolc 20 Oct 15 12:05 '\x20space\x20pad\x20'
-rw-rw-r-- 1 imbolc imbolc 8 Oct 15 12:05 файл
-rw-rw-r-- 1 imbolc imbolc 6 Oct 15 12:05 文件
"#;

        let parsed: LsOutput = listing.parse().unwrap();
        assert!(parsed.folders.is_empty());

        let actual: Vec<(&str, i64)> = parsed
            .files
            .iter()
            .map(|entry| (entry.name.as_str(), entry.size_bytes))
            .collect();
        assert_eq!(actual, EDGE_CASE_FILE_ENTRIES);
    }

    /// The same edge-case names, listed as directories.
    #[test]
    fn edge_case_folders() {
        let listing = r#"\
total 176
drwxrwxr-x 44 imbolc imbolc 4096 Oct 15 12:05 ./
drwxrwxr-x 4 imbolc imbolc 4096 Oct 15 12:05 ../
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '$dollar$sign$'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '&ersand&symbol&'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '*asterisk*star*'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 @at@symbol@/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '\backslash\path\'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '`backtick`quote`'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 {brace{left{/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 }brace}right}/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '[bracket[left['/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ]bracket]right]/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '^caret^symbol^'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 :colon:case:/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ,comma,list,/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ---dash---triple---/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ..double..dot../
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '"double"quote"'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '=equals=case='/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '!exclamation!mark!'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '>greater>than>'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '#hash#tag#'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '.hidden. with spaces.'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 -hyphen-entry-/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '<less<than<'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ' multiple consecutive spaces '/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ''$'\n''newline'$'\n''line'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ')paren)close)'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '(paren(open('/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 %percent%value%/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '|pipe|vertical|'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 +plus+sign+/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '?question?mark?'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ''$'\r''return'$'\r''carriage'$'\r'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 🚀rocket🚀ship🚀/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ';semicolon;case;'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 "'single'quote'"/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ' -space-dash-'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 ''$'\t''tab'$'\t''indent'$'\t'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '~tilde~wave~'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 _underscore_label_/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 '\x20space\x20pad\x20'/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 файл/
drwxrwxr-x 2 imbolc imbolc 4096 Oct 15 12:05 文件/
"#;

        let parsed: LsOutput = listing.parse().unwrap();
        assert!(parsed.files.is_empty());

        let expected: Vec<&str> = EDGE_CASE_FILE_ENTRIES
            .iter()
            .map(|&(name, _)| name)
            .collect();
        let actual: Vec<&str> = parsed.folders.iter().map(String::as_str).collect();
        assert_eq!(actual, expected);
    }
}