/// Splits `data` into at most `max_chunks` contiguous byte ranges whose boundaries fall on
/// line breaks.
///
/// Nominal split points are spaced evenly at `i * len / k`. Each one is moved forward to the
/// first position that directly follows a `\n`, or to the end of `data` when no newline
/// remains. A nominal point that lands inside a chunk already extended past it produces no
/// range, so fewer than `max_chunks` ranges may be returned.
///
/// # Arguments
///
/// * `data` - the bytes to partition.
/// * `max_chunks` - upper bound on the number of ranges; `0` is treated as `1`, and the
///   bound is additionally capped at `data.len()`.
///
/// # Returns
///
/// Half-open `(start, end)` ranges in ascending order. Every range is non-empty, the first
/// starts at `0`, the last ends at `data.len()`, and consecutive ranges share their
/// boundary. Empty `data` yields an empty vector.
pub fn line_aligned_chunks(data: &[u8], max_chunks: usize) -> Vec<(usize, usize)> {
    let len = data.len();
    if len == 0 {
        return Vec::new();
    }
    // At least one chunk, and never more chunks than there are bytes.
    let k = max_chunks.max(1).min(len);

    let mut out = Vec::new();
    let mut start = 0usize;
    for i in 1..k {
        // Widen before multiplying so `i * len` cannot overflow `usize` (large inputs on
        // 32-bit targets). With `i < k` the quotient is below `len`, so narrowing back to
        // `usize` is lossless.
        let nominal = (i as u128 * len as u128 / k as u128) as usize;
        if nominal <= start {
            // The previous chunk already ran past this nominal point while looking for its
            // newline; the aligned boundary would coincide with `start` (empty chunk).
            continue;
        }
        // `nominal > start >= 0`, hence `nominal - 1` is a valid index. A boundary is valid
        // when the byte just before it is `\n`, so the search starts at that byte.
        let end = data[nominal - 1..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(len, |offset| nominal + offset);
        out.push((start, end));
        start = end;
        if start == len {
            // No bytes left; every later nominal point would yield an empty chunk.
            break;
        }
    }
    if start < len {
        out.push((start, len));
    }
    out
}
40
/// Counts the lines contained in `data`.
///
/// Each `\n` byte terminates one line. When the final byte is not `\n`, the trailing
/// unterminated fragment is counted as one more line. Empty input contains zero lines.
pub fn line_count_bytes(data: &[u8]) -> usize {
    match data.last() {
        // No bytes at all: nothing to count.
        None => 0,
        Some(&final_byte) => {
            let terminated = data.iter().filter(|&&byte| byte == b'\n').count();
            // A missing trailing newline leaves one extra, unterminated line.
            terminated + usize::from(final_byte != b'\n')
        }
    }
}
52
53pub fn line_to_perl_string(line: &[u8]) -> String {
55 let line = if line.ends_with(b"\r") && !line.is_empty() {
56 &line[..line.len() - 1]
57 } else {
58 line
59 };
60 crate::perl_decode::decode_utf8_or_latin1_line(line)
61}
62
#[cfg(test)]
mod tests {
    use super::*;

    /// Concatenates the bytes of `data` covered by each `(start, end)` range, in order.
    fn stitch(data: &[u8], ranges: &[(usize, usize)]) -> Vec<u8> {
        let mut joined = Vec::new();
        for &(start, end) in ranges {
            joined.extend_from_slice(&data[start..end]);
        }
        joined
    }

    #[test]
    fn line_aligned_chunks_splits_without_breaking_lines() {
        let data = b"a\nbb\nccc\n";
        let chunks = line_aligned_chunks(data, 4);
        // The ranges must reproduce the input exactly when glued back together.
        assert_eq!(stitch(data, &chunks), data);
        // Every chunk that does not start the buffer must begin right after a newline.
        let later_starts = chunks
            .iter()
            .map(|&(start, _)| start)
            .filter(|&start| start > 0);
        for start in later_starts {
            assert_eq!(data[start - 1], b'\n');
        }
    }

    #[test]
    fn line_count_bytes_matches_scan() {
        // (input, expected line count)
        let cases = [
            (&b""[..], 0usize),
            (&b"a\nb"[..], 2),
            (&b"a\nb\n"[..], 2),
            (&b"a"[..], 1),
        ];
        for &(input, expected) in &cases {
            assert_eq!(line_count_bytes(input), expected);
        }
    }

    #[test]
    fn scan_lines_in_slice_three_lines() {
        let data = b"one\ntwo\nthree";
        let mut lines = Vec::new();
        // Walk the buffer with a shrinking cursor instead of an explicit start index.
        let mut rest: &[u8] = data;
        while !rest.is_empty() {
            match rest.iter().position(|&b| b == b'\n') {
                Some(newline) => {
                    lines.push(&rest[..newline]);
                    rest = &rest[newline + 1..];
                }
                None => {
                    // Final line without a terminator.
                    lines.push(rest);
                    break;
                }
            }
        }
        assert_eq!(lines, vec![&b"one"[..], &b"two"[..], &b"three"[..]]);
    }

    #[test]
    fn line_aligned_chunks_empty_input() {
        let chunks = line_aligned_chunks(&[], 8);
        assert!(chunks.is_empty());
    }

    #[test]
    fn line_aligned_chunks_single_byte() {
        assert_eq!(line_aligned_chunks(b"x", 4), vec![(0, 1)]);
    }

    #[test]
    fn line_aligned_chunks_max_chunks_zero_uses_one() {
        let data = b"a\nb\n";
        let chunks = line_aligned_chunks(data, 0);
        assert!(!chunks.is_empty());
        assert_eq!(stitch(data, &chunks), data);
    }

    #[test]
    fn line_to_perl_string_strips_cr() {
        let decoded = line_to_perl_string(b"row\r");
        assert_eq!(decoded, "row");
    }

    #[test]
    fn line_to_perl_string_invalid_utf8_maps_octets() {
        let octets = [0xff_u8, 0xfe];
        assert_eq!(line_to_perl_string(&octets), "\u{00ff}\u{00fe}");
    }
}