1use regex::Regex;
2use std::fs;
3use std::io;
4
/// A single split instruction, as produced by [`parse_pattern`].
///
/// `PartialEq`/`Eq` are derived so that parsed patterns can be compared
/// directly (for example in assertions).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pattern {
    /// `/REGEX/[OFFSET]`: split at the line matching `regex`, shifted by
    /// `offset` lines; the lines before the split point are written out.
    Regex { regex: String, offset: i64 },
    /// `%REGEX%[OFFSET]`: like [`Pattern::Regex`], but the lines before the
    /// split point are skipped instead of being written.
    SkipTo { regex: String, offset: i64 },
    /// A positive, 1-based line number to split in front of.
    LineNumber(usize),
    /// `{N}`: repeat the preceding pattern `N` more times.
    Repeat(usize),
    /// `{*}`: repeat the preceding pattern indefinitely.
    RepeatForever,
}
20
/// Options controlling how output files are named and written.
///
/// `PartialEq`/`Eq` are derived so configurations can be compared directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CsplitConfig {
    /// Text every output file name starts with.
    pub prefix: String,
    /// printf-like suffix template; when empty, a zero-padded index of
    /// `digits` width is used instead.
    pub suffix_format: String,
    /// Minimum width of the zero-padded index used when `suffix_format` is empty.
    pub digits: usize,
    /// When `false`, files created so far are removed if splitting fails.
    pub keep_files: bool,
    /// NOTE(review): not read by the splitting code itself; presumably tells
    /// the caller to suppress the size report — confirm against the CLI layer.
    pub quiet: bool,
    /// When `true`, chunks that contain no lines are not written at all.
    pub elide_empty: bool,
}
31
32impl Default for CsplitConfig {
33 fn default() -> Self {
34 Self {
35 prefix: "xx".to_string(),
36 suffix_format: String::new(),
37 digits: 2,
38 keep_files: false,
39 quiet: false,
40 elide_empty: false,
41 }
42 }
43}
44
45pub fn parse_pattern(s: &str) -> Result<Pattern, String> {
47 let s = s.trim();
48
49 if s == "{*}" {
51 return Ok(Pattern::RepeatForever);
52 }
53
54 if s.starts_with('{') && s.ends_with('}') {
56 let inner = &s[1..s.len() - 1];
57 let n: usize = inner
58 .parse()
59 .map_err(|_| format!("invalid repeat count: '{}'", s))?;
60 return Ok(Pattern::Repeat(n));
61 }
62
63 if s.starts_with('/') {
65 let rest = &s[1..];
66 if let Some(end_pos) = rest.rfind('/') {
67 let regex_str = &rest[..end_pos];
68 let after = rest[end_pos + 1..].trim();
69 let offset = if after.is_empty() {
70 0
71 } else {
72 after
73 .parse::<i64>()
74 .map_err(|_| format!("invalid offset: '{}'", after))?
75 };
76 Regex::new(regex_str).map_err(|e| format!("invalid regex '{}': {}", regex_str, e))?;
78 return Ok(Pattern::Regex {
79 regex: regex_str.to_string(),
80 offset,
81 });
82 }
83 return Err(format!("unmatched '/' in pattern: '{}'", s));
84 }
85
86 if s.starts_with('%') {
88 let rest = &s[1..];
89 if let Some(end_pos) = rest.rfind('%') {
90 let regex_str = &rest[..end_pos];
91 let after = rest[end_pos + 1..].trim();
92 let offset = if after.is_empty() {
93 0
94 } else {
95 after
96 .parse::<i64>()
97 .map_err(|_| format!("invalid offset: '{}'", after))?
98 };
99 Regex::new(regex_str).map_err(|e| format!("invalid regex '{}': {}", regex_str, e))?;
101 return Ok(Pattern::SkipTo {
102 regex: regex_str.to_string(),
103 offset,
104 });
105 }
106 return Err(format!("unmatched '%' in pattern: '{}'", s));
107 }
108
109 let n: usize = s.parse().map_err(|_| format!("invalid pattern: '{}'", s))?;
111 if n == 0 {
112 return Err("line number must be positive".to_string());
113 }
114 Ok(Pattern::LineNumber(n))
115}
116
117pub fn output_filename(config: &CsplitConfig, index: usize) -> String {
119 if config.suffix_format.is_empty() {
120 format!("{}{:0>width$}", config.prefix, index, width = config.digits)
121 } else {
122 let suffix = format_suffix(&config.suffix_format, index);
124 format!("{}{}", config.prefix, suffix)
125 }
126}
127
/// Expands a minimal printf-like template with `value`.
///
/// Supported directives:
///
/// * `%d`, `%Nd`, `%0Nd` – `value` in decimal, optionally right-aligned to
///   width `N` and padded with spaces or (with the `0` flag) zeros
/// * `%%` – a literal percent sign
///
/// Any other directive is not interpreted: the `%`, the `0` flag and the
/// width digits that were read are copied to the output unchanged, and the
/// following character is then treated as ordinary text. (Previously the `0`
/// flag was lost here, turning `"%05x"` into `"%5x"`.)
pub fn format_suffix(fmt: &str, value: usize) -> String {
    let mut result = String::with_capacity(fmt.len());
    let mut chars = fmt.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '%' {
            result.push(ch);
            continue;
        }

        // Optional zero flag followed by an optional decimal width.
        let zero_pad = chars.next_if_eq(&'0').is_some();
        let mut width_str = String::new();
        while let Some(digit) = chars.next_if(char::is_ascii_digit) {
            width_str.push(digit);
        }

        match chars.peek() {
            Some('d') => {
                chars.next();
                // A missing or unparsable width means "no padding"; a width
                // of zero formats exactly like no width at all.
                let width: usize = width_str.parse().unwrap_or(0);
                let number = if zero_pad {
                    format!("{:0>width$}", value, width = width)
                } else {
                    format!("{:>width$}", value, width = width)
                };
                result.push_str(&number);
            }
            Some('%') => {
                chars.next();
                result.push('%');
            }
            _ => {
                // Unknown conversion: give back everything consumed so far,
                // including the zero flag.
                result.push('%');
                if zero_pad {
                    result.push('0');
                }
                result.push_str(&width_str);
            }
        }
    }

    result
}
180
181fn write_chunk(lines: &[String], filename: &str, config: &CsplitConfig) -> Result<u64, String> {
183 if config.elide_empty && lines.is_empty() {
184 return Ok(0);
185 }
186
187 let mut content = String::new();
188 for line in lines {
189 content.push_str(line);
190 content.push('\n');
191 }
192 let bytes = content.len() as u64;
193
194 if config.elide_empty && bytes == 0 {
195 return Ok(0);
196 }
197
198 fs::write(filename, &content).map_err(|e| format!("cannot write '{}': {}", filename, e))?;
199
200 Ok(bytes)
201}
202
203fn find_match(lines: &[String], regex: &Regex, start: usize) -> Option<usize> {
205 for (idx, line) in lines.iter().enumerate().skip(start) {
206 if regex.is_match(line) {
207 return Some(idx);
208 }
209 }
210 None
211}
212
213pub fn csplit_file(
217 input: &str,
218 patterns: &[Pattern],
219 config: &CsplitConfig,
220) -> Result<Vec<u64>, String> {
221 let lines: Vec<String> = input.lines().map(|l| l.to_string()).collect();
222 let total_lines = lines.len();
223
224 let expanded = expand_patterns(patterns)?;
226
227 let mut sizes: Vec<u64> = Vec::new();
228 let mut created_files: Vec<String> = Vec::new();
229 let mut file_index: usize = 0;
230 let mut current_line: usize = 0; let do_cleanup = |files: &[String], config: &CsplitConfig| {
233 if !config.keep_files {
234 for f in files {
235 let _ = fs::remove_file(f);
236 }
237 }
238 };
239
240 for pat in &expanded {
241 match pat {
242 Pattern::LineNumber(n) => {
243 let split_at = *n; if split_at <= current_line {
247 let msg = format!("{}: line number out of range", split_at);
248 do_cleanup(&created_files, config);
249 return Err(msg);
250 }
251
252 let end = if split_at > total_lines {
253 total_lines
254 } else {
255 split_at - 1 };
257
258 let chunk_lines = &lines[current_line..end];
259 let filename = output_filename(config, file_index);
260
261 let bytes = write_chunk(chunk_lines, &filename, config).inspect_err(|_| {
262 do_cleanup(&created_files, config);
263 })?;
264
265 if !(config.elide_empty && chunk_lines.is_empty()) {
266 created_files.push(filename);
267 sizes.push(bytes);
268 file_index += 1;
269 }
270
271 current_line = end;
272 }
273 Pattern::Regex { regex, offset } => {
274 let re = Regex::new(regex).map_err(|e| {
276 do_cleanup(&created_files, config);
277 format!("invalid regex: {}", e)
278 })?;
279
280 let search_start = if current_line > 0
285 && current_line < total_lines
286 && re.is_match(&lines[current_line])
287 {
288 current_line + 1
289 } else {
290 current_line
291 };
292
293 if let Some(match_idx) = find_match(&lines, &re, search_start) {
294 let target = match_idx as i64 + *offset;
296 let split_at = if target < current_line as i64 {
297 current_line
298 } else if target as usize > total_lines {
299 total_lines
300 } else {
301 target as usize
302 };
303
304 let chunk_lines = &lines[current_line..split_at];
305 let filename = output_filename(config, file_index);
306
307 let bytes = write_chunk(chunk_lines, &filename, config).inspect_err(|_| {
308 do_cleanup(&created_files, config);
309 })?;
310
311 if !(config.elide_empty && chunk_lines.is_empty()) {
312 created_files.push(filename);
313 sizes.push(bytes);
314 file_index += 1;
315 }
316
317 current_line = split_at;
318 } else {
319 let msg = format!("{}: no match", regex);
320 do_cleanup(&created_files, config);
321 return Err(msg);
322 }
323 }
324 Pattern::SkipTo { regex, offset } => {
325 let re = Regex::new(regex).map_err(|e| {
327 do_cleanup(&created_files, config);
328 format!("invalid regex: {}", e)
329 })?;
330
331 if let Some(match_idx) = find_match(&lines, &re, current_line) {
332 let target = match_idx as i64 + *offset;
333 let skip_to = if target < current_line as i64 {
334 current_line
335 } else if target as usize > total_lines {
336 total_lines
337 } else {
338 target as usize
339 };
340
341 current_line = skip_to;
343 } else {
344 let msg = format!("{}: no match", regex);
345 do_cleanup(&created_files, config);
346 return Err(msg);
347 }
348 }
349 Pattern::Repeat(_) | Pattern::RepeatForever => {
350 unreachable!("Repeat patterns should be expanded before processing");
352 }
353 }
354 }
355
356 if current_line < total_lines {
358 let chunk_lines = &lines[current_line..total_lines];
359 let filename = output_filename(config, file_index);
360
361 let bytes = write_chunk(chunk_lines, &filename, config).inspect_err(|_| {
362 do_cleanup(&created_files, config);
363 })?;
364
365 if !(config.elide_empty && chunk_lines.is_empty()) {
366 created_files.push(filename);
367 sizes.push(bytes);
368 }
369 } else if !config.elide_empty {
370 let filename = output_filename(config, file_index);
372 let bytes = write_chunk(&[], &filename, config).inspect_err(|_| {
373 do_cleanup(&created_files, config);
374 })?;
375 created_files.push(filename);
376 sizes.push(bytes);
377 }
378
379 Ok(sizes)
380}
381
382fn expand_patterns(patterns: &[Pattern]) -> Result<Vec<Pattern>, String> {
385 let mut expanded: Vec<Pattern> = Vec::new();
386 let mut i = 0;
387
388 while i < patterns.len() {
389 match &patterns[i] {
390 Pattern::Repeat(n) => {
391 if expanded.is_empty() {
392 return Err("{N}: no preceding pattern to repeat".to_string());
393 }
394 let prev = expanded.last().unwrap().clone();
395 for _ in 0..*n {
396 expanded.push(prev.clone());
397 }
398 i += 1;
399 }
400 Pattern::RepeatForever => {
401 if expanded.is_empty() {
402 return Err("{*}: no preceding pattern to repeat".to_string());
403 }
404 let prev = expanded.last().unwrap().clone();
408 for _ in 0..10000 {
409 expanded.push(prev.clone());
410 }
411 i += 1;
412 }
413 other => {
414 expanded.push(other.clone());
415 i += 1;
416 }
417 }
418 }
419
420 Ok(expanded)
421}
422
423pub fn csplit_from_path(
425 path: &str,
426 patterns: &[Pattern],
427 config: &CsplitConfig,
428) -> Result<Vec<u64>, String> {
429 let input = if path == "-" {
430 let mut buf = String::new();
431 io::stdin()
432 .read_line(&mut buf)
433 .map_err(|e| format!("read error: {}", e))?;
434 let mut all = buf;
436 let mut line = String::new();
437 while io::stdin()
438 .read_line(&mut line)
439 .map_err(|e| format!("read error: {}", e))?
440 > 0
441 {
442 all.push_str(&line);
443 line.clear();
444 }
445 all
446 } else {
447 fs::read_to_string(path).map_err(|e| format!("cannot open '{}': {}", path, e))?
448 };
449
450 csplit_file(&input, patterns, config)
451}
452
/// Prints each size on its own line to standard output, in the given order.
pub fn print_sizes(sizes: &[u64]) {
    sizes.iter().for_each(|bytes| println!("{}", bytes));
}
458}