1use regex::Regex;
2use std::fs;
3use std::io;
4
/// One parsed command-line pattern that tells the splitter where to cut the input.
///
/// `PartialEq`/`Eq` are derived so that parsed patterns can be compared directly
/// (for example when checking the result of `parse_pattern`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pattern {
    /// `/regex/[offset]`: write the pending lines up to the matching line, shifted by
    /// `offset` lines, into a new output file.
    Regex { regex: String, offset: i64 },
    /// `%regex%[offset]`: like [`Pattern::Regex`], but the pending lines are discarded
    /// instead of written.
    SkipTo { regex: String, offset: i64 },
    /// A 1-based line number; the output file ends just before that line.
    LineNumber(usize),
    /// `{N}`: repeat the preceding non-repeat pattern `N` more times.
    Repeat(usize),
    /// `{*}`: repeat the preceding non-repeat pattern as often as possible.
    RepeatForever,
}
20
/// Options controlling how output files are named and written.
///
/// `PartialEq`/`Eq` are derived so configurations can be compared directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CsplitConfig {
    /// Text placed in front of every output file name.
    pub prefix: String,
    /// printf-like template for the numeric suffix; when empty, the suffix is the file
    /// index zero-padded to `digits` characters.
    pub suffix_format: String,
    /// Minimum width of the numeric suffix when `suffix_format` is empty.
    pub digits: usize,
    /// When `true`, files that were already written are kept if splitting fails.
    pub keep_files: bool,
    /// Not read by any function in this file; presumably suppresses the size report
    /// (confirm against the caller).
    pub quiet: bool,
    /// When `true`, chunks without any lines are not written to disk.
    pub elide_empty: bool,
}
31
32impl Default for CsplitConfig {
33 fn default() -> Self {
34 Self {
35 prefix: "xx".to_string(),
36 suffix_format: String::new(),
37 digits: 2,
38 keep_files: false,
39 quiet: false,
40 elide_empty: false,
41 }
42 }
43}
44
45pub fn parse_pattern(s: &str) -> Result<Pattern, String> {
47 let s = s.trim();
48
49 if s == "{*}" {
51 return Ok(Pattern::RepeatForever);
52 }
53
54 if s.starts_with('{') && s.ends_with('}') {
56 let inner = &s[1..s.len() - 1];
57 let n: usize = inner
58 .parse()
59 .map_err(|_| format!("invalid repeat count: '{}'", s))?;
60 return Ok(Pattern::Repeat(n));
61 }
62
63 if s.starts_with('/') {
65 let rest = &s[1..];
66 if let Some(end_pos) = rest.rfind('/') {
67 let regex_str = &rest[..end_pos];
68 let after = rest[end_pos + 1..].trim();
69 let offset = if after.is_empty() {
70 0
71 } else {
72 after
73 .parse::<i64>()
74 .map_err(|_| format!("invalid offset: '{}'", after))?
75 };
76 Regex::new(regex_str).map_err(|e| format!("invalid regex '{}': {}", regex_str, e))?;
78 return Ok(Pattern::Regex {
79 regex: regex_str.to_string(),
80 offset,
81 });
82 }
83 return Err(format!("unmatched '/' in pattern: '{}'", s));
84 }
85
86 if s.starts_with('%') {
88 let rest = &s[1..];
89 if let Some(end_pos) = rest.rfind('%') {
90 let regex_str = &rest[..end_pos];
91 let after = rest[end_pos + 1..].trim();
92 let offset = if after.is_empty() {
93 0
94 } else {
95 after
96 .parse::<i64>()
97 .map_err(|_| format!("invalid offset: '{}'", after))?
98 };
99 Regex::new(regex_str).map_err(|e| format!("invalid regex '{}': {}", regex_str, e))?;
101 return Ok(Pattern::SkipTo {
102 regex: regex_str.to_string(),
103 offset,
104 });
105 }
106 return Err(format!("unmatched '%' in pattern: '{}'", s));
107 }
108
109 let n: usize = s.parse().map_err(|_| format!("invalid pattern: '{}'", s))?;
111 if n == 0 {
112 return Err("line number must be positive".to_string());
113 }
114 Ok(Pattern::LineNumber(n))
115}
116
117pub fn output_filename(config: &CsplitConfig, index: usize) -> String {
119 if config.suffix_format.is_empty() {
120 format!("{}{:0>width$}", config.prefix, index, width = config.digits)
121 } else {
122 let suffix = format_suffix(&config.suffix_format, index);
124 format!("{}{}", config.prefix, suffix)
125 }
126}
127
/// Expands a small printf-like template with `value`.
///
/// Supported directives:
/// - `%d`, `%Nd` (right-aligned, space padded to width `N`) and `%0Nd` (zero padded),
/// - `%%` for a literal percent sign.
///
/// Every other character is copied unchanged. An unsupported directive (for example
/// `%05x`) is echoed back verbatim; previously the `0` flag was silently dropped in
/// that case, turning `%05x` into `%5x`.
pub fn format_suffix(fmt: &str, value: usize) -> String {
    let mut result = String::with_capacity(fmt.len());
    let mut chars = fmt.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '%' {
            result.push(ch);
            continue;
        }

        // Optional zero-padding flag, then an optional decimal width.
        let zero_pad = chars.next_if_eq(&'0').is_some();
        let mut width_str = String::new();
        while let Some(digit) = chars.next_if(char::is_ascii_digit) {
            width_str.push(digit);
        }

        match chars.peek().copied() {
            Some('d') => {
                chars.next();
                // A missing or unparsable width means "no padding".
                let width: usize = width_str.parse().unwrap_or(0);
                let rendered = if width == 0 {
                    value.to_string()
                } else if zero_pad {
                    format!("{:0>width$}", value, width = width)
                } else {
                    format!("{:>width$}", value, width = width)
                };
                result.push_str(&rendered);
            }
            Some('%') => {
                chars.next();
                result.push('%');
            }
            _ => {
                // Not a directive we understand: put back everything consumed so far,
                // including the zero flag; the following character is copied by the
                // next loop iteration.
                result.push('%');
                if zero_pad {
                    result.push('0');
                }
                result.push_str(&width_str);
            }
        }
    }

    result
}
180
181fn write_chunk(lines: &[String], filename: &str, config: &CsplitConfig) -> Result<u64, String> {
183 if config.elide_empty && lines.is_empty() {
184 return Ok(0);
185 }
186
187 let mut content = String::new();
188 for line in lines {
189 content.push_str(line);
190 content.push('\n');
191 }
192 let bytes = content.len() as u64;
193
194 if config.elide_empty && bytes == 0 {
195 return Ok(0);
196 }
197
198 fs::write(filename, &content).map_err(|e| format!("cannot write '{}': {}", filename, e))?;
199
200 Ok(bytes)
201}
202
203fn find_match(lines: &[String], regex: &Regex, start: usize) -> Option<usize> {
205 for (idx, line) in lines.iter().enumerate().skip(start) {
206 if regex.is_match(line) {
207 return Some(idx);
208 }
209 }
210 None
211}
212
213fn apply_regex_pattern(
217 lines: &[String],
218 total_lines: usize,
219 regex: &str,
220 offset: i64,
221 is_skip: bool,
222 current_line: &mut usize,
223 skip_current: &mut bool,
224 sizes: &mut Vec<u64>,
225 created_files: &mut Vec<String>,
226 file_index: &mut usize,
227 config: &CsplitConfig,
228 graceful_no_match: bool,
229) -> Result<bool, String> {
230 let re = Regex::new(regex).map_err(|e| format!("invalid regex: {}", e))?;
231
232 let search_start =
235 if *skip_current && *current_line < total_lines && re.is_match(&lines[*current_line]) {
236 *current_line + 1
237 } else {
238 *current_line
239 };
240
241 let match_idx = match find_match(lines, &re, search_start) {
242 Some(idx) => idx,
243 None => {
244 if graceful_no_match {
245 return Ok(false);
246 }
247 return Err(format!("{}: no match", regex));
248 }
249 };
250
251 let target = match_idx as i64 + offset;
252 let split_at = if target < *current_line as i64 {
253 *current_line
254 } else if target as usize > total_lines {
255 total_lines
256 } else {
257 target as usize
258 };
259
260 if is_skip {
261 *current_line = split_at;
263 *skip_current = false;
264 } else {
265 let chunk_lines = &lines[*current_line..split_at];
267 let filename = output_filename(config, *file_index);
268 let bytes = write_chunk(chunk_lines, &filename, config)?;
269
270 if !(config.elide_empty && chunk_lines.is_empty()) {
271 created_files.push(filename);
272 sizes.push(bytes);
273 *file_index += 1;
274 }
275
276 *current_line = split_at;
277 *skip_current = offset == 0;
279 }
280
281 Ok(true)
282}
283
284pub fn csplit_file(
288 input: &str,
289 patterns: &[Pattern],
290 config: &CsplitConfig,
291) -> Result<Vec<u64>, String> {
292 let lines: Vec<String> = input.lines().map(|l| l.to_string()).collect();
293 let total_lines = lines.len();
294
295 let mut sizes: Vec<u64> = Vec::new();
296 let mut created_files: Vec<String> = Vec::new();
297 let mut file_index: usize = 0;
298 let mut current_line: usize = 0; let mut skip_current = false; let do_cleanup = |files: &[String], config: &CsplitConfig| {
302 if !config.keep_files {
303 for f in files {
304 let _ = fs::remove_file(f);
305 }
306 }
307 };
308
309 let mut pat_idx = 0;
310 while pat_idx < patterns.len() {
311 match &patterns[pat_idx] {
312 Pattern::LineNumber(n) => {
313 let split_at = *n;
315 if split_at <= current_line {
316 let msg = format!("{}: line number out of range", split_at);
317 do_cleanup(&created_files, config);
318 return Err(msg);
319 }
320
321 let end = if split_at > total_lines {
322 total_lines
323 } else {
324 split_at - 1
325 };
326
327 let chunk_lines = &lines[current_line..end];
328 let filename = output_filename(config, file_index);
329
330 let bytes = write_chunk(chunk_lines, &filename, config).inspect_err(|_| {
331 do_cleanup(&created_files, config);
332 })?;
333
334 if !(config.elide_empty && chunk_lines.is_empty()) {
335 created_files.push(filename);
336 sizes.push(bytes);
337 file_index += 1;
338 }
339
340 current_line = end;
341 skip_current = false;
342 pat_idx += 1;
343 }
344 Pattern::Regex { regex, offset } => {
345 let regex = regex.clone();
346 let offset = *offset;
347 if let Err(e) = apply_regex_pattern(
348 &lines,
349 total_lines,
350 ®ex,
351 offset,
352 false,
353 &mut current_line,
354 &mut skip_current,
355 &mut sizes,
356 &mut created_files,
357 &mut file_index,
358 config,
359 false,
360 ) {
361 do_cleanup(&created_files, config);
362 return Err(e);
363 }
364 pat_idx += 1;
365 }
366 Pattern::SkipTo { regex, offset } => {
367 let regex = regex.clone();
368 let offset = *offset;
369 if let Err(e) = apply_regex_pattern(
370 &lines,
371 total_lines,
372 ®ex,
373 offset,
374 true,
375 &mut current_line,
376 &mut skip_current,
377 &mut sizes,
378 &mut created_files,
379 &mut file_index,
380 config,
381 false,
382 ) {
383 do_cleanup(&created_files, config);
384 return Err(e);
385 }
386 pat_idx += 1;
387 }
388 Pattern::Repeat(n) => {
389 let n = *n;
390 if pat_idx == 0 {
391 do_cleanup(&created_files, config);
392 return Err("{N}: no preceding pattern to repeat".to_string());
393 }
394 let prev_pat = find_prev_pattern(patterns, pat_idx);
396 let prev_pat = match prev_pat {
397 Some(p) => p.clone(),
398 None => {
399 do_cleanup(&created_files, config);
400 return Err("{N}: no preceding pattern to repeat".to_string());
401 }
402 };
403 for _ in 0..n {
404 match &prev_pat {
405 Pattern::LineNumber(ln) => {
406 let end = if *ln > total_lines {
409 total_lines
410 } else {
411 *ln - 1
412 };
413 if end <= current_line {
414 let msg = format!("{}: line number out of range", ln);
415 do_cleanup(&created_files, config);
416 return Err(msg);
417 }
418 let chunk_lines = &lines[current_line..end];
419 let filename = output_filename(config, file_index);
420 let bytes =
421 write_chunk(chunk_lines, &filename, config).inspect_err(|_| {
422 do_cleanup(&created_files, config);
423 })?;
424 if !(config.elide_empty && chunk_lines.is_empty()) {
425 created_files.push(filename);
426 sizes.push(bytes);
427 file_index += 1;
428 }
429 current_line = end;
430 skip_current = false;
431 }
432 Pattern::Regex { regex, offset } => {
433 if let Err(e) = apply_regex_pattern(
434 &lines,
435 total_lines,
436 regex,
437 *offset,
438 false,
439 &mut current_line,
440 &mut skip_current,
441 &mut sizes,
442 &mut created_files,
443 &mut file_index,
444 config,
445 false,
446 ) {
447 do_cleanup(&created_files, config);
448 return Err(e);
449 }
450 }
451 Pattern::SkipTo { regex, offset } => {
452 if let Err(e) = apply_regex_pattern(
453 &lines,
454 total_lines,
455 regex,
456 *offset,
457 true,
458 &mut current_line,
459 &mut skip_current,
460 &mut sizes,
461 &mut created_files,
462 &mut file_index,
463 config,
464 false,
465 ) {
466 do_cleanup(&created_files, config);
467 return Err(e);
468 }
469 }
470 _ => {}
471 }
472 }
473 pat_idx += 1;
474 }
475 Pattern::RepeatForever => {
476 if pat_idx == 0 {
477 do_cleanup(&created_files, config);
478 return Err("{*}: no preceding pattern to repeat".to_string());
479 }
480 let prev_pat = find_prev_pattern(patterns, pat_idx);
481 let prev_pat = match prev_pat {
482 Some(p) => p.clone(),
483 None => {
484 do_cleanup(&created_files, config);
485 return Err("{*}: no preceding pattern to repeat".to_string());
486 }
487 };
488 loop {
490 match &prev_pat {
491 Pattern::Regex { regex, offset } => {
492 match apply_regex_pattern(
493 &lines,
494 total_lines,
495 regex,
496 *offset,
497 false,
498 &mut current_line,
499 &mut skip_current,
500 &mut sizes,
501 &mut created_files,
502 &mut file_index,
503 config,
504 true, ) {
506 Ok(true) => continue,
507 Ok(false) => break,
508 Err(e) => {
509 do_cleanup(&created_files, config);
510 return Err(e);
511 }
512 }
513 }
514 Pattern::SkipTo { regex, offset } => {
515 match apply_regex_pattern(
516 &lines,
517 total_lines,
518 regex,
519 *offset,
520 true,
521 &mut current_line,
522 &mut skip_current,
523 &mut sizes,
524 &mut created_files,
525 &mut file_index,
526 config,
527 true,
528 ) {
529 Ok(true) => continue,
530 Ok(false) => break,
531 Err(e) => {
532 do_cleanup(&created_files, config);
533 return Err(e);
534 }
535 }
536 }
537 _ => break,
538 }
539 }
540 pat_idx += 1;
541 }
542 }
543 }
544
545 if current_line < total_lines {
547 let chunk_lines = &lines[current_line..total_lines];
548 let filename = output_filename(config, file_index);
549
550 let bytes = write_chunk(chunk_lines, &filename, config).inspect_err(|_| {
551 do_cleanup(&created_files, config);
552 })?;
553
554 if !(config.elide_empty && chunk_lines.is_empty()) {
555 created_files.push(filename);
556 sizes.push(bytes);
557 }
558 } else if !config.elide_empty {
559 let filename = output_filename(config, file_index);
561 let bytes = write_chunk(&[], &filename, config).inspect_err(|_| {
562 do_cleanup(&created_files, config);
563 })?;
564 created_files.push(filename);
565 sizes.push(bytes);
566 }
567
568 Ok(sizes)
569}
570
571fn find_prev_pattern(patterns: &[Pattern], idx: usize) -> Option<&Pattern> {
573 let mut i = idx;
574 while i > 0 {
575 i -= 1;
576 match &patterns[i] {
577 Pattern::Repeat(_) | Pattern::RepeatForever => continue,
578 other => return Some(other),
579 }
580 }
581 None
582}
583
584pub fn csplit_from_path(
586 path: &str,
587 patterns: &[Pattern],
588 config: &CsplitConfig,
589) -> Result<Vec<u64>, String> {
590 let input = if path == "-" {
591 let mut buf = String::new();
592 io::stdin()
593 .read_line(&mut buf)
594 .map_err(|e| format!("read error: {}", e))?;
595 let mut all = buf;
597 let mut line = String::new();
598 while io::stdin()
599 .read_line(&mut line)
600 .map_err(|e| format!("read error: {}", e))?
601 > 0
602 {
603 all.push_str(&line);
604 line.clear();
605 }
606 all
607 } else {
608 fs::read_to_string(path).map_err(|e| format!("cannot open '{}': {}", path, e))?
609 };
610
611 csplit_file(&input, patterns, config)
612}
613
/// Prints every size in `sizes` on its own line to standard output.
pub fn print_sizes(sizes: &[u64]) {
    sizes.iter().for_each(|size| println!("{}", size));
}