1use std::{cmp, str};
7
8extern crate rtools_traits;
9use rtools_traits::{RtoolT, LineProcessorT};
10
11const VERSION: &'static str = env!("CARGO_PKG_VERSION");
13
14pub fn version() -> &'static str {
16 VERSION
17}
18
/// Parses one comma-free selector into an inclusive, 1-based `(start, end)` pair.
///
/// Accepted forms:
///
/// * `N`   - the single position `N`, returned as `(N, N)`
/// * `N-M` - positions `N` through `M`
/// * `-M`  - positions `1` through `M`
/// * `N-`  - position `N` through the end of the line; the open end is
///   encoded as `std::usize::MAX`
///
/// A reversed range such as `9-5` is returned unchanged as `(9, 5)`.
///
/// # Panics
///
/// Panics when `char_part` is empty, is a lone `-`, contains more than one
/// `-`, has an endpoint that is not an unsigned integer, or names position
/// `0` (positions are numbered from 1, so `0` can never select anything).
pub fn str_to_ranged_pair(char_part: &str) -> (usize, usize) {
    assert!(char_part != "-", "invalid range with no endpoint: -");

    // Parses a single explicit endpoint, rejecting anything that is not a
    // positive integer so that bad input fails here with a clear message
    // instead of as an arithmetic overflow further down the pipeline.
    let parse_pos = |text: &str| -> usize {
        let pos = match text.parse::<usize>() {
            Ok(pos) => pos,
            Err(err) => panic!(
                "invalid position {:?} in range {:?}: {}",
                text, char_part, err
            ),
        };
        assert!(
            pos != 0,
            "positions are numbered from 1, got 0 in range {:?}",
            char_part
        );
        pos
    };

    let str_pos: Vec<&str> = char_part.split('-').collect();

    match str_pos.as_slice() {
        // No dash at all: a single position.
        &[single] => {
            let pos = parse_pos(single);
            (pos, pos)
        }
        // Exactly one dash: either side may be omitted.
        &[start, end] => {
            let start_pos = if start.is_empty() { 1 } else { parse_pos(start) };
            let end_pos = if end.is_empty() {
                std::usize::MAX
            } else {
                parse_pos(end)
            };
            (start_pos, end_pos)
        }
        _ => panic!("invalid range with more than one '-': {:?}", char_part),
    }
}
46
47pub fn extract_ranged_pairs(ranged_pairs_str: &str) -> Vec<(usize, usize)> {
49 let unsorted_ranged_pairs: Vec<(usize, usize)> = ranged_pairs_str
50 .split(",")
51 .map(|char_part| str_to_ranged_pair(char_part))
52 .filter(|(start_pos, end_pos)| start_pos <= end_pos)
53 .collect();
54
55 unsorted_ranged_pairs
56}
57
/// Sorts the given inclusive `(start, end)` pairs and merges every pair that
/// overlaps or directly follows the previous one.
///
/// Two pairs are merged when the later one starts no more than one position
/// after the earlier one ends, so `(3, 4)` and `(5, 9)` become `(3, 9)` while
/// `(2, 10)` and `(12, 20)` stay separate. The result is sorted by start.
pub fn merge_ranged_pairs(mut unsorted_ranged_pairs: Vec<(usize, usize)>) -> Vec<(usize, usize)> {
    unsorted_ranged_pairs.sort();

    let mut ranged_pairs: Vec<(usize, usize)> = Vec::with_capacity(unsorted_ranged_pairs.len());

    for ranged_pair in unsorted_ranged_pairs {
        if let Some(last) = ranged_pairs.last_mut() {
            // `last.1 + 1` is written as a saturating add so an open-ended
            // range (`usize::MAX`) cannot overflow, and no subtraction is done
            // on the start so a start of 0 cannot underflow.
            if ranged_pair.0 <= last.1.saturating_add(1) {
                last.1 = cmp::max(last.1, ranged_pair.1);
                continue;
            }
        }

        ranged_pairs.push(ranged_pair);
    }

    ranged_pairs
}
82
83pub fn prepare_ranged_pairs(no_merge: bool, ranged_pairs_str: &str) -> Vec<(usize, usize)> {
85 let unsorted_ranged_pairs = extract_ranged_pairs(ranged_pairs_str);
86
87 let ranged_pairs = if no_merge {
88 unsorted_ranged_pairs
89 } else {
90 merge_ranged_pairs(unsorted_ranged_pairs)
91 };
92
93 ranged_pairs
94}
95
/// Settings required by the character- and byte-based line processors.
pub trait CharContextT {
    /// Returns the `(start, end)` position pairs to select from each line.
    fn ranged_pairs(&self) -> &Vec<(usize, usize)>;
}
99
/// Settings required by the field-based line processor.
pub trait FieldContextT {
    /// Returns the `(start, end)` field-number pairs to select from each line.
    fn ranged_pairs(&self) -> &Vec<(usize, usize)>;

    /// Returns the delimiter that separates fields.
    fn delim(&self) -> &str;
}
105
/// Borrowed settings for character- and byte-based selection.
pub struct CharContext<'a> {
    /// The `(start, end)` position pairs to select.
    ranged_pairs: &'a Vec<(usize, usize)>,
}

impl<'a> CharContext<'a> {
    /// Creates a context that borrows `ranged_pairs` for its whole lifetime.
    pub fn new(ranged_pairs: &'a Vec<(usize, usize)>) -> Self {
        Self { ranged_pairs }
    }
}
117
118impl CharContextT for CharContext<'_> {
119 fn ranged_pairs(&self) -> &Vec<(usize, usize)> {
120 self.ranged_pairs
121 }
122}
123
/// Borrowed settings for field-based selection.
pub struct FieldContext<'a> {
    /// The `(start, end)` field-number pairs to select.
    ranged_pairs: &'a Vec<(usize, usize)>,
    /// The delimiter that separates fields.
    delim: &'a str,
}

impl<'a> FieldContext<'a> {
    /// Creates a context that borrows both the pair list and the delimiter.
    pub fn new(ranged_pairs: &'a Vec<(usize, usize)>, delim: &'a str) -> Self {
        Self {
            ranged_pairs,
            delim,
        }
    }
}
137
138impl FieldContextT for FieldContext<'_> {
139 fn ranged_pairs(&self) -> &Vec<(usize, usize)> {
140 self.ranged_pairs
141 }
142
143 fn delim(&self) -> &str {
144 self.delim
145 }
146}
147
/// Stateless line processor that selects by character position; see its
/// `LineProcessorT` implementation.
pub struct CharUtf8LineProcessor {}
149
/// Selects characters from `line` by 1-based position and returns their UTF-8
/// bytes followed by a trailing `\n`.
///
/// Each `(start, end)` pair in `ranged_pairs` is an inclusive range of
/// character positions, counted in Unicode scalar values. Pairs are handled
/// independently and in the order given, so overlapping or unsorted pairs
/// repeat or reorder characters in the output.
///
/// Every pair is clamped to the characters that actually exist: positions past
/// the end of the line, position `0`, and pairs whose start lies after their
/// end contribute nothing instead of panicking.
pub fn process_line_by_char_utf8(line: &str, ranged_pairs: &Vec<(usize, usize)>) -> Vec<u8> {
    // Byte offset at which every character starts, plus one final entry for
    // the end of the line, so character `n` (0-based) occupies the bytes
    // `boundaries[n]..boundaries[n + 1]`.
    let boundaries: Vec<usize> = line
        .char_indices()
        .map(|(offset, _)| offset)
        .chain(std::iter::once(line.len()))
        .collect();
    let char_count = boundaries.len() - 1;

    let bytes = line.as_bytes();
    let mut out_bytes: Vec<u8> = Vec::with_capacity(bytes.len() + 1);

    for &(start_pos, end_pos) in ranged_pairs {
        // Convert the inclusive 1-based pair into a half-open 0-based range;
        // `saturating_sub` keeps a start of 0 from underflowing.
        let first = start_pos.saturating_sub(1);
        let last = end_pos.min(char_count);

        if first < last {
            out_bytes.extend_from_slice(&bytes[boundaries[first]..boundaries[last]]);
        }
    }

    out_bytes.push(b'\n');
    out_bytes
}
173
174impl<C: CharContextT> LineProcessorT<C> for CharUtf8LineProcessor {
175 fn process(&self, line: &str, context: &C) -> Vec<u8> {
177 process_line_by_char_utf8(line, context.ranged_pairs())
178 }
179}
180
/// Stateless line processor that selects by byte position; see its
/// `LineProcessorT` implementation.
pub struct ByteLineProcessor {}
182
/// Selects bytes from `line` by 1-based position and returns them followed by
/// a trailing `\n`.
///
/// Each `(start, end)` pair in `ranged_pairs` is an inclusive range of byte
/// positions. Pairs are handled independently and in the order given, so
/// overlapping or unsorted pairs repeat or reorder bytes in the output.
/// Because selection is byte-wise, the result is not guaranteed to be valid
/// UTF-8 when `line` contains multi-byte characters.
///
/// Every pair is clamped to the bytes that actually exist: positions past the
/// end of the line, position `0`, and pairs whose start lies after their end
/// contribute nothing instead of panicking. A pair that falls outside the
/// line never prevents later pairs from being processed.
pub fn process_line_by_byte(line: &str, ranged_pairs: &Vec<(usize, usize)>) -> Vec<u8> {
    let bytes = line.as_bytes();
    let mut out_bytes: Vec<u8> = Vec::new();

    for &(start_pos, end_pos) in ranged_pairs {
        // Convert the inclusive 1-based pair into a half-open 0-based range;
        // `saturating_sub` keeps a start of 0 from underflowing.
        let first = start_pos.saturating_sub(1);
        let last = end_pos.min(bytes.len());

        // Skip (rather than stop at) an empty selection: the pairs may be
        // unsorted, so a later pair can still fall inside the line.
        if first < last {
            out_bytes.extend_from_slice(&bytes[first..last]);
        }
    }

    out_bytes.push(b'\n');
    out_bytes
}
208
209impl<C: CharContextT> LineProcessorT<C> for ByteLineProcessor {
210 fn process(&self, line: &str, context: &C) -> Vec<u8> {
212 process_line_by_byte(line, context.ranged_pairs())
213 }
214}
215
216pub struct CharProcessor {}
217
218impl<C: CharContextT, P: LineProcessorT<C>> RtoolT<C, P> for CharProcessor {}
219
/// Stateless line processor that selects delimiter-separated fields; see its
/// `LineProcessorT` implementation.
pub struct FieldUtf8LineProcessor {}
221
/// Splits `line` on `delim`, selects fields by 1-based number and returns them
/// re-joined with `delim`, followed by a trailing `\n`.
///
/// Each `(start, end)` pair in `ranged_pairs` is an inclusive range of field
/// numbers. Pairs are handled independently and in the order given, so
/// overlapping or unsorted pairs repeat or reorder fields in the output. The
/// delimiter is written between selected fields only, never before the first
/// or after the last one; a selected empty field still counts as a field.
///
/// Every pair is clamped to the fields that actually exist: field numbers past
/// the last field, field `0`, and pairs whose start lies after their end
/// contribute nothing instead of panicking. A pair that falls outside the
/// line never prevents later pairs from being processed.
pub fn process_line_by_field_utf8(
    line: &str,
    ranged_pairs: &Vec<(usize, usize)>,
    delim: &str,
) -> Vec<u8> {
    let fields: Vec<&str> = line.split(delim).collect();
    let mut out_bytes: Vec<u8> = Vec::new();
    let mut has_written = false;

    for &(start_pos, end_pos) in ranged_pairs {
        // Convert the inclusive 1-based pair into a half-open 0-based range;
        // `saturating_sub` keeps a start of 0 from underflowing.
        let first = start_pos.saturating_sub(1);
        let last = end_pos.min(fields.len());

        // Skip (rather than stop at) an empty selection: the pairs may be
        // unsorted, so a later pair can still fall inside the line.
        if first >= last {
            continue;
        }

        for field in &fields[first..last] {
            if has_written {
                out_bytes.extend_from_slice(delim.as_bytes());
            }
            has_written = true;

            out_bytes.extend_from_slice(field.as_bytes());
        }
    }

    out_bytes.push(b'\n');
    out_bytes
}
261
262impl<C: FieldContextT> LineProcessorT<C> for FieldUtf8LineProcessor {
263 fn process(&self, line: &str, context: &C) -> Vec<u8> {
265 process_line_by_field_utf8(line, context.ranged_pairs(), context.delim())
266 }
267}
268
269pub struct FieldProcessor {}
270
271impl<C: FieldContextT, P: LineProcessorT<C>> RtoolT<C, P> for FieldProcessor {}
272
273#[cfg(test)]
274mod tests {
275 use super::*;
276
277 const _STR_RANGES_01: &'static str = "9,4,7,3,12,5-15";
278 const _STR_BIRDS: &'static str = "ðĶðððĢðĪðĨðĶð§ððĶ
ðĶðĶĒðĶðĶðĶ";
279 const _STR_BIRDS_OUTPUT: &'static str = "ððĢðĶððĶĒðĪðĨðĶð§ððĶ
ðĶðĶĒðĶðĶðĶ\n";
280 const _STR_ALPHABET: &'static str = "abcdefghijklmnopqrstuvwxyz";
281 const _STR_ALPHABET_OUTPUT: &'static str = "idgclefghijklmno\n";
282
283 #[test]
284 fn test_str_to_ranged_pair_valid_inputs() {
285 assert_eq!(str_to_ranged_pair("1"), (1, 1));
286 assert_eq!(str_to_ranged_pair("2"), (2, 2));
287 assert_eq!(str_to_ranged_pair("-20"), (1, 20));
288 assert_eq!(str_to_ranged_pair("20-"), (20, std::usize::MAX));
289 assert_eq!(str_to_ranged_pair("3-7"), (3, 7));
290 }
291
292 #[test]
293 #[should_panic]
294 fn test_str_to_ranged_pair_empty_input() {
295 str_to_ranged_pair("");
296 }
297
298 #[test]
299 #[should_panic]
300 fn test_str_to_ranged_pair_no_range() {
301 str_to_ranged_pair("-");
302 }
303
304 #[test]
305 #[should_panic]
306 fn test_str_to_ranged_pair_invalid_char() {
307 str_to_ranged_pair(";");
308 }
309
310 #[test]
311 #[should_panic]
312 fn test_str_to_ranged_pair_space() {
313 str_to_ranged_pair(" ");
314 }
315
316 #[test]
317 #[should_panic]
318 fn test_str_to_ranged_pair_tab() {
319 str_to_ranged_pair("\t");
320 }
321
322 #[test]
323 fn test_extract_ranged_pairs_basic_valid_inputs() {
324 assert_eq!(extract_ranged_pairs("1"), vec![(1, 1)]);
325 assert_eq!(extract_ranged_pairs("1-8"), vec![(1, 8)]);
326 assert_eq!(extract_ranged_pairs("5-9"), vec![(5, 9)]);
327 assert_eq!(extract_ranged_pairs("9-5"), vec![]);
328 assert_eq!(extract_ranged_pairs("-5"), vec![(1, 5)]);
329 assert_eq!(extract_ranged_pairs("5-"), vec![(5, std::usize::MAX)]);
330 }
331
332 #[test]
333 fn test_extract_ranged_pairs_ensure_no_sorting() {
334 assert_eq!(
335 extract_ranged_pairs("3,4,5-"),
336 vec![(3, 3), (4, 4), (5, std::usize::MAX)]
337 );
338 assert_eq!(
339 extract_ranged_pairs("5-,3,4"),
340 vec![(5, std::usize::MAX), (3, 3), (4, 4)]
341 );
342 assert_eq!(
343 extract_ranged_pairs("6-10,5-"),
344 vec![(6, 10), (5, std::usize::MAX)]
345 );
346 assert_eq!(
347 extract_ranged_pairs("7,6-10,5-"),
348 vec![(7, 7), (6, 10), (5, std::usize::MAX)]
349 );
350 }
351
352 #[test]
353 #[should_panic]
354 fn test_extract_ranged_pairs_empty() {
355 extract_ranged_pairs("");
356 }
357
358 #[test]
359 #[should_panic]
360 fn test_extract_ranged_pairs_bad_range() {
361 extract_ranged_pairs("-");
362 }
363
364 #[test]
365 fn test_merge_ranged_pairs() {
366 assert_eq!(
367 merge_ranged_pairs(extract_ranged_pairs("3,4,5-")),
368 vec![(3, std::usize::MAX)]
369 );
370 assert_eq!(
371 merge_ranged_pairs(extract_ranged_pairs("3-4,5-")),
372 vec![(3, std::usize::MAX)]
373 );
374 assert_eq!(
375 merge_ranged_pairs(extract_ranged_pairs("3-5,5-")),
376 vec![(3, std::usize::MAX)]
377 );
378 assert_eq!(
379 merge_ranged_pairs(extract_ranged_pairs("3-6,5-")),
380 vec![(3, std::usize::MAX)]
381 );
382 assert_eq!(
383 merge_ranged_pairs(extract_ranged_pairs("7,6-10,5-")),
384 vec![(5, std::usize::MAX)]
385 );
386 assert_eq!(
387 merge_ranged_pairs(extract_ranged_pairs("3-7,8,2-10,12-20")),
388 vec![(2, 10), (12, 20)]
389 );
390 assert_eq!(
391 merge_ranged_pairs(extract_ranged_pairs("3-7,8,2-10,11-20")),
392 vec![(2, 20)]
393 );
394 }
395
396 #[test]
397 fn test_process_line_utf8() {
398 let char_processor = CharUtf8LineProcessor {};
399 let ranged_pairs = extract_ranged_pairs(_STR_RANGES_01);
400 assert_eq!(
401 _STR_BIRDS_OUTPUT.as_bytes().to_vec(),
402 char_processor.process(
403 _STR_BIRDS,
404 &CharContext {
405 ranged_pairs: &ranged_pairs
406 }
407 )
408 );
409 }
410
411 #[test]
412 fn test_process_line_ascii() {
413 let char_processor = ByteLineProcessor {};
414 let ranged_pairs = extract_ranged_pairs(_STR_RANGES_01);
415 assert_eq!(
416 _STR_ALPHABET_OUTPUT.as_bytes().to_vec(),
417 char_processor.process(
418 _STR_ALPHABET,
419 &CharContext {
420 ranged_pairs: &ranged_pairs
421 }
422 )
423 );
424 }
425
426 #[test]
427 #[should_panic]
428 fn test_process_line_ascii_panic() {
429 let char_processor = ByteLineProcessor {};
430 let ranged_pairs = extract_ranged_pairs(_STR_RANGES_01);
431 assert_eq!(
432 _STR_BIRDS_OUTPUT.as_bytes().to_vec(),
433 char_processor.process(
434 _STR_BIRDS,
435 &CharContext {
436 ranged_pairs: &ranged_pairs
437 }
438 )
439 );
440 }
441
442 #[test]
443 fn test_process_lines_utf8_with_cursor() {
444 use std::io::{BufReader, BufWriter};
445 use std::io::prelude::*;
446
447 let input = BufReader::new(std::io::Cursor::new(_STR_BIRDS));
450 let mut out_cursor = std::io::Cursor::new(Vec::<u8>::new());
451
452 let ranged_pairs = extract_ranged_pairs(_STR_RANGES_01);
453 let char_processor = CharProcessor {};
454 char_processor.process_lines(
456 &CharUtf8LineProcessor {},
457 input,
458 &mut BufWriter::new(&mut out_cursor),
459 &CharContext {
460 ranged_pairs: &ranged_pairs,
461 },
462 );
463
464 out_cursor.seek(std::io::SeekFrom::Start(0)).unwrap();
465 let mut out = Vec::new();
467 out_cursor.read_to_end(&mut out).unwrap();
468 assert_eq!(_STR_BIRDS_OUTPUT.as_bytes().to_vec(), out);
469 }
470
471 #[test]
472 fn test_process_ascii_fields_for_line_ignored_delim() {
473 let line_processor = FieldUtf8LineProcessor {};
474 let line = "1234";
475 let delim = ":";
476 let ranged_pairs: Vec<(usize, usize)> = vec![(2, 2), (4, 6)];
477 assert_eq!(
478 vec![10],
479 line_processor.process(
480 line,
481 &FieldContext {
482 delim,
483 ranged_pairs: &ranged_pairs
484 }
485 )
486 );
487 }
488
489 #[test]
490 fn test_process_ascii_fields_for_line_leading_delim() {
491 let line_processor = FieldUtf8LineProcessor {};
492 let line = ":1234";
493 let delim = ":";
494 let ranged_pairs: Vec<(usize, usize)> = vec![(2, 2), (4, 6)];
495 assert_eq!(
496 "1234\n".as_bytes().to_vec(),
497 line_processor.process(
498 line,
499 &FieldContext {
500 delim,
501 ranged_pairs: &ranged_pairs
502 }
503 )
504 );
505 }
506
507 #[test]
508 fn test_process_ascii_fields_for_line_trailing_delim() {
509 let line_processor = FieldUtf8LineProcessor {};
510 let line = "1234:";
511 let delim = ":";
512 let ranged_pairs: Vec<(usize, usize)> = vec![(2, 2), (4, 6)];
513 assert_eq!(
514 "\n".as_bytes().to_vec(),
515 line_processor.process(
516 line,
517 &FieldContext {
518 delim,
519 ranged_pairs: &ranged_pairs
520 }
521 )
522 );
523 }
524
525 #[test]
526 fn test_process_ascii_fields_for_line_1st_field_empty() {
527 let line_processor = FieldUtf8LineProcessor {};
528 let line = ":1:2:3";
529 let delim = ":";
530 assert_eq!(
531 ":2\n".as_bytes().to_vec(),
532 line_processor.process(
533 line,
534 &FieldContext {
535 delim,
536 ranged_pairs: &vec![(1, 1), (3, 3)]
537 },
538 )
539 );
540 assert_eq!(
541 ":2:3\n".as_bytes().to_vec(),
542 line_processor.process(
543 line,
544 &FieldContext {
545 delim,
546 ranged_pairs: &vec![(1, 1), (3, 3), (4, 4)]
547 }
548 )
549 );
550 assert_eq!(
551 ":3\n".as_bytes().to_vec(),
552 line_processor.process(
553 line,
554 &FieldContext {
555 delim,
556 ranged_pairs: &vec![(1, 1), (4, 4)]
557 }
558 )
559 );
560 assert_eq!(
561 ":2:3\n".as_bytes().to_vec(),
562 line_processor.process(
563 line,
564 &FieldContext {
565 delim,
566 ranged_pairs: &vec![(1, 1), (3, 4)]
567 }
568 )
569 );
570 assert_eq!(
571 ":2:3\n".as_bytes().to_vec(),
572 line_processor.process(
573 line,
574 &FieldContext {
575 delim,
576 ranged_pairs: &vec![(1, 1), (3, 5)]
577 }
578 )
579 );
580 }
581
582 #[test]
583 fn test_process_utf8_fields_for_line_1st_field_empty() {
584 let line_processor = FieldUtf8LineProcessor {};
585 let line = ":ðĢ:ðĨ:ð";
586 let delim = ":";
587 assert_eq!(
588 ":ðĨ\n".as_bytes().to_vec(),
589 line_processor.process(
590 line,
591 &FieldContext {
592 delim,
593 ranged_pairs: &vec![(1, 1), (3, 3)]
594 }
595 )
596 );
597 assert_eq!(
598 ":ðĨ:ð\n".as_bytes().to_vec(),
599 line_processor.process(
600 line,
601 &FieldContext {
602 delim,
603 ranged_pairs: &vec![(1, 1), (3, 3), (4, 4)]
604 }
605 )
606 );
607 assert_eq!(
608 ":ð\n".as_bytes().to_vec(),
609 line_processor.process(
610 line,
611 &FieldContext {
612 delim,
613 ranged_pairs: &vec![(1, 1), (4, 4)]
614 }
615 )
616 );
617 assert_eq!(
618 ":ðĨ:ð\n".as_bytes().to_vec(),
619 line_processor.process(
620 line,
621 &FieldContext {
622 delim,
623 ranged_pairs: &vec![(1, 1), (3, 4)]
624 }
625 )
626 );
627 assert_eq!(
628 ":ðĨ:ð\n".as_bytes().to_vec(),
629 line_processor.process(
630 line,
631 &FieldContext {
632 delim,
633 ranged_pairs: &vec![(1, 1), (3, 5)]
634 }
635 )
636 );
637 }
638}