1use filetime::FileTime;
9use hmac_sha256::Hash;
10use minicbor::{Decode, Encode};
11use smallvec::{smallvec, SmallVec};
12
13use std::collections::btree_map::Entry;
14use std::collections::BTreeMap;
15use std::fmt;
16use std::fs::File;
17use std::io::{BufRead, BufReader, BufWriter, Read, Seek, SeekFrom};
18use std::ops::Bound::Included;
19use std::path::{Path, PathBuf};
20use std::string::FromUtf8Error;
21use std::time::SystemTime;
22
/// Handle identifying a loaded frame: its index into `TextFile::frames`.
type FrameHandle = u32;
25
/// Errors produced while indexing, loading or slicing a text file.
#[derive(Debug)]
pub enum Error {
    /// A requested position or range could not be resolved within the text
    /// (also used for inverted ranges).
    OutOfBoundsError { begin: isize, end: isize },
    /// The given byte offset does not coincide with a UTF-8 character boundary.
    InvalidUtf8Byte(usize),
    /// The position index holds no positions, i.e. the text is empty.
    EmptyText,
    /// An underlying I/O operation failed.
    IOError(std::io::Error),
    /// Bytes read from the file could not be decoded as UTF-8.
    Utf8Error(FromUtf8Error),
    /// A frame handle did not resolve to a loaded frame.
    InvalidHandle,
    /// The position index could not be encoded to or decoded from its file.
    IndexError,
    /// No loaded frame covers the requested text.
    NotLoaded,
    /// A line-based lookup was requested but the index holds no line offsets.
    NoLineIndex,
}

impl fmt::Display for Error {
    /// Writes the human-readable message for each error variant.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // Variants that carry data need real formatting.
            Self::OutOfBoundsError { begin, end } => {
                write!(f, "Out of Bounds ({},{})", begin, end)
            }
            Self::InvalidUtf8Byte(byte) => write!(
                f,
                "Byte does not correspond with utf-8 character boundary ({})",
                byte
            ),
            Self::IOError(e) => write!(f, "{}", e),
            Self::Utf8Error(e) => write!(f, "{}", e),
            // The remaining variants map to fixed messages.
            Self::EmptyText => f.write_str("text is empty"),
            Self::NotLoaded => f.write_str("text not loaded"),
            Self::InvalidHandle => f.write_str("Invalid handle"),
            Self::IndexError => f.write_str("Index I/O error"),
            Self::NoLineIndex => f.write_str("No line index enabled"),
        }
    }
}

impl std::error::Error for Error {}
61
62#[derive(Debug, Clone, Decode, Encode)]
63pub struct PositionData<T>
64where
65 T: Eq + Ord + Copy,
66{
67 #[n(0)]
69 charpos: T,
70
71 #[n(1)]
73 bytepos: T,
74
75 #[n(2)]
77 size: u8,
78}
79
/// Read access to a position index entry, with offsets expressed as `usize`.
pub trait Position {
    /// Character offset of the entry.
    fn charpos(&self) -> usize;
    /// Byte offset of the entry.
    fn bytepos(&self) -> usize;
    /// Number of bytes per character for the entry.
    fn size(&self) -> u8;
}
85
86impl Position for PositionData<u32> {
87 fn charpos(&self) -> usize {
88 self.charpos as usize
89 }
90 fn bytepos(&self) -> usize {
91 self.bytepos as usize
92 }
93 fn size(&self) -> u8 {
94 self.size
95 }
96}
97
98impl Position for PositionData<u64> {
99 fn charpos(&self) -> usize {
100 self.charpos as usize
101 }
102 fn bytepos(&self) -> usize {
103 self.bytepos as usize
104 }
105 fn size(&self) -> u8 {
106 self.size
107 }
108}
109
110pub struct TextFile {
113 path: PathBuf,
115
116 frames: Vec<TextFrame>,
118
119 frametable: BTreeMap<usize, SmallVec<[FrameHandle; 1]>>,
121
122 positionindex: PositionIndex,
124
125 metadata: std::fs::Metadata,
127}
128
/// A contiguous excerpt of the text file held in memory.
struct TextFrame {
    /// Byte offset in the file at which this frame begins.
    beginbyte: usize,
    /// Byte offset in the file at which this frame ends (the frame holds
    /// `endbyte - beginbyte` bytes).
    endbyte: usize,
    /// The decoded text of the frame.
    text: String,
}
135
136#[derive(Debug, Clone, Decode, Encode)]
137struct PositionIndex {
138 #[n(0)]
140 charsize: usize,
141
142 #[n(1)]
144 bytesize: usize,
145
146 #[n(2)]
148 positions: Positions,
149
150 #[n(3)]
152 checksum: [u8; 32],
153
154 #[n(4)]
156 lines: Lines,
157}
158
159impl Default for PositionIndex {
160 fn default() -> Self {
161 Self {
162 charsize: 0,
163 bytesize: 0,
164 lines: Lines::default(),
165 positions: Positions::Large(Vec::default()),
166 checksum: Default::default(),
167 }
168 }
169}
170
171#[derive(Debug, Clone, Decode, Encode)]
172pub enum Positions {
174 #[n(0)]
175 Small(#[n(0)] Vec<PositionData<u16>>),
176
177 #[n(1)]
178 Large(#[n(0)] Vec<PositionData<u32>>),
179
180 #[n(2)]
181 Huge(#[n(0)] Vec<PositionData<u64>>),
182}
183
184impl Positions {
185 pub fn new(filesize: usize) -> Self {
186 if filesize < 65536 {
187 Self::Small(Vec::new())
188 } else if filesize < 4294967296 {
189 Self::Large(Vec::new())
190 } else {
191 Self::Huge(Vec::new())
192 }
193 }
194
195 pub fn len(&self) -> usize {
196 match self {
197 Self::Small(positions) => positions.len(),
198 Self::Large(positions) => positions.len(),
199 Self::Huge(positions) => positions.len(),
200 }
201 }
202
203 pub fn bytepos(&self, index: usize) -> Option<usize> {
204 match self {
205 Self::Small(positions) => positions.get(index).map(|x| x.bytepos as usize),
206 Self::Large(positions) => positions.get(index).map(|x| x.bytepos as usize),
207 Self::Huge(positions) => positions.get(index).map(|x| x.bytepos as usize),
208 }
209 }
210 pub fn charpos(&self, index: usize) -> Option<usize> {
211 match self {
212 Self::Small(positions) => positions.get(index).map(|x| x.charpos as usize),
213 Self::Large(positions) => positions.get(index).map(|x| x.charpos as usize),
214 Self::Huge(positions) => positions.get(index).map(|x| x.charpos as usize),
215 }
216 }
217 pub fn size(&self, index: usize) -> Option<u8> {
218 match self {
219 Self::Small(positions) => positions.get(index).map(|x| x.size),
220 Self::Large(positions) => positions.get(index).map(|x| x.size),
221 Self::Huge(positions) => positions.get(index).map(|x| x.size),
222 }
223 }
224
225 pub fn binary_search(&self, charpos: usize) -> Result<usize, usize> {
226 match self {
227 Self::Small(positions) => positions
228 .binary_search_by_key(&charpos, |posdata: &PositionData<u16>| {
229 posdata.charpos as usize
230 }),
231 Self::Large(positions) => positions
232 .binary_search_by_key(&charpos, |posdata: &PositionData<u32>| {
233 posdata.charpos as usize
234 }),
235 Self::Huge(positions) => positions
236 .binary_search_by_key(&charpos, |posdata: &PositionData<u64>| {
237 posdata.charpos as usize
238 }),
239 }
240 }
241
242 pub fn binary_search_by_bytepos(&self, bytepos: usize) -> Result<usize, usize> {
243 match self {
244 Self::Small(positions) => positions
245 .binary_search_by_key(&bytepos, |posdata: &PositionData<u16>| {
246 posdata.bytepos as usize
247 }),
248 Self::Large(positions) => positions
249 .binary_search_by_key(&bytepos, |posdata: &PositionData<u32>| {
250 posdata.bytepos as usize
251 }),
252 Self::Huge(positions) => positions
253 .binary_search_by_key(&bytepos, |posdata: &PositionData<u64>| {
254 posdata.bytepos as usize
255 }),
256 }
257 }
258
259 pub fn push(&mut self, charpos: usize, bytepos: usize, charsize: u8) {
260 match self {
261 Self::Small(positions) => positions.push(PositionData {
262 charpos: charpos as u16,
263 bytepos: bytepos as u16,
264 size: charsize,
265 }),
266 Self::Large(positions) => positions.push(PositionData {
267 charpos: charpos as u32,
268 bytepos: bytepos as u32,
269 size: charsize,
270 }),
271 Self::Huge(positions) => positions.push(PositionData {
272 charpos: charpos as u64,
273 bytepos: bytepos as u64,
274 size: charsize,
275 }),
276 }
277 }
278}
279
280#[derive(Debug, Clone, Decode, Encode)]
281pub enum Lines {
284 #[n(0)]
285 Small(#[n(0)] Vec<u16>),
286
287 #[n(1)]
288 Large(#[n(0)] Vec<u32>),
289
290 #[n(2)]
291 Huge(#[n(0)] Vec<u64>),
292}
293
294impl Lines {
295 pub fn new(filesize: usize) -> Self {
296 if filesize < 65536 {
297 Self::Small(Vec::new())
298 } else if filesize < 4294967296 {
299 Self::Large(Vec::new())
300 } else {
301 Self::Huge(Vec::new())
302 }
303 }
304
305 pub fn len(&self) -> usize {
307 match self {
308 Self::Small(positions) => positions.len(),
309 Self::Large(positions) => positions.len(),
310 Self::Huge(positions) => positions.len(),
311 }
312 }
313
314 pub fn get(&self, index: usize) -> Option<usize> {
316 match self {
317 Self::Small(positions) => positions.get(index).map(|x| *x as usize),
318 Self::Large(positions) => positions.get(index).map(|x| *x as usize),
319 Self::Huge(positions) => positions.get(index).map(|x| *x as usize),
320 }
321 }
322
323 pub fn push(&mut self, line: usize) {
324 match self {
325 Self::Small(positions) => positions.push(line as u16),
326 Self::Large(positions) => positions.push(line as u32),
327 Self::Huge(positions) => positions.push(line as u64),
328 }
329 }
330}
331
332impl Default for Lines {
333 fn default() -> Self {
334 Self::Large(Vec::new())
335 }
336}
337
/// Controls whether a line index is built when a text file is indexed.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TextFileMode {
    /// Do not record line offsets; line-based lookups will not be available.
    NoLineIndex,

    /// Record the byte offset of every line while indexing.
    WithLineIndex,
}

impl Default for TextFileMode {
    /// The default mode builds the line index.
    fn default() -> Self {
        TextFileMode::WithLineIndex
    }
}
353
354impl TextFile {
355 pub fn new(
361 path: impl Into<PathBuf>,
362 indexpath: Option<&Path>,
363 mode: TextFileMode,
364 ) -> Result<Self, Error> {
365 let path: PathBuf = path.into();
366 let metadata = std::fs::metadata(path.as_path()).map_err(|e| Error::IOError(e))?;
367 let mut build_index = true;
368 let mut positionindex = PositionIndex::default();
369 if let Some(indexpath) = indexpath.as_ref() {
370 if indexpath.exists() {
371 let indexmetadata = std::fs::metadata(indexpath).map_err(|e| Error::IOError(e))?;
372 if FileTime::from_last_modification_time(&indexmetadata)
373 >= FileTime::from_last_modification_time(&metadata)
374 {
375 positionindex = PositionIndex::from_file(indexpath)?;
376 build_index = false;
377 }
378 }
379 }
380 if build_index {
381 positionindex = PositionIndex::new(path.as_path(), metadata.len(), mode)?;
382 }
383 if let Some(indexpath) = indexpath.as_ref() {
384 positionindex.to_file(indexpath)?;
385 }
386 Ok(Self {
387 path,
388 frames: Vec::new(),
389 frametable: BTreeMap::new(),
390 positionindex,
391 metadata,
392 })
393 }
394
395 pub fn path(&self) -> &Path {
397 self.path.as_path()
398 }
399
400 pub fn get(&self, begin: isize, end: isize) -> Result<&str, Error> {
406 let (beginchar, endchar) = self.absolute_pos(begin, end)?;
407 let beginbyte = self.chars_to_bytes(beginchar)?;
408 let endbyte = self.chars_to_bytes(endchar)?;
409 self.get_byterange_unchecked(beginbyte, endbyte)
410 }
411
412 pub fn get_byterange(&self, beginbyte: usize, endbyte: usize) -> Result<&str, Error> {
414 self.frame(beginbyte, endbyte)
415 .ok_or(Error::NotLoaded)
416 .map(|frame| {
417 self.bytes_to_chars(beginbyte - frame.beginbyte)?;
419 self.bytes_to_chars(endbyte - frame.beginbyte)?;
420 Ok(
421 &frame.text.as_str()
422 [(beginbyte - frame.beginbyte)..(endbyte - frame.beginbyte)],
423 )
424 })?
425 }
426
427 pub fn get_byterange_unchecked(&self, beginbyte: usize, endbyte: usize) -> Result<&str, Error> {
430 self.frame(beginbyte, endbyte)
431 .ok_or(Error::NotLoaded)
432 .map(|frame| {
433 &frame.text.as_str()[(beginbyte - frame.beginbyte)..(endbyte - frame.beginbyte)]
434 })
435 }
436
437 pub fn get_lines(&self, begin: isize, end: isize) -> Result<&str, Error> {
446 let (beginbyte, endbyte) = self.line_range_to_byte_range(begin, end)?;
447 self.get_byterange_unchecked(beginbyte, endbyte)
448 }
449
450 pub fn get_or_load(&mut self, begin: isize, end: isize) -> Result<&str, Error> {
456 let (beginchar, endchar) = self.absolute_pos(begin, end)?;
457 let beginbyte = self.chars_to_bytes(beginchar)?;
458 let endbyte = self.chars_to_bytes(endchar)?;
459 match self.framehandle(beginbyte, endbyte) {
460 Some(framehandle) => {
461 let frame = self.resolve(framehandle)?;
462 Ok(
463 &frame.text.as_str()
464 [(beginbyte - frame.beginbyte)..(endbyte - frame.beginbyte)],
465 )
466 }
467 None => {
468 self.load_abs(beginchar, endchar)?;
469 self.get(begin, end)
470 }
471 }
472 }
473
474 pub fn get_or_load_lines(&mut self, begin: isize, end: isize) -> Result<&str, Error> {
483 let beginbyte = self.line_to_bytes(begin)?;
484 let endbyte = if end == 0 {
485 self.positionindex.bytesize
486 } else {
487 self.line_to_bytes(end)?
488 };
489 if let Some(framehandle) = self.framehandle(beginbyte, endbyte) {
490 let frame = self.resolve(framehandle)?;
491 return Ok(
492 &frame.text.as_str()[(beginbyte - frame.beginbyte)..(endbyte - frame.beginbyte)]
493 );
494 }
495 self.load_frame(beginbyte, endbyte)?;
496 if let Some(frame) = self.frame(beginbyte, endbyte) {
497 Ok(&frame.text.as_str()[(beginbyte - frame.beginbyte)..(endbyte - frame.beginbyte)])
498 } else {
499 Err(Error::NotLoaded)
500 }
501 }
502
503 pub fn load(&mut self, begin: isize, end: isize) -> Result<(), Error> {
508 let (beginchar, endchar) = self.absolute_pos(begin, end)?;
509 self.load_abs(beginchar, endchar)
510 }
511
512 fn resolve(&self, handle: FrameHandle) -> Result<&TextFrame, Error> {
514 if let Some(frame) = self.frames.get(handle as usize) {
515 Ok(frame)
516 } else {
517 Err(Error::InvalidHandle)
518 }
519 }
520
521 fn framehandle(&self, beginbyte: usize, endbyte: usize) -> Option<FrameHandle> {
523 let mut iter = self.frametable.range((Included(&0), Included(&beginbyte)));
524 while let Some((_, framehandles)) = iter.next_back() {
527 for handle in framehandles {
528 if let Some(frame) = self.frames.get(*handle as usize) {
529 if frame.endbyte >= endbyte {
530 return Some(*handle);
531 }
532 }
533 }
534 }
535 None
536 }
537
538 fn frame(&self, beginbyte: usize, endbyte: usize) -> Option<&TextFrame> {
540 let mut iter = self.frametable.range((Included(&0), Included(&beginbyte)));
541 while let Some((_, framehandles)) = iter.next_back() {
544 for handle in framehandles {
545 if let Some(frame) = self.frames.get(*handle as usize) {
546 if frame.endbyte >= endbyte {
547 return Some(frame);
548 }
549 }
550 }
551 }
552 None
553 }
554
555 fn load_abs(&mut self, beginchar: usize, endchar: usize) -> Result<(), Error> {
557 let beginbyte = self.chars_to_bytes(beginchar)?;
558 let endbyte = self.chars_to_bytes(endchar)?;
559 match self.load_frame(beginbyte, endbyte) {
560 Ok(_handle) => Ok(()),
561 Err(e) => Err(e),
562 }
563 }
564
565 fn load_frame(&mut self, beginbyte: usize, endbyte: usize) -> Result<FrameHandle, Error> {
567 if beginbyte > endbyte {
568 return Err(Error::OutOfBoundsError {
569 begin: beginbyte as isize,
570 end: endbyte as isize,
571 });
572 }
573 let mut buffer: Vec<u8> = vec![0; endbyte - beginbyte];
574 let mut file = File::open(self.path.as_path()).map_err(|e| Error::IOError(e))?;
575 file.seek(SeekFrom::Start(beginbyte as u64))
576 .map_err(|e| Error::IOError(e))?;
577 file.read_exact(&mut buffer)
578 .map_err(|e| Error::IOError(e))?;
579 let frame = TextFrame {
580 beginbyte,
581 endbyte,
582 text: String::from_utf8(buffer).map_err(|e| Error::Utf8Error(e))?,
583 };
584 self.frames.push(frame);
585 let handle = (self.frames.len() - 1) as FrameHandle;
586 match self.frametable.entry(beginbyte) {
587 Entry::Occupied(mut entry) => entry.get_mut().push(handle),
588 Entry::Vacant(entry) => {
589 entry.insert(smallvec!(handle));
590 }
591 }
592 Ok(handle)
593 }
594
595 pub fn chars_to_bytes(&self, charpos: usize) -> Result<usize, Error> {
597 match self.positionindex.positions.binary_search(charpos) {
598 Ok(index) => {
599 Ok(self
601 .positionindex
602 .positions
603 .bytepos(index)
604 .expect("position should exist"))
605 }
606 Err(0) => {
607 Err(Error::EmptyText)
609 }
610 Err(index) => {
611 let charpos2 = self
613 .positionindex
614 .positions
615 .charpos(index - 1)
616 .expect("position should exist");
617 let charoffset = charpos - charpos2;
618 let bytepos = self
619 .positionindex
620 .positions
621 .bytepos(index - 1)
622 .expect("position should exist")
623 + (self
624 .positionindex
625 .positions
626 .size(index - 1)
627 .expect("position should exist") as usize
628 * charoffset);
629 if bytepos > self.positionindex.bytesize {
630 Err(Error::OutOfBoundsError {
631 begin: bytepos as isize,
632 end: 0,
633 })
634 } else {
635 Ok(bytepos)
636 }
637 }
638 }
639 }
640
641 pub fn bytes_to_chars(&self, bytepos: usize) -> Result<usize, Error> {
643 if bytepos > self.positionindex.bytesize {
644 return Err(Error::OutOfBoundsError {
645 begin: bytepos as isize,
646 end: 0,
647 });
648 }
649
650 match self
651 .positionindex
652 .positions
653 .binary_search_by_bytepos(bytepos)
654 {
655 Ok(index) => Ok(self.positionindex.positions.charpos(index).unwrap()),
656 Err(0) => {
657 Err(Error::EmptyText)
659 }
660 Err(index) => {
661 let prev_byte = self.positionindex.positions.bytepos(index - 1).unwrap();
662 let prev_char = self.positionindex.positions.charpos(index - 1).unwrap();
663 let size = self.positionindex.positions.size(index - 1).unwrap() as usize;
664 if (bytepos - prev_byte) % size == 0 {
665 Ok(prev_char + (bytepos - prev_byte) / size)
666 } else {
667 Err(Error::InvalidUtf8Byte(bytepos))
668 }
669 }
670 }
671 }
672
673 pub fn line_to_bytes(&self, line: isize) -> Result<usize, Error> {
677 let num_lines = self.positionindex.lines.len();
678
679 if num_lines == 0 {
680 return Err(Error::NoLineIndex);
681 }
682
683 let line = if line < 0 {
685 let abs = line.unsigned_abs();
686 if abs > num_lines {
687 return Err(Error::OutOfBoundsError {
688 begin: line,
689 end: 0,
690 });
691 }
692 num_lines - abs
693 } else {
694 line as usize
695 };
696
697 if line == num_lines {
699 return Ok(self.positionindex.bytesize);
700 }
701
702 self.positionindex
703 .lines
704 .get(line)
705 .ok_or(Error::OutOfBoundsError {
706 begin: line as isize,
707 end: 0,
708 })
709 }
710
711 pub fn line_range_to_byte_range(
712 &self,
713 begin: isize,
714 end: isize,
715 ) -> Result<(usize, usize), Error> {
716 let beginbyte = self.line_to_bytes(begin)?;
717 let endbyte = if end == 0 {
718 self.positionindex.bytesize
719 } else {
720 self.line_to_bytes(end)?
721 };
722
723 Ok((beginbyte, endbyte))
724 }
725
726 pub fn absolute_pos(&self, mut begin: isize, mut end: isize) -> Result<(usize, usize), Error> {
731 if begin < 0 {
732 begin += self.positionindex.charsize as isize;
733 }
734
735 if end <= 0 {
736 end += self.positionindex.charsize as isize;
737 }
738
739 if begin < 0 || end < 0 || begin > end {
740 return Err(Error::OutOfBoundsError { begin, end });
741 }
742
743 Ok((begin as usize, end as usize))
744 }
745
746 pub fn absolute_line_pos(
754 &self,
755 mut begin: isize,
756 mut end: isize,
757 ) -> Result<(usize, usize), Error> {
758 if begin < 0 {
759 begin += self.positionindex.lines.len() as isize;
760 }
761
762 if end <= 0 {
763 end += self.positionindex.lines.len() as isize;
764 }
765
766 if begin < 0 || end < 0 || begin > end {
767 return Err(Error::OutOfBoundsError { begin, end });
768 }
769
770 let beginbyte = self.line_to_bytes(begin)?;
771 let endbyte = self.line_to_bytes(end)?;
772
773 Ok((
774 self.bytes_to_chars(beginbyte)?,
775 self.bytes_to_chars(endbyte)?,
776 ))
777 }
778
779 pub fn len(&self) -> usize {
781 self.positionindex.charsize
782 }
783
784 pub fn len_utf8(&self) -> usize {
786 self.positionindex.bytesize
787 }
788
789 pub fn mtime(&self) -> u64 {
791 if let Ok(modified) = self.metadata.modified() {
792 modified
793 .duration_since(SystemTime::UNIX_EPOCH)
794 .expect("invalid file timestamp (before unix epoch)")
795 .as_secs()
796 } else {
797 0
798 }
799 }
800
801 pub fn checksum(&self) -> &[u8; 32] {
803 &self.positionindex.checksum
804 }
805
806 pub fn checksum_digest(&self) -> String {
808 format!("{:x}", HexDigest(self.checksum()))
809 }
810}
811
812impl PositionIndex {
813 fn new(textfile: &Path, filesize: u64, options: TextFileMode) -> Result<Self, Error> {
815 let mut charpos = 0;
816 let mut bytepos = 0;
817 let mut prevcharsize = 0;
818 let textfile = File::open(textfile).map_err(|e| Error::IOError(e))?;
819
820 let mut reader = BufReader::new(textfile);
822 let mut positions = Positions::new(filesize as usize);
823 let mut lines = Lines::new(filesize as usize);
824 let mut line = String::new();
825 let mut checksum = Hash::new();
826 loop {
827 let read_bytes = reader.read_line(&mut line).map_err(|e| Error::IOError(e))?;
828 if read_bytes == 0 {
829 break;
831 } else {
832 checksum.update(&line);
833 if options == TextFileMode::WithLineIndex {
834 lines.push(bytepos);
835 }
836 for char in line.chars() {
837 let charsize = char.len_utf8() as u8;
838 if charsize != prevcharsize {
839 positions.push(charpos, bytepos, charsize);
840 }
841 charpos += 1;
842 bytepos += charsize as usize;
843 prevcharsize = charsize;
844 }
845 line.clear();
847 }
848 }
849 let checksum = checksum.finalize();
850 if options == TextFileMode::WithLineIndex {
851 lines.push(bytepos);
853 }
854 Ok(PositionIndex {
855 charsize: charpos,
856 bytesize: bytepos,
857 positions,
858 checksum,
859 lines,
860 })
861 }
862
863 fn to_file(&mut self, path: &Path) -> Result<(), Error> {
865 let file = File::create(path).map_err(|e| Error::IOError(e))?;
866 let writer = BufWriter::new(file);
867 let writer = minicbor::encode::write::Writer::new(writer);
868 minicbor::encode(self, writer).map_err(|_| Error::IndexError)?;
869 Ok(())
870 }
871
872 fn from_file(path: &Path) -> Result<Self, Error> {
874 let file = File::open(path).map_err(|e| Error::IOError(e))?;
875 let mut reader = BufReader::new(file);
876 let mut buffer: Vec<u8> = Vec::new(); reader
878 .read_to_end(&mut buffer)
879 .map_err(|e| Error::IOError(e))?;
880 Ok(minicbor::decode(&buffer).map_err(|_| Error::IndexError)?)
881 }
882}
883
/// Borrowed 32-byte digest that can be formatted as lowercase hexadecimal.
struct HexDigest<'a>(&'a [u8; 32]);

impl fmt::LowerHex for HexDigest<'_> {
    /// Writes every byte as two lowercase hexadecimal digits, in order.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0
            .iter()
            .try_for_each(|byte| write!(f, "{:02x}", byte))
    }
}
895
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    const EXAMPLE_ASCII_TEXT: &str = "
Article 1

All human beings are born free and equal in dignity and rights. They are endowed with reason and conscience and should act towards one another in a spirit of brotherhood.

Article 2

Everyone is entitled to all the rights and freedoms set forth in this Declaration, without distinction of any kind, such as race, colour, sex, language, religion, political or other opinion, national or social origin, property, birth or other status. Furthermore, no distinction shall be made on the basis of the political, jurisdictional or international status of the country or territory to which a person belongs, whether it be independent, trust, non-self-governing or under any other limitation of sovereignty.

Article 3

Everyone has the right to life, liberty and security of person.

Article 4

No one shall be held in slavery or servitude; slavery and the slave trade shall be prohibited in all their forms.
";

    const EXAMPLE_UNICODE_TEXT: &str = "
第一条

人人生而自由,在尊严和权利上一律平等。他们赋有理性和良心,并应以兄弟关系的精神相对待。
第二条

人人有资格享有本宣言所载的一切权利和自由,不分种族、肤色、性别、语言、宗教、政治或其他见解、国籍或社会出身、财产、出生或其他身分等任何区别。

并且不得因一人所属的国家或领土的政治的、行政的或者国际的地位之不同而有所区别,无论该领土是独立领土、托管领土、非自治领土或者处于其他任何主权受限制的情况之下。
第三条

人人有权享有生命、自由和人身安全。
第四条

任何人不得使为奴隶或奴役;一切形式的奴隶制度和奴隶买卖,均应予以禁止。
";
    const EXAMPLE_3_TEXT: &str = "ПРИВЕТ";

    /// Creates a temporary file holding exactly `contents`.
    fn tempfile_with(contents: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().expect("temp file");
        file.write_all(contents.as_bytes())
            .expect("write must work");
        file
    }

    /// Temporary file with the ASCII example text.
    fn setup_ascii() -> NamedTempFile {
        tempfile_with(EXAMPLE_ASCII_TEXT)
    }

    /// Temporary file with the multi-byte example text.
    fn setup_unicode() -> NamedTempFile {
        tempfile_with(EXAMPLE_UNICODE_TEXT)
    }

    /// Temporary file with the short Cyrillic example text.
    fn setup_3() -> NamedTempFile {
        tempfile_with(EXAMPLE_3_TEXT)
    }

    /// Temporary file without any content.
    fn setup_empty() -> NamedTempFile {
        NamedTempFile::new().expect("temp file")
    }

    /// Opens `file` without an index file, using the default mode.
    fn open(file: &NamedTempFile) -> TextFile {
        TextFile::new(file.path(), None, Default::default()).expect("file must load")
    }

    #[test]
    pub fn test001_init_ascii() {
        let file = setup_ascii();
        let textfile = open(&file);
        assert_eq!(textfile.len(), 914);
        assert_eq!(textfile.len_utf8(), 914);
    }

    #[test]
    pub fn test001_init_unicode() {
        let file = setup_unicode();
        let textfile = open(&file);
        assert_eq!(textfile.len(), 271);
        assert_eq!(textfile.len_utf8(), 771);
    }

    #[test]
    pub fn test002_load_ascii() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        let text = textfile.get_or_load(0, 0).expect("text should exist");
        assert_eq!(text, EXAMPLE_ASCII_TEXT);
    }

    #[test]
    pub fn test002_load_ascii_explicit() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        assert!(textfile.load(0, 0).is_ok());
        let text = textfile.get(0, 0).expect("text should exist");
        assert_eq!(text, EXAMPLE_ASCII_TEXT);
    }

    #[test]
    pub fn test002_load_unicode() {
        let file = setup_unicode();
        let mut textfile = open(&file);
        let text = textfile.get_or_load(0, 0).expect("text should exist");
        assert_eq!(text, EXAMPLE_UNICODE_TEXT);
    }

    #[test]
    pub fn test002_load_unicode_tiny() {
        let file = setup_3();
        let mut textfile = open(&file);
        let text = textfile.get_or_load(0, 0).expect("text should exist");
        assert_eq!(text, EXAMPLE_3_TEXT);
    }

    #[test]
    pub fn test003_subpart_of_loaded_frame() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        assert!(textfile.load(0, 0).is_ok());
        let text = textfile.get(1, 10).expect("text should exist");
        assert_eq!(text, "Article 1");
    }

    #[test]
    pub fn test004_excerpt_in_frame() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        let text = textfile.get_or_load(1, 10).expect("text should exist");
        assert_eq!(text, "Article 1");
    }

    #[test]
    pub fn test004_end_excerpt_in_frame() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        let text = textfile.get_or_load(-7, 0).expect("text should exist");
        assert_eq!(text, "forms.\n");
    }

    #[test]
    pub fn test004_excerpt_in_frame_unicode() {
        let file = setup_unicode();
        let mut textfile = open(&file);
        let text = textfile.get_or_load(1, 4).expect("text should exist");
        assert_eq!(text, "第一条");
    }

    #[test]
    pub fn test004_end_excerpt_in_frame_unicode() {
        let file = setup_unicode();
        let mut textfile = open(&file);
        let text = textfile.get_or_load(-3, 0).expect("text should exist");
        assert_eq!(text, "止。\n");
    }

    #[test]
    pub fn test005_out_of_bounds() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        assert!(textfile.load(0, 0).is_ok());
        assert!(textfile.get(1, 999).is_err());
    }

    #[test]
    pub fn test006_checksum() {
        let file = setup_ascii();
        let textfile = open(&file);
        assert_eq!(
            textfile.checksum_digest(),
            "c6b079e561f19702d63111a3201d4850e9649b8a3ef1929d6530a780f3815215"
        );
    }

    #[test]
    pub fn test007_positionindex_size() {
        let file = setup_3();
        let mut textfile = open(&file);
        assert!(textfile.load(0, 0).is_ok());
        // All characters have the same byte width, so one entry suffices.
        assert_eq!(textfile.positionindex.positions.len(), 1);
    }

    #[test]
    pub fn test008_line_ascii() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        let text = textfile
            .get_or_load_lines(1, 2)
            .expect("text should exist");
        assert_eq!(text, "Article 1\n");
    }

    #[test]
    pub fn test008_empty_line_ascii() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        let text = textfile
            .get_or_load_lines(0, 1)
            .expect("text should exist");
        assert_eq!(text, "\n");
    }

    #[test]
    pub fn test008_empty_last_line_ascii() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        let text = textfile
            .get_or_load_lines(-1, 0)
            .expect("text should exist");
        assert_eq!(text, "");
    }

    #[test]
    pub fn test008_empty_last_line() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        let text = textfile
            .get_or_load_lines(-2, -1)
            .expect("text should exist");
        assert_eq!(text, "No one shall be held in slavery or servitude; slavery and the slave trade shall be prohibited in all their forms.\n");
    }

    #[test]
    pub fn test008_all_lines() {
        let file = setup_unicode();
        let mut textfile = open(&file);
        assert!(textfile.load(0, 0).is_ok());
        let text = textfile.get_lines(0, 0).expect("text shoulde exist");
        assert_eq!(text, EXAMPLE_UNICODE_TEXT);
    }

    #[test]
    pub fn test009_line_out_of_bounds() {
        let file = setup_ascii();
        let mut textfile = open(&file);
        assert!(textfile.load(0, 0).is_ok());
        assert!(textfile.get_lines(1, 999).is_err());
    }

    #[test]
    pub fn test010_bytes_to_chars_ascii() {
        let file = setup_ascii();
        let textfile = open(&file);
        // In ASCII text byte and character offsets coincide.
        assert_eq!(textfile.bytes_to_chars(0).unwrap(), 0);
        assert_eq!(textfile.bytes_to_chars(10).unwrap(), 10);
        assert_eq!(textfile.bytes_to_chars(914).unwrap(), 914);
    }

    #[test]
    pub fn test010_bytes_to_chars_unicode() {
        let file = setup_unicode();
        let textfile = open(&file);
        assert_eq!(textfile.bytes_to_chars(0).unwrap(), 0);
        // The leading newline takes one byte, the following characters three.
        assert_eq!(textfile.bytes_to_chars(1).unwrap(), 1);
        assert_eq!(textfile.bytes_to_chars(4).unwrap(), 2);
        assert_eq!(textfile.bytes_to_chars(7).unwrap(), 3);
        assert_eq!(textfile.bytes_to_chars(771).unwrap(), 271);
    }

    #[test]
    pub fn test010_bytes_to_chars_roundtrip() {
        let file = setup_unicode();
        let textfile = open(&file);
        for charpos in [0, 1, 10, 50, 100, 200, 271] {
            let bytepos = textfile.chars_to_bytes(charpos).unwrap();
            let back = textfile.bytes_to_chars(bytepos).unwrap();
            assert_eq!(back, charpos, "roundtrip failed for charpos {}", charpos);
        }
    }

    #[test]
    pub fn test010_bytes_to_chars_out_of_bounds() {
        let file = setup_ascii();
        let textfile = open(&file);
        assert!(textfile.bytes_to_chars(9999).is_err());
    }

    #[test]
    pub fn test010_get_byterange() {
        let file = setup_unicode();
        let mut textfile = open(&file);
        textfile.load(0, 0).unwrap();
        let text = textfile.get_byterange(1, 4).expect("text should exist");
        assert_eq!(text, "第");
    }

    #[test]
    pub fn test010_get_invalid_byterange() {
        let file = setup_unicode();
        let mut textfile = open(&file);
        textfile.load(0, 0).unwrap();
        // Byte 3 falls inside a three-byte character.
        let result = textfile.get_byterange(1, 3);
        assert!(matches!(result, Err(Error::InvalidUtf8Byte(..))));
    }

    #[test]
    pub fn test011_absolute_line_pos() {
        let file = setup_ascii();
        let textfile = open(&file);

        let (begin, end) = textfile.absolute_line_pos(0, 1).unwrap();
        assert_eq!(begin, 0);
        assert_eq!(end, 1);

        let (begin, end) = textfile.absolute_line_pos(1, 2).unwrap();
        assert_eq!(begin, 1);
        assert_eq!(end, 11);
    }

    #[test]
    pub fn test011_absolute_line_pos_negative() {
        let file = setup_ascii();
        let textfile = open(&file);
        let (begin, end) = textfile.absolute_line_pos(-2, 0).unwrap();
        assert_eq!(begin, textfile.len() - 114);
        assert_eq!(end, textfile.len());
    }

    #[test]
    pub fn test011_absolute_line_pos_full() {
        let file = setup_unicode();
        let textfile = open(&file);
        let (begin, end) = textfile.absolute_line_pos(0, 0).unwrap();
        assert_eq!(begin, 0);
        assert_eq!(end, textfile.len());
    }

    #[test]
    pub fn test011_absolute_line_pos_no_line_index() {
        let file = setup_ascii();
        let textfile =
            TextFile::new(file.path(), None, TextFileMode::NoLineIndex).expect("file must load");
        let result = textfile.absolute_line_pos(0, 1);
        assert!(matches!(result, Err(Error::NoLineIndex)));
    }

    #[test]
    pub fn test011_absolute_line_pos_out_of_bounds() {
        let file = setup_ascii();
        let textfile = open(&file);
        assert!(textfile.absolute_line_pos(0, 9999).is_err());
        assert!(textfile.absolute_line_pos(-9999, 0).is_err());
    }

    #[test]
    pub fn test012_empty_file() {
        let file = setup_empty();
        let textfile = open(&file);
        assert!(matches!(textfile.bytes_to_chars(0), Err(Error::EmptyText)));
        assert!(matches!(textfile.chars_to_bytes(0), Err(Error::EmptyText)));
    }
}