1#![doc = include_str!("../README.md")]
2
3pub mod maps;
4pub mod accessor;
5
6use accessor::{Accessor, TsvText};
7use maps::{MemoryMap, SafeMemoryMap};
8
9
10
11pub struct Table<Map: MemoryMap, Access : Accessor> {
13 accessor: Access,
14 map: Map,
15}
16
17pub struct Line<'a, Map: MemoryMap, Access : Accessor> {
19 textdb: &'a Table<Map, Access>,
20 line: &'a [u8],
21}
22
23impl Table<SafeMemoryMap, TsvText> {
24 pub fn text_tsv_from_string(text: String) -> Self {
26 let accessor = accessor::TsvText::<0>::default();
27 let map = maps::SafeMemoryMap::from_string(text);
28 Table::new(map, accessor)
29 }
30
31 pub fn text_tsv_from_str<S : AsRef<str>>(text: S) -> Self {
33 let accessor = accessor::TsvText::<0>::default();
34 let map = maps::SafeMemoryMap::from_str(text.as_ref());
35 Table::new(map, accessor)
36 }
37}
38
39
40impl<Access : Accessor, Map: MemoryMap> Table<Map, Access> {
41 pub fn new(map: Map, accessor: Access) -> Self {
43 Self {
44 map,
45 accessor
46 }
47 }
48
49 pub fn is_sorted(&self) -> anyhow::Result<bool> {
52 let bytes = self.map.bytes();
53 let mut iter = bytes.split(|b| *b == b'\n');
54 let mut prev_line = iter.next().unwrap_or_default();
55 for line in iter {
56 if self.accessor.compare_lines(prev_line, line) == std::cmp::Ordering::Greater {
57 return Ok(false);
58 }
59 prev_line = line;
60 }
61 Ok(true)
62 }
63
64 pub fn keys(&self) -> impl Iterator<Item=Result<&str, std::str::Utf8Error>> {
67 self.map.bytes().split(|b| *b == b'\n').map(|line| {
68 std::str::from_utf8(self.accessor.key(line))
69 })
70 }
71
72 pub fn cols(&self, i: usize) -> impl Iterator<Item=Result<&str, std::str::Utf8Error>> {
74 self.map.bytes().split(|b| *b == b'\n').map(move |line| {
75 std::str::from_utf8(self.accessor.col(line, i))
76 })
77 }
78
/// Locates the line that contains byte offset `pos`, looking only inside
/// `bytes[min..max]`.
///
/// Returns `(start, end, line)` where:
/// * `start` is the offset just after the closest `\n` before `pos`
///   (or `min` if there is none),
/// * `end` is the offset just after the first `\n` at or after `pos`
///   (or `max` if there is none),
/// * `line` is `bytes[start..end]` without its terminating `\n`.
///
/// For an empty range (`start == end`) the returned line is empty. The byte
/// before `end` then belongs to the previous line, so it must not be stripped;
/// doing so used to produce an inverted slice range and panic.
///
/// # Panics
///
/// Panics if `min <= pos <= max <= bytes.len()` does not hold.
fn find_line_at(bytes: &[u8], min: usize, max: usize, pos: usize) -> (usize, usize, &[u8]) {
    let start = bytes[min..pos]
        .iter()
        .rposition(|b| *b == b'\n')
        .map_or(min, |p| min + p + 1);
    let end = bytes[pos..max]
        .iter()
        .position(|b| *b == b'\n')
        .map_or(max, |p| pos + p + 1);
    assert!(start >= min);
    assert!(end <= max);
    assert!(end >= start);

    // Only strip a newline that is actually part of this line.
    let line_end = if end > start && bytes[end - 1] == b'\n' {
        end - 1
    } else {
        end
    };
    (start, end, &bytes[start..line_end])
}
92
93 pub fn get_matching_lines(&self, key: &Access::KeyType) -> impl Iterator<Item=Line<Map, Access>> {
95 let bytes = self.map.bytes();
96
97 let mut min = 0;
99
100 let mut max = bytes.len();
102 loop {
103 let mid = min + (max - min) / 2;
104 let (start, end, line) = Self::find_line_at(bytes, min, max, mid);
105
106 #[cfg(test)]
107 {
108 let cmp = self.accessor.compare_key(line, key);
109 let range = std::str::from_utf8(&bytes[min..max]).unwrap();
110 println!("min={min} mid={mid} max={max} line ={:?} cmp={cmp:?} r={range:?}", std::str::from_utf8(line).unwrap());
111 }
112
113 match self.accessor.compare_key(line, key) {
114 std::cmp::Ordering::Less => {
116 assert!(min != end);
118 min = end;
119 }
120 std::cmp::Ordering::Equal => {
121 let (_start, end, line) = Self::find_line_at(bytes, min, max, min);
122 #[cfg(test)]
123 {
124 assert_eq!(start, min);
125 let range = std::str::from_utf8(&bytes[min..max]).unwrap();
126 let cmp = self.accessor.compare_key(line, key);
127 println!("=min min={min} mid={mid} max={max} line ={:?} cmp={cmp:?} r={range:?}", std::str::from_utf8(line).unwrap());
128 }
129 let mut min_is_equal = false;
130 match self.accessor.compare_key(line, key) {
131 std::cmp::Ordering::Less => {
132 assert!(min != end);
133 min = end;
134 }
135 std::cmp::Ordering::Equal => {
136 min_is_equal = true;
137 }
138 std::cmp::Ordering::Greater => {
139 max = min;
141 break;
142 }
143 }
144
145 let (start, _end, line) = Self::find_line_at(bytes, min, max, max-1);
146 #[cfg(test)]
147 {
148 assert_eq!(_end, max);
149 let range = std::str::from_utf8(&bytes[min..max]).unwrap();
150 let cmp = self.accessor.compare_key(line, key);
151 println!("=max min={min} mid={mid} max={max} line ={:?} cmp={cmp:?} r={range:?}", std::str::from_utf8(line).unwrap());
152 }
153 match self.accessor.compare_key(line, key) {
154 std::cmp::Ordering::Less => {
155 max = min;
157 break;
158 }
159 std::cmp::Ordering::Equal => {
160 if min_is_equal {
161 max = if max != 0 && bytes[max-1] == b'\n' { max-1 } else { max };
164 break;
165 }
166 }
167 std::cmp::Ordering::Greater => {
168 assert!(max != start);
170 max = start;
171 }
172 }
173 }
174 std::cmp::Ordering::Greater => {
175 assert!(max != start);
176 max = start;
177 }
178 }
179 }
180
181 bytes[min..max].split(|b| *b == b'\n').map(|line| {
182 Line {
183 textdb: self,
184 line,
185 }
186 })
187 }
188}
189
190impl<'a, Access : Accessor, Map: MemoryMap> Line<'a, Map, Access> {
191 pub fn key(&self) -> Result<&str, std::str::Utf8Error> {
193 std::str::from_utf8(self.textdb.accessor.key(self.line))
194 }
195
196 pub fn col(&self, i: usize) -> Result<&str, std::str::Utf8Error> {
198 std::str::from_utf8(self.textdb.accessor.col(self.line, i))
199 }
200
201 pub fn line(&self) -> Result<&str, std::str::Utf8Error> {
202 std::str::from_utf8(self.line)
203 }
204}
205
206