1use crate::data::datatable::{DataColumn, DataRow, DataTable, DataValue};
2use crate::sql::generators::TableGenerator;
3use anyhow::{anyhow, Result};
4use regex::Regex;
5use std::fs::File;
6use std::io::{BufRead, BufReader};
7use std::sync::Arc;
8
/// Upper bound on the number of rows collected from a single file read.
///
/// `read_filtered_lines` stops reading once this many lines have been kept
/// and prints a truncation warning to stderr.
const MAX_LINES_PER_FILE: usize = 1_000_000;
13
14fn require_string(args: &[DataValue], idx: usize, name: &str) -> Result<String> {
16 match args.get(idx) {
17 Some(DataValue::String(s)) => Ok(s.clone()),
18 Some(DataValue::InternedString(s)) => Ok(s.as_str().to_string()),
19 Some(DataValue::Null) | None => Err(anyhow!("{} requires argument {}", name, idx + 1)),
20 Some(v) => Err(anyhow!(
21 "{} argument {} must be a string, got {:?}",
22 name,
23 idx + 1,
24 v
25 )),
26 }
27}
28
29fn optional_string(args: &[DataValue], idx: usize) -> Option<String> {
31 match args.get(idx) {
32 Some(DataValue::String(s)) => Some(s.clone()),
33 Some(DataValue::InternedString(s)) => Some(s.as_str().to_string()),
34 _ => None,
35 }
36}
37
38fn read_filtered_lines(path: &str, match_regex: Option<&Regex>) -> Result<Vec<(i64, String)>> {
44 let file = File::open(path).map_err(|e| anyhow!("Failed to open '{}': {}", path, e))?;
45 let reader = BufReader::new(file);
46
47 let mut out = Vec::new();
48 let mut truncated = false;
49
50 for (idx, line_result) in reader.lines().enumerate() {
51 let line = line_result.map_err(|e| anyhow!("Error reading '{}': {}", path, e))?;
52 let line_num = (idx + 1) as i64;
53
54 if let Some(re) = match_regex {
55 if !re.is_match(&line) {
56 continue;
57 }
58 }
59
60 if out.len() >= MAX_LINES_PER_FILE {
61 truncated = true;
62 break;
63 }
64 out.push((line_num, line));
65 }
66
67 if truncated {
68 eprintln!(
69 "WARNING: truncated to {} rows (max_lines_per_file cap) when reading '{}'",
70 MAX_LINES_PER_FILE, path
71 );
72 }
73
74 Ok(out)
75}
76
77pub struct ReadText;
82
83impl TableGenerator for ReadText {
84 fn name(&self) -> &str {
85 "READ_TEXT"
86 }
87
88 fn columns(&self) -> Vec<DataColumn> {
89 vec![DataColumn::new("line_num"), DataColumn::new("line")]
90 }
91
92 fn generate(&self, args: Vec<DataValue>) -> Result<Arc<DataTable>> {
93 if args.is_empty() || args.len() > 2 {
94 return Err(anyhow!(
95 "READ_TEXT expects 1 or 2 arguments: (path [, match_regex])"
96 ));
97 }
98
99 let path = require_string(&args, 0, "READ_TEXT")?;
100 let match_regex = optional_string(&args, 1)
101 .map(|s| Regex::new(&s).map_err(|e| anyhow!("Invalid match_regex: {}", e)))
102 .transpose()?;
103
104 let lines = read_filtered_lines(&path, match_regex.as_ref())?;
105
106 let mut table = DataTable::new("read_text");
107 table.add_column(DataColumn::new("line_num"));
108 table.add_column(DataColumn::new("line"));
109
110 for (line_num, line) in lines {
111 table
112 .add_row(DataRow::new(vec![
113 DataValue::Integer(line_num),
114 DataValue::String(line),
115 ]))
116 .map_err(|e| anyhow!(e))?;
117 }
118
119 Ok(Arc::new(table))
120 }
121
122 fn description(&self) -> &str {
123 "Read a text file line-by-line. Optional second arg is a regex that filters lines at read time."
124 }
125
126 fn arg_count(&self) -> usize {
127 2
128 }
129}
130
131pub struct Grep;
136
137impl TableGenerator for Grep {
138 fn name(&self) -> &str {
139 "GREP"
140 }
141
142 fn columns(&self) -> Vec<DataColumn> {
143 vec![DataColumn::new("line_num"), DataColumn::new("line")]
144 }
145
146 fn generate(&self, args: Vec<DataValue>) -> Result<Arc<DataTable>> {
147 if args.len() < 2 || args.len() > 3 {
148 return Err(anyhow!(
149 "GREP expects 2 or 3 arguments: (path, pattern [, invert])"
150 ));
151 }
152
153 let path = require_string(&args, 0, "GREP")?;
154 let pattern_str = require_string(&args, 1, "GREP")?;
155 let pattern =
156 Regex::new(&pattern_str).map_err(|e| anyhow!("Invalid GREP pattern: {}", e))?;
157
158 let invert = match args.get(2) {
159 Some(DataValue::Boolean(b)) => *b,
160 Some(DataValue::Integer(n)) => *n != 0,
161 Some(DataValue::Null) | None => false,
162 Some(v) => return Err(anyhow!("GREP invert flag must be boolean, got {:?}", v)),
163 };
164
165 let lines = if invert {
168 let all = read_filtered_lines(&path, None)?;
169 all.into_iter()
170 .filter(|(_, line)| !pattern.is_match(line))
171 .collect::<Vec<_>>()
172 } else {
173 read_filtered_lines(&path, Some(&pattern))?
174 };
175
176 let mut table = DataTable::new("grep");
177 table.add_column(DataColumn::new("line_num"));
178 table.add_column(DataColumn::new("line"));
179
180 for (line_num, line) in lines {
181 table
182 .add_row(DataRow::new(vec![
183 DataValue::Integer(line_num),
184 DataValue::String(line),
185 ]))
186 .map_err(|e| anyhow!(e))?;
187 }
188
189 Ok(Arc::new(table))
190 }
191
192 fn description(&self) -> &str {
193 "Read only lines matching a regex (third arg inverts the match, like grep -v)"
194 }
195
196 fn arg_count(&self) -> usize {
197 3
198 }
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `contents` to a fresh temporary file and returns its handle.
    /// Callers keep the handle alive for as long as they use the path.
    fn write_tmp(contents: &str) -> NamedTempFile {
        let mut tmp = NamedTempFile::new().unwrap();
        tmp.write_all(contents.as_bytes()).unwrap();
        tmp
    }

    /// Wraps `s` as a string-valued argument / expected cell.
    fn text(s: &str) -> DataValue {
        DataValue::String(s.to_string())
    }

    /// Builds the `path` argument pointing at `tmp`.
    fn path_arg(tmp: &NamedTempFile) -> DataValue {
        DataValue::String(tmp.path().to_string_lossy().to_string())
    }

    #[test]
    fn test_read_text_returns_all_lines() {
        let f = write_tmp("one\ntwo\nthree\n");
        let args = vec![path_arg(&f)];

        let table = ReadText.generate(args).unwrap();

        assert_eq!(table.row_count(), 3);
        assert_eq!(table.get_value(0, 1).unwrap(), &text("one"));
        assert_eq!(table.get_value(2, 0).unwrap(), &DataValue::Integer(3));
    }

    #[test]
    fn test_read_text_with_match_regex_filters_lines() {
        let f = write_tmp("INFO boot\nERROR disk full\nINFO shutdown\nERROR oom\n");
        let args = vec![path_arg(&f), text("ERROR")];

        let table = ReadText.generate(args).unwrap();

        // Line numbers refer to positions in the file, not in the filtered output.
        assert_eq!(table.row_count(), 2);
        assert_eq!(table.get_value(0, 0).unwrap(), &DataValue::Integer(2));
        assert_eq!(table.get_value(1, 0).unwrap(), &DataValue::Integer(4));
    }

    #[test]
    fn test_read_text_requires_path() {
        let outcome = ReadText.generate(vec![]);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_read_text_invalid_regex_errors_early() {
        let f = write_tmp("hello\n");
        let args = vec![path_arg(&f), text("(unclosed")];

        let err = ReadText.generate(args).unwrap_err();

        assert!(err.to_string().contains("match_regex"));
    }

    #[test]
    fn test_grep_matches_like_grep() {
        let f = write_tmp("apple\nbanana\ncherry\napricot\n");
        let args = vec![path_arg(&f), text("^ap")];

        let table = Grep.generate(args).unwrap();

        assert_eq!(table.row_count(), 2);
        assert_eq!(table.get_value(0, 1).unwrap(), &text("apple"));
        assert_eq!(table.get_value(1, 1).unwrap(), &text("apricot"));
    }

    #[test]
    fn test_grep_invert_like_grep_v() {
        let f = write_tmp("apple\nbanana\ncherry\napricot\n");
        let args = vec![path_arg(&f), text("^ap"), DataValue::Boolean(true)];

        let table = Grep.generate(args).unwrap();

        assert_eq!(table.row_count(), 2);
        assert_eq!(table.get_value(0, 1).unwrap(), &text("banana"));
    }
}