Skip to main content

alimentar/repl/
completer.rs

1//! Schema-Aware Autocomplete (ALIM-REPL-003)
2//!
3//! Implements Poka-Yoke (mistake proofing) through context-aware completion.
4//! Users cannot easily enter invalid commands or column names.
5//!
6//! # References
7//! - [6] Myers (1990). Taxonomies of Visual Programming
8//! - [13] Ko et al. (2004). Six Learning Barriers
9
10use super::{commands::CommandParser, session::ReplSession};
11
/// Completion engine for the REPL (Poka-Yoke input guidance).
///
/// Holds the candidate words that can be offered while the user types:
/// command names, per-command subcommands, and the column and dataset
/// names reported by the session.
#[derive(Debug)]
pub struct SchemaAwareCompleter {
    /// Command names offered while the first word of the line is typed.
    commands: Vec<String>,
    /// `(command, subcommands)` pairs for commands that take a subcommand.
    subcommands: Vec<(String, Vec<String>)>,
    /// Column names as reported by the session; offered to column commands.
    columns: Vec<String>,
    /// Dataset names as reported by the session; offered to dataset commands.
    datasets: Vec<String>,
}
24
25impl SchemaAwareCompleter {
26    /// Create a new completer from session state
27    #[must_use]
28    pub fn new(session: &ReplSession) -> Self {
29        Self {
30            commands: CommandParser::command_names()
31                .iter()
32                .map(|s| (*s).to_string())
33                .collect(),
34            subcommands: vec![
35                (
36                    "quality".to_string(),
37                    vec!["check".to_string(), "score".to_string()],
38                ),
39                ("drift".to_string(), vec!["detect".to_string()]),
40            ],
41            columns: session.column_names(),
42            datasets: session.datasets(),
43        }
44    }
45
46    /// Get completions for the given input
47    ///
48    /// Returns a list of possible completions.
49    #[must_use]
50    pub fn complete(&self, input: &str) -> Vec<String> {
51        let trimmed = input.trim();
52        let parts: Vec<&str> = trimmed.split_whitespace().collect();
53        let ends_with_space = input.ends_with(' ') && !trimmed.is_empty();
54
55        match parts.len() {
56            0 => {
57                // Empty input - suggest all commands
58                self.commands.clone()
59            }
60            1 if !ends_with_space => {
61                // Partial command - filter matching commands
62                let prefix = parts[0].to_lowercase();
63                self.commands
64                    .iter()
65                    .filter(|cmd| cmd.starts_with(&prefix))
66                    .cloned()
67                    .collect()
68            }
69            1 if ends_with_space => {
70                // Command complete, start context completion
71                let cmd = parts[0].to_lowercase();
72                self.context_complete(&cmd, &[], input)
73            }
74            _ => {
75                // Command entered - context-aware completion
76                let cmd = parts[0].to_lowercase();
77                self.context_complete(&cmd, &parts[1..], input)
78            }
79        }
80    }
81
82    /// Context-aware completion based on command
83    fn context_complete(&self, cmd: &str, args: &[&str], full_input: &str) -> Vec<String> {
84        match cmd {
85            "load" => {
86                // File path completion - not implemented (OS level)
87                vec![]
88            }
89            "use" => {
90                // Suggest loaded dataset names
91                if args.is_empty() || !full_input.ends_with(' ') {
92                    let prefix = args.first().map_or("", |s| *s);
93                    self.datasets
94                        .iter()
95                        .filter(|d| d.starts_with(prefix))
96                        .cloned()
97                        .collect()
98                } else {
99                    self.datasets.clone()
100                }
101            }
102            "quality" | "drift" => {
103                // Suggest subcommands
104                self.complete_subcommand(cmd, args)
105            }
106            "convert" => {
107                // Suggest formats
108                vec!["csv".to_string(), "parquet".to_string(), "json".to_string()]
109                    .into_iter()
110                    .filter(|f| args.first().map_or(true, |prefix| f.starts_with(*prefix)))
111                    .collect()
112            }
113            "select" | "drop" => {
114                // Suggest column names for column operations
115                if args.is_empty() || !full_input.ends_with(' ') {
116                    let prefix = args.last().map_or("", |s| *s);
117                    self.columns
118                        .iter()
119                        .filter(|c| c.starts_with(prefix))
120                        .cloned()
121                        .collect()
122                } else {
123                    self.columns.clone()
124                }
125            }
126            "help" => {
127                // Suggest help topics
128                vec![
129                    "quality".to_string(),
130                    "drift".to_string(),
131                    "export".to_string(),
132                ]
133                .into_iter()
134                .filter(|t| args.first().map_or(true, |prefix| t.starts_with(*prefix)))
135                .collect()
136            }
137            _ => vec![],
138        }
139    }
140
141    /// Complete subcommands for commands with children
142    fn complete_subcommand(&self, cmd: &str, args: &[&str]) -> Vec<String> {
143        let subcommands: Vec<String> = self
144            .subcommands
145            .iter()
146            .find(|(c, _)| c == cmd)
147            .map(|(_, subs)| subs.clone())
148            .unwrap_or_default();
149
150        if args.is_empty() {
151            subcommands
152        } else {
153            let prefix = args[0].to_lowercase();
154            subcommands
155                .into_iter()
156                .filter(|sub| sub.starts_with(&prefix))
157                .collect()
158        }
159    }
160
161    /// Update column names from session
162    pub fn update_columns(&mut self, session: &ReplSession) {
163        self.columns = session.column_names();
164    }
165
166    /// Update dataset names from session
167    pub fn update_datasets(&mut self, session: &ReplSession) {
168        self.datasets = session.datasets();
169    }
170}
171
172#[cfg(feature = "repl")]
173use reedline::{Completer, Span, Suggestion};
174
#[cfg(feature = "repl")]
impl Completer for SchemaAwareCompleter {
    /// Produce `reedline` suggestions for the text left of the cursor.
    ///
    /// `pos` is treated as a byte offset into `line`. An offset past the end
    /// of the line, or one that falls inside a multi-byte character, is
    /// moved back to the nearest valid boundary instead of panicking.
    ///
    /// Every suggestion replaces the span from the last space before the
    /// cursor (or the start of the line) up to the cursor.
    fn complete(&mut self, line: &str, pos: usize) -> Vec<Suggestion> {
        // Clamp the cursor so the slice below can never panic.
        let mut cursor = pos.min(line.len());
        while !line.is_char_boundary(cursor) {
            cursor -= 1;
        }

        // Get the part of the line up to cursor
        let input = &line[..cursor];

        // Delegate to the inherent, string-based completer.
        let completions = Self::complete(self, input);

        // The word being completed starts right after the last space.
        let word_start = input.rfind(' ').map_or(0, |i| i + 1);
        let span = Span::new(word_start, cursor);

        completions
            .into_iter()
            .map(|value| Suggestion {
                value,
                description: None,
                style: None,
                extra: None,
                span,
                append_whitespace: true,
            })
            .collect()
    }
}
201
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::{
        array::{Int32Array, StringArray},
        datatypes::{DataType, Field, Schema as ArrowSchema},
        record_batch::RecordBatch,
    };

    use super::*;
    use crate::ArrowDataset;

    /// Three-row dataset with an `id` (Int32) and a `name` (Utf8) column.
    fn create_test_dataset() -> ArrowDataset {
        let fields = vec![
            Field::new("id", DataType::Int32, false),
            Field::new("name", DataType::Utf8, true),
        ];
        let batch = RecordBatch::try_new(
            Arc::new(ArrowSchema::new(fields)),
            vec![
                Arc::new(Int32Array::from(vec![1, 2, 3])),
                Arc::new(StringArray::from(vec!["a", "b", "c"])),
            ],
        )
        .unwrap();
        ArrowDataset::new(vec![batch]).unwrap()
    }

    /// Session with the test dataset loaded as `test.parquet`.
    fn loaded_session() -> ReplSession {
        let mut session = ReplSession::new();
        session.load_dataset("test.parquet", create_test_dataset());
        session
    }

    /// Completer built from a fresh, empty session.
    fn bare_completer() -> SchemaAwareCompleter {
        SchemaAwareCompleter::new(&ReplSession::new())
    }

    /// Completer built from a session that has `test.parquet` loaded.
    fn loaded_completer() -> SchemaAwareCompleter {
        SchemaAwareCompleter::new(&loaded_session())
    }

    /// True when `items` contains exactly `wanted`.
    fn has(items: &[String], wanted: &str) -> bool {
        items.iter().any(|s| s == wanted)
    }

    #[test]
    fn test_completer_new() {
        let completer = bare_completer();
        assert!(!completer.commands.is_empty());
    }

    #[test]
    fn test_completer_empty_input() {
        let completions = bare_completer().complete("");
        assert!(!completions.is_empty());
        assert!(has(&completions, "help"));
    }

    #[test]
    fn test_completer_partial_command() {
        let completions = bare_completer().complete("he");
        assert!(has(&completions, "help") || has(&completions, "head"));
    }

    #[test]
    fn test_completer_command_with_space() {
        let completions = bare_completer().complete("quality ");
        assert!(has(&completions, "check") || has(&completions, "score"));
    }

    #[test]
    fn test_completer_use_with_datasets() {
        let completions = loaded_completer().complete("use ");
        assert!(has(&completions, "test.parquet"));
    }

    #[test]
    fn test_completer_select_with_columns() {
        let completions = loaded_completer().complete("select ");
        assert!(has(&completions, "id"));
        assert!(has(&completions, "name"));
    }

    #[test]
    fn test_completer_drop_with_columns() {
        let completions = loaded_completer().complete("drop ");
        assert!(has(&completions, "id"));
        assert!(has(&completions, "name"));
    }

    #[test]
    fn test_completer_convert_formats() {
        let completions = bare_completer().complete("convert ");
        assert!(has(&completions, "csv"));
        assert!(has(&completions, "parquet"));
        assert!(has(&completions, "json"));
    }

    #[test]
    fn test_completer_convert_partial_format() {
        let completions = bare_completer().complete("convert cs");
        assert!(has(&completions, "csv"));
        assert!(!has(&completions, "parquet"));
    }

    #[test]
    fn test_completer_quality_subcommands() {
        let completions = bare_completer().complete("quality ");
        assert!(has(&completions, "check"));
        assert!(has(&completions, "score"));
    }

    #[test]
    fn test_completer_quality_partial_subcommand() {
        let completions = bare_completer().complete("quality ch");
        assert!(has(&completions, "check"));
        assert!(!has(&completions, "score"));
    }

    #[test]
    fn test_completer_drift_subcommands() {
        let completions = bare_completer().complete("drift ");
        assert!(has(&completions, "detect"));
    }

    #[test]
    fn test_completer_help_topics() {
        let completions = bare_completer().complete("help ");
        assert!(has(&completions, "quality"));
        assert!(has(&completions, "drift"));
        assert!(has(&completions, "export"));
    }

    #[test]
    fn test_completer_help_partial_topic() {
        let completions = bare_completer().complete("help qu");
        assert!(has(&completions, "quality"));
        assert!(!has(&completions, "drift"));
    }

    #[test]
    fn test_completer_load_returns_empty() {
        let completions = bare_completer().complete("load ");
        assert!(completions.is_empty());
    }

    #[test]
    fn test_completer_unknown_command_returns_empty() {
        let completions = bare_completer().complete("unknowncommand ");
        assert!(completions.is_empty());
    }

    #[test]
    fn test_completer_update_columns() {
        let mut completer = bare_completer();
        assert!(completer.columns.is_empty());

        let session_with_data = loaded_session();
        completer.update_columns(&session_with_data);
        assert!(!completer.columns.is_empty());
    }

    #[test]
    fn test_completer_update_datasets() {
        let mut completer = bare_completer();
        assert!(completer.datasets.is_empty());

        let session_with_data = loaded_session();
        completer.update_datasets(&session_with_data);
        assert!(has(&completer.datasets, "test.parquet"));
    }

    #[test]
    fn test_completer_select_partial_column() {
        let completions = loaded_completer().complete("select i");
        assert!(has(&completions, "id"));
        assert!(!has(&completions, "name"));
    }

    #[test]
    fn test_completer_use_partial_dataset() {
        let mut session = loaded_session();
        session.load_dataset("other.csv", create_test_dataset());
        let completions = SchemaAwareCompleter::new(&session).complete("use te");
        assert!(has(&completions, "test.parquet"));
        assert!(!has(&completions, "other.csv"));
    }

    #[test]
    fn test_completer_multiple_args() {
        let completions = loaded_completer().complete("select id ");
        assert!(has(&completions, "id"));
        assert!(has(&completions, "name"));
    }

    #[test]
    fn test_completer_debug() {
        let completer = bare_completer();
        let debug = format!("{:?}", completer);
        assert!(debug.contains("SchemaAwareCompleter"));
    }

    #[test]
    fn test_complete_subcommand_unknown_command() {
        let completions = bare_completer().complete_subcommand("unknown", &[]);
        assert!(completions.is_empty());
    }

    #[test]
    fn test_complete_subcommand_with_partial() {
        let completions = bare_completer().complete_subcommand("quality", &["sc"]);
        assert!(has(&completions, "score"));
        assert!(!has(&completions, "check"));
    }
}