Skip to main content

z3950_rs/
query_languages.rs

1use crate::error::{Error, Result};
2use crate::pdu::{
3    AttributeElement, AttributeValue, AttributesPlusTerm, Operator, Operand, Query, RpnQuery,
4    RpnRpnOperator, RpnStructure, Term,
5};
6use rasn::types::OctetString;
7
/// A query written in one of the high-level query languages this crate accepts.
///
/// A value can be turned into a Z39.50 [`Query`] through the `From<QueryLanguage>`
/// implementation in this module.
#[derive(Debug, Clone)]
pub enum QueryLanguage {
    /// A CQL query string, e.g. `title = "athena"`.
    CQL(String),
}
13
/// Node of the abstract syntax tree produced by `CqlParser`.
#[derive(Debug, Clone)]
enum CqlNode {
    /// A single search clause: `index relation value`, e.g. `title = "athena"`.
    Term {
        /// Index (field) name as written in the query.
        index: String,
        /// Relation symbol as written in the query (`=`, `<`, `>=`, ...).
        relation: String,
        /// Search value with surrounding quotes removed and escapes resolved.
        value: String,
    },
    /// Two sub-expressions joined by a boolean operator (`left AND right`, `left OR right`).
    BinaryOp {
        op: CqlOperator,
        left: Box<CqlNode>,
        right: Box<CqlNode>,
    },
    /// A prefix operator applied to one sub-expression (`NOT operand`).
    UnaryOp {
        op: CqlOperator,
        operand: Box<CqlNode>,
    },
}
35
/// Boolean operators that can appear in a `CqlNode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CqlOperator {
    /// Both operands must match.
    And,
    /// Either operand may match.
    Or,
    /// Negation; the parser in this file emits it as a prefix (unary) operator.
    Not,
}
42
43/// CQL parser with error handling
44struct CqlParser {
45    input: Vec<char>,
46    pos: usize,
47}
48
49impl CqlParser {
50    fn new(input: &str) -> Self {
51        Self {
52            input: input.chars().collect(),
53            pos: 0,
54        }
55    }
56
57    fn parse(&mut self) -> Result<CqlNode> {
58        let node = self.parse_expression()?;
59        self.skip_whitespace();
60        if self.pos < self.input.len() {
61            return Err(Error::Protocol(format!(
62                "Unexpected character at position {}: '{}'",
63                self.pos,
64                self.input[self.pos]
65            )));
66        }
67        Ok(node)
68    }
69
70    fn parse_expression(&mut self) -> Result<CqlNode> {
71        self.parse_or_expression()
72    }
73
74    fn parse_or_expression(&mut self) -> Result<CqlNode> {
75        let mut left = self.parse_and_expression()?;
76        self.skip_whitespace();
77
78        while self.peek() == Some('O') && self.peek_str(3).as_deref() == Some("OR ") {
79            self.consume_str("OR");
80            self.skip_whitespace();
81            let right = self.parse_and_expression()?;
82            left = CqlNode::BinaryOp {
83                op: CqlOperator::Or,
84                left: Box::new(left),
85                right: Box::new(right),
86            };
87            self.skip_whitespace();
88        }
89
90        Ok(left)
91    }
92
93    fn parse_and_expression(&mut self) -> Result<CqlNode> {
94        let mut left = self.parse_not_expression()?;
95        self.skip_whitespace();
96
97        while self.peek() == Some('A') && self.peek_str(4).as_deref() == Some("AND ") {
98            self.consume_str("AND");
99            self.skip_whitespace();
100            let right = self.parse_not_expression()?;
101            left = CqlNode::BinaryOp {
102                op: CqlOperator::And,
103                left: Box::new(left),
104                right: Box::new(right),
105            };
106            self.skip_whitespace();
107        }
108
109        Ok(left)
110    }
111
112    fn parse_not_expression(&mut self) -> Result<CqlNode> {
113        self.skip_whitespace();
114        if self.peek() == Some('N') && self.peek_str(4).as_deref() == Some("NOT ") {
115            self.consume_str("NOT");
116            self.skip_whitespace();
117            let operand = self.parse_primary()?;
118            Ok(CqlNode::UnaryOp {
119                op: CqlOperator::Not,
120                operand: Box::new(operand),
121            })
122        } else {
123            self.parse_primary()
124        }
125    }
126
127    fn parse_primary(&mut self) -> Result<CqlNode> {
128        self.skip_whitespace();
129
130        if self.peek() == Some('(') {
131            self.consume_char('(')?;
132            let node = self.parse_expression()?;
133            self.skip_whitespace();
134            self.consume_char(')')?;
135            Ok(node)
136        } else {
137            self.parse_term()
138        }
139    }
140
141    fn parse_term(&mut self) -> Result<CqlNode> {
142        self.skip_whitespace();
143
144        // Parse index (identifier)
145        let index = self.parse_identifier()?;
146        self.skip_whitespace();
147
148        // Parse relation (default to "=" if not specified)
149        let relation = if self.peek() == Some('=') {
150            self.consume_char('=')?;
151            "=".to_string()
152        } else if self.peek_str(2).as_deref() == Some(">=") {
153            self.consume_str(">=");
154            ">=".to_string()
155        } else if self.peek_str(2).as_deref() == Some("<=") {
156            self.consume_str("<=");
157            "<=".to_string()
158        } else if self.peek() == Some('>') {
159            self.consume_char('>')?;
160            ">".to_string()
161        } else if self.peek() == Some('<') {
162            self.consume_char('<')?;
163            "<".to_string()
164        } else if self.peek_str(2).as_deref() == Some("<>") {
165            self.consume_str("<>");
166            "<>".to_string()
167        } else {
168            return Err(Error::Protocol(format!(
169                "Expected relation operator at position {}",
170                self.pos
171            )));
172        };
173
174        self.skip_whitespace();
175
176        // Parse value (quoted string or unquoted string)
177        let value = if self.peek() == Some('"') {
178            self.parse_quoted_string()?
179        } else {
180            self.parse_unquoted_string()?
181        };
182
183        Ok(CqlNode::Term {
184            index,
185            relation,
186            value,
187        })
188    }
189
190    fn parse_identifier(&mut self) -> Result<String> {
191        self.skip_whitespace();
192        let start = self.pos;
193
194        if self.pos >= self.input.len() {
195            return Err(Error::Protocol("Unexpected end of input while parsing identifier".into()));
196        }
197
198        let first = self.input[self.pos];
199        if !first.is_alphabetic() && first != '_' {
200            return Err(Error::Protocol(format!(
201                "Invalid identifier start character: '{}' at position {}",
202                first, self.pos
203            )));
204        }
205
206        self.pos += 1;
207
208        while self.pos < self.input.len() {
209            let ch = self.input[self.pos];
210            if ch.is_alphanumeric() || ch == '_' || ch == '.' {
211                self.pos += 1;
212            } else {
213                break;
214            }
215        }
216
217        Ok(self.input[start..self.pos].iter().collect())
218    }
219
220    fn parse_quoted_string(&mut self) -> Result<String> {
221        self.consume_char('"')?;
222        let mut result = String::new();
223        let mut escaped = false;
224
225        while self.pos < self.input.len() {
226            let ch = self.input[self.pos];
227            self.pos += 1;
228
229            if escaped {
230                match ch {
231                    'n' => result.push('\n'),
232                    't' => result.push('\t'),
233                    'r' => result.push('\r'),
234                    '\\' => result.push('\\'),
235                    '"' => result.push('"'),
236                    _ => result.push(ch),
237                }
238                escaped = false;
239            } else if ch == '\\' {
240                escaped = true;
241            } else if ch == '"' {
242                return Ok(result);
243            } else {
244                result.push(ch);
245            }
246        }
247
248        Err(Error::Protocol("Unterminated quoted string".into()))
249    }
250
251    fn parse_unquoted_string(&mut self) -> Result<String> {
252        let start = self.pos;
253        let mut in_string = false;
254
255        while self.pos < self.input.len() {
256            let ch = self.input[self.pos];
257            if ch.is_whitespace() || ch == ')' || ch == '(' {
258                if in_string {
259                    break;
260                } else {
261                    self.pos += 1;
262                    continue;
263                }
264            }
265            in_string = true;
266            self.pos += 1;
267        }
268
269        if !in_string {
270            return Err(Error::Protocol(format!(
271                "Expected value at position {}",
272                start
273            )));
274        }
275
276        Ok(self.input[start..self.pos].iter().collect())
277    }
278
279    fn skip_whitespace(&mut self) {
280        while self.pos < self.input.len() && self.input[self.pos].is_whitespace() {
281            self.pos += 1;
282        }
283    }
284
285    fn peek(&self) -> Option<char> {
286        if self.pos < self.input.len() {
287            Some(self.input[self.pos])
288        } else {
289            None
290        }
291    }
292
293    fn peek_str(&self, len: usize) -> Option<String> {
294        if self.pos + len <= self.input.len() {
295            Some(self.input[self.pos..self.pos + len].iter().collect())
296        } else {
297            None
298        }
299    }
300
301    fn consume_char(&mut self, expected: char) -> Result<()> {
302        if self.peek() == Some(expected) {
303            self.pos += 1;
304            Ok(())
305        } else {
306            Err(Error::Protocol(format!(
307                "Expected '{}' at position {}, found '{:?}'",
308                expected,
309                self.pos,
310                self.peek()
311            )))
312        }
313    }
314
315    fn consume_str(&mut self, expected: &str) {
316        let len = expected.len();
317        if self.pos + len <= self.input.len() {
318            let found: String = self.input[self.pos..self.pos + len].iter().collect();
319            if found == expected {
320                self.pos += len;
321            }
322        }
323    }
324}
325
/// Translates a CQL index name into the numeric value sent as the BIB-1 Use
/// attribute (type 1).
///
/// Known Dublin Core names and their short aliases are looked up
/// case-sensitively. Any other index that parses as an integer is passed
/// through unchanged, and everything else falls back to 1016.
///
/// NOTE(review): `type` maps to the same 1016 that the fallback uses (described
/// by the original author as the "any" attribute), and several of the Dublin
/// Core targets below may not match the published BIB-1 Use table — confirm
/// against the attribute-set definition before relying on them.
fn map_index_to_use_attribute_value(index: &str) -> i64 {
    /// Value used when the index is neither known nor numeric.
    const FALLBACK_USE: i64 = 1016;

    match index {
        // Core bibliographic fields
        "dc.title" | "title" | "t" => 4,
        "dc.creator" | "author" | "a" => 1003,
        "dc.contributor" | "contributor" => 1004,
        "dc.subject" | "subject" | "s" => 21,
        "dc.description" | "description" => 62,
        "dc.publisher" | "publisher" => 1018,
        "dc.date" | "date" | "d" => 31,
        "dc.identifier" | "isbn" => 7,
        "dc.language" | "language" => 54,
        // Remaining Dublin Core elements
        "dc.type" | "type" => 1016,
        "dc.format" | "format" => 1017,
        "dc.source" | "source" => 1015,
        "dc.relation" | "relation" => 1019,
        "dc.coverage" | "coverage" => 1020,
        "dc.rights" | "rights" => 1021,
        // A bare number is taken to be the Use value itself.
        other => other.parse::<i64>().unwrap_or(FALLBACK_USE),
    }
}
358
/// Translates a CQL relation symbol into the numeric value sent as the BIB-1
/// Relation attribute (type 2).
///
/// `<` = 1, `<=` = 2, `=` = 3, `>=` = 4, `>` = 5, `<>` = 6. Any symbol not in
/// that list is treated as "equal" (3).
fn map_relation_to_bib1_relation(relation: &str) -> i64 {
    /// Code for "equal", also used for unrecognised symbols.
    const EQUAL: i64 = 3;
    /// Symbol-to-code pairs in ascending code order.
    const CODES: [(&str, i64); 6] = [
        ("<", 1),
        ("<=", 2),
        ("=", EQUAL),
        (">=", 4),
        (">", 5),
        ("<>", 6),
    ];

    CODES
        .iter()
        .find(|(symbol, _)| *symbol == relation)
        .map_or(EQUAL, |&(_, code)| code)
}
372
373/// Converts CQL AST node to RPN structure
374fn cql_node_to_rpn(node: CqlNode) -> Result<RpnStructure> {
375    match node {
376        CqlNode::Term { index, relation, value } => {
377            // BIB-1 attributes: we need multiple AttributeElement
378            // - Type 1 (Use): the field to search (Title=4, Author=1003, etc.)
379            // - Type 2 (Relation): the relation operator (=, <, >, etc.)
380            // - Type 3 (Position): position in field (default: any=3)
381            // - Type 4 (Structure): structure of term (default: word=2)
382            // - Type 5 (Truncation): truncation (default: right truncation=100)
383            // - Type 6 (Completeness): completeness (default: incomplete=1)
384            
385            let use_value = map_index_to_use_attribute_value(&index);
386            let relation_value = map_relation_to_bib1_relation(&relation);
387            
388            let mut attributes = Vec::new();
389            
390            // Use attribute (Type 1) - REQUIRED
391            // Note: attribute_set is specified at RpnQuery level, not in each AttributeElement
392            attributes.push(AttributeElement {
393                attribute_set: None, // attribute_set is at RpnQuery level
394                attribute_type: 1.into(), // Use attribute type
395                attribute_value: AttributeValue::Numeric(use_value.into()),
396            });
397            
398            // Relation attribute (Type 2) - REQUIRED
399            attributes.push(AttributeElement {
400                attribute_set: None, // attribute_set is at RpnQuery level
401                attribute_type: 2.into(), // Relation attribute type
402                attribute_value: AttributeValue::Numeric(relation_value.into()),
403            });
404            
405            // For exact match (=), use minimal attributes
406            // For other relations, add additional attributes for better matching
407            if relation != "=" {
408                // Position attribute (Type 3) - default to "any" (3)
409                attributes.push(AttributeElement {
410                    attribute_set: None,
411                    attribute_type: 3.into(), // Position attribute type
412                    attribute_value: AttributeValue::Numeric(3.into()), // any position
413                });
414                
415                // Structure attribute (Type 4) - default to "word" (2)
416                attributes.push(AttributeElement {
417                    attribute_set: None,
418                    attribute_type: 4.into(), // Structure attribute type
419                    attribute_value: AttributeValue::Numeric(2.into()), // word
420                });
421                
422                // Truncation attribute (Type 5) - right truncation (100)
423                attributes.push(AttributeElement {
424                    attribute_set: None,
425                    attribute_type: 5.into(), // Truncation attribute type
426                    attribute_value: AttributeValue::Numeric(100.into()), // right truncation
427                });
428            }
429
430            Ok(RpnStructure::Op(Operand::AttributesPlusTerm(
431                AttributesPlusTerm {
432                    attributes,
433                    term: Term::General(OctetString::from(value.as_bytes().to_vec())),
434                },
435            )))
436        }
437        CqlNode::BinaryOp { op, left, right } => {
438            let rpn1 = cql_node_to_rpn(*left)?;
439            let rpn2 = cql_node_to_rpn(*right)?;
440
441            let operator = match op {
442                CqlOperator::And => Operator::And(()),
443                CqlOperator::Or => Operator::Or(()),
444                CqlOperator::Not => {
445                    return Err(Error::Protocol(
446                        "NOT operator must be unary, not binary".into(),
447                    ));
448                }
449            };
450
451            Ok(RpnStructure::RpnRpnOperator(RpnRpnOperator {
452                rpn1: Box::new(rpn1),
453                rpn2: Box::new(rpn2),
454                op: operator,
455            }))
456        }
457        CqlNode::UnaryOp { op, operand } => {
458            let rpn_operand = cql_node_to_rpn(*operand)?;
459
460            match op {
461                CqlOperator::Not => {
462                    // AND NOT: we need to create (operand AND NOT operand2)
463                    // For simplicity, we'll create a structure with the operand and a dummy term
464                    // In practice, AND NOT requires two operands, so we'll use the operand twice
465                    // This is a simplified approach - a full implementation might need different handling
466                    Ok(RpnStructure::RpnRpnOperator(RpnRpnOperator {
467                        rpn1: Box::new(rpn_operand.clone()),
468                        rpn2: Box::new(rpn_operand),
469                        op: Operator::AndNot(()),
470                    }))
471                }
472                _ => Err(Error::Protocol(format!(
473                    "Unsupported unary operator: {:?}",
474                    op
475                ))),
476            }
477        }
478    }
479}
480
481impl From<QueryLanguage> for Query {
482    fn from(query_language: QueryLanguage) -> Self {
483        match query_language {
484            QueryLanguage::CQL(query) => {
485                // Try to parse and convert CQL to Query
486                // If parsing fails, we'll return an error query (Type100 with error message)
487                // In practice, you might want to return a Result instead
488                match parse_cql_to_query(&query) {
489                    Ok(q) => q,
490                    Err(_) => {
491                        // Fallback: create a Type100 query with the raw CQL string
492                        Query::Type100(OctetString::from(query.as_bytes().to_vec()))
493                    }
494                }
495            }
496        }
497    }
498}
499
500
501
502
503
504
505/// Parses CQL string and converts it to a Z39.50 Query
506/// For simple queries like "title=value", sends Type-1 RPN with the term but no explicit attributes
507/// (like yaz-client does - the server interprets "index=value" syntax)
508/// For complex queries with AND/OR/NOT, uses full RPN with explicit BIB-1 attributes
509pub fn parse_cql_to_query(cql: &str) -> Result<Query> {
510    // Check if this is a simple query (no operators, no parentheses)
511    // Simple format: "index=value" or "index=\"value\""
512    if cql.contains('=') && !cql.contains(" AND ") && !cql.contains(" OR ") && !cql.contains(" NOT ") 
513        && !cql.contains('(') && !cql.contains(')') {
514        // For simple queries, send the raw query string as the term WITHOUT explicit attributes
515        // This is what yaz-client does: RPN @attrset Bib-1 title=athena
516        // The server interprets "index=value" syntax
517        
518        // Strip quotes from value if present for clean query
519        let clean_query = if cql.contains('"') {
520            cql.replace('"', "")
521        } else {
522            cql.to_string()
523        };
524        
525        let rpn_query = RpnQuery {
526            attribute_set: crate::pdu::bib1_attribute_set()?,
527            rpn: RpnStructure::Op(Operand::AttributesPlusTerm(AttributesPlusTerm {
528                attributes: vec![], // No explicit attributes - server interprets "index=value"
529                term: Term::General(OctetString::from(clean_query.as_bytes().to_vec())),
530            })),
531        };
532        
533        return Ok(Query::Type1(rpn_query));
534    }
535    
536    // For complex queries with AND/OR/NOT, use full RPN with explicit BIB-1 attributes
537    let mut parser = CqlParser::new(cql);
538    let ast = parser.parse()?;
539    let rpn_structure = cql_node_to_rpn(ast)?;
540
541    let rpn_query = RpnQuery {
542        attribute_set: crate::pdu::bib1_attribute_set()?,
543        rpn: rpn_structure,
544    };
545
546    Ok(Query::Type1(rpn_query))
547}