Skip to main content

brainwires_seal/
query_core.rs

1//! Semantic Query Core Extraction
2//!
3//! Extracts structured "query cores" from natural language questions.
4//! Query cores capture the essential semantics of a question in a format
5//! that can be executed against the relationship graph.
6//!
7//! ## S-Expression Inspired Design
8//!
9//! Based on the SEAL paper's approach, we extract simplified query structures:
10//!
11//! - `JOIN(relation, subject, object)` - Traverse a relationship
12//! - `AND(expr1, expr2, ...)` - Conjunction of expressions
13//! - `OR(expr1, expr2, ...)` - Disjunction of expressions
14//! - `FILTER(source, predicate)` - Filter results
15//! - `COUNT(expr)` - Count results
16//! - `ARGMAX/ARGMIN(expr, property)` - Superlative queries
17//!
18//! ## Example
19//!
20//! ```rust,ignore
21//! let extractor = QueryCoreExtractor::new();
22//!
23//! // "What uses main.rs?"
24//! let core = extractor.extract(
25//!     "What uses main.rs?",
26//!     &[("main.rs".to_string(), EntityType::File)]
27//! );
28//!
//! // Produces: Some(QueryCore {
//! //     question_type: Dependency,
//! //     root: Op(Join {
//! //         relation: DependsOn,
//! //         subject: Variable("?dependent"),
//! //         object: Constant("main.rs", File)
//! //     }),
//! //     ..
//! // })
37//! ```
38
39use brainwires_core::graph::{EdgeType, EntityType, RelationshipGraphT};
40use regex::Regex;
41use std::collections::HashMap;
42use std::sync::LazyLock;
43
44// --- LazyLock regex statics for question classification patterns ---
45
46// Definition patterns
47static RE_WHAT_IS: LazyLock<Regex> =
48    LazyLock::new(|| Regex::new(r"(?i)what\s+is\s+(\w+)").expect("valid regex"));
49static RE_EXPLAIN: LazyLock<Regex> =
50    LazyLock::new(|| Regex::new(r"(?i)explain\s+(\w+)").expect("valid regex"));
51
52// Location patterns
53static RE_WHERE_IS: LazyLock<Regex> = LazyLock::new(|| {
54    Regex::new(r"(?i)where\s+is\s+(.+?)\s*(defined|declared|located)").expect("valid regex")
55});
56static RE_WHICH_FILE: LazyLock<Regex> = LazyLock::new(|| {
57    Regex::new(r"(?i)which\s+file\s+(contains|has|defines)\s+(.+)").expect("valid regex")
58});
59static RE_FIND_IN: LazyLock<Regex> =
60    LazyLock::new(|| Regex::new(r"(?i)find\s+(.+?)\s+in").expect("valid regex"));
61
62// Dependency patterns
63static RE_WHAT_USES: LazyLock<Regex> = LazyLock::new(|| {
64    Regex::new(r"(?i)what\s+(uses|depends\s+on|calls|imports)\s+(.+)").expect("valid regex")
65});
66static RE_WHAT_DOES_USE: LazyLock<Regex> = LazyLock::new(|| {
67    Regex::new(r"(?i)what\s+does\s+(.+?)\s+(use|depend\s+on|call|import)").expect("valid regex")
68});
69static RE_SHOW_DEPS: LazyLock<Regex> = LazyLock::new(|| {
70    Regex::new(r"(?i)show\s+(dependencies|usages)\s+(of|for)\s+(.+)").expect("valid regex")
71});
72
73// Count patterns
74static RE_HOW_MANY: LazyLock<Regex> =
75    LazyLock::new(|| Regex::new(r"(?i)how\s+many\s+(.+)").expect("valid regex"));
76static RE_COUNT: LazyLock<Regex> =
77    LazyLock::new(|| Regex::new(r"(?i)count\s+(.+)").expect("valid regex"));
78
79// Superlative patterns
80static RE_WHICH_MOST: LazyLock<Regex> = LazyLock::new(|| {
81    Regex::new(r"(?i)which\s+(.+?)\s+has\s+the\s+(most|least|highest|lowest)").expect("valid regex")
82});
83static RE_LARGEST: LazyLock<Regex> =
84    LazyLock::new(|| Regex::new(r"(?i)(largest|smallest|biggest)\s+(.+)").expect("valid regex"));
85
86// Enumeration patterns
87static RE_LIST: LazyLock<Regex> =
88    LazyLock::new(|| Regex::new(r"(?i)list\s+(all\s+)?(.+)").expect("valid regex"));
89static RE_SHOW: LazyLock<Regex> =
90    LazyLock::new(|| Regex::new(r"(?i)show\s+(all\s+)?(.+)").expect("valid regex"));
91
92// Boolean patterns
93static RE_DOES_USE: LazyLock<Regex> = LazyLock::new(|| {
94    Regex::new(r"(?i)does\s+(.+?)\s+(use|depend|call|import|contain)\s+(.+)").expect("valid regex")
95});
96static RE_IS_USED_BY: LazyLock<Regex> = LazyLock::new(|| {
97    Regex::new(r"(?i)is\s+(.+?)\s+(used|called|imported)\s+by\s+(.+)").expect("valid regex")
98});
99
/// Core operations in the query language (S-expression inspired).
///
/// Operands are stored as [`QueryExpr`] values, so operations can be nested
/// inside one another through [`QueryExpr::Op`].
#[derive(Debug, Clone)]
pub enum QueryOp {
    /// Join two expressions via a relationship: `JOIN(relation, subject, object)`.
    Join {
        /// The relationship connecting subject and object.
        relation: RelationType,
        /// The subject expression.
        subject: Box<QueryExpr>,
        /// The object expression.
        object: Box<QueryExpr>,
    },
    /// Logical AND (conjunction) of expressions.
    And(Vec<QueryExpr>),
    /// Logical OR (disjunction) of expressions.
    Or(Vec<QueryExpr>),
    /// A list of literal string values.
    Values(Vec<String>),
    /// Filter the results of `source` by `predicate`.
    Filter {
        /// The expression to filter.
        source: Box<QueryExpr>,
        /// The filter predicate to apply.
        predicate: FilterPredicate,
    },
    /// Count the results of the wrapped expression.
    Count(Box<QueryExpr>),
    /// Superlative query (argmax/argmin).
    Superlative {
        /// The expression to evaluate.
        source: Box<QueryExpr>,
        /// The name of the property to compare.
        property: String,
        /// Whether to find the maximum or minimum.
        direction: SuperlativeDir,
    },
}
137
/// A query expression: a variable, a typed constant, or a nested operation.
#[derive(Debug, Clone)]
pub enum QueryExpr {
    /// A variable binding (e.g., `?file`, `?function`); the stored name
    /// includes the leading `?` when built with `QueryExpr::var`.
    Variable(String),
    /// A constant value together with its entity type.
    Constant(String, EntityType),
    /// A complex operation (see [`QueryOp`]).
    Op(QueryOp),
}
148
149impl QueryExpr {
150    /// Create a new variable expression
151    pub fn var(name: &str) -> Self {
152        QueryExpr::Variable(format!("?{}", name.trim_start_matches('?')))
153    }
154
155    /// Create a new constant expression
156    pub fn constant(value: &str, entity_type: EntityType) -> Self {
157        QueryExpr::Constant(value.to_string(), entity_type)
158    }
159
160    /// Create a join operation
161    pub fn join(relation: RelationType, subject: QueryExpr, object: QueryExpr) -> Self {
162        QueryExpr::Op(QueryOp::Join {
163            relation,
164            subject: Box::new(subject),
165            object: Box::new(object),
166        })
167    }
168
169    /// Create a count operation
170    pub fn count(inner: QueryExpr) -> Self {
171        QueryExpr::Op(QueryOp::Count(Box::new(inner)))
172    }
173
174    /// Check if this is a variable
175    pub fn is_variable(&self) -> bool {
176        matches!(self, QueryExpr::Variable(_))
177    }
178
179    /// Get the variable name if this is a variable
180    pub fn as_variable(&self) -> Option<&str> {
181        match self {
182            QueryExpr::Variable(name) => Some(name),
183            _ => None,
184        }
185    }
186}
187
/// Relation types used in query joins.
///
/// Some of these map onto graph edge types via `to_edge_type`; the others
/// have no storage equivalent there.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RelationType {
    /// Containment relationship (e.g., file contains function).
    Contains,
    /// Reference relationship (e.g., function references type).
    References,
    /// Dependency relationship.
    DependsOn,
    /// Modification relationship.
    Modifies,
    /// Definition relationship.
    Defines,
    /// Co-occurrence relationship.
    CoOccurs,
    /// Type annotation relationship.
    HasType,
    /// Error association relationship.
    HasError,
    /// Creation timestamp relationship.
    CreatedAt,
    /// Modification timestamp relationship.
    ModifiedAt,
    /// User-defined relationship type, identified by its name.
    Custom(String),
}
214
215impl RelationType {
216    /// Convert to the storage EdgeType
217    pub fn to_edge_type(&self) -> Option<EdgeType> {
218        match self {
219            RelationType::Contains => Some(EdgeType::Contains),
220            RelationType::References => Some(EdgeType::References),
221            RelationType::DependsOn => Some(EdgeType::DependsOn),
222            RelationType::Modifies => Some(EdgeType::Modifies),
223            RelationType::Defines => Some(EdgeType::Defines),
224            RelationType::CoOccurs => Some(EdgeType::CoOccurs),
225            _ => None,
226        }
227    }
228
229    /// Get the inverse relation (if applicable)
230    pub fn inverse(&self) -> Option<RelationType> {
231        match self {
232            RelationType::Contains => Some(RelationType::Custom("ContainedBy".to_string())),
233            RelationType::DependsOn => Some(RelationType::Custom("DependedOnBy".to_string())),
234            RelationType::Defines => Some(RelationType::Custom("DefinedBy".to_string())),
235            RelationType::Modifies => Some(RelationType::Custom("ModifiedBy".to_string())),
236            RelationType::References => Some(RelationType::Custom("ReferencedBy".to_string())),
237            _ => None,
238        }
239    }
240}
241
/// Filter predicates for query results.
///
/// Used as the `predicate` of a `QueryOp::Filter`.
#[derive(Debug, Clone)]
pub enum FilterPredicate {
    /// Type constraint: keep results of the given entity type.
    HasType(EntityType),
    /// Name pattern (regex source text).
    NameMatches(String),
    /// Existence in set: keep results whose value is listed.
    In(Vec<String>),
    /// Not in set: keep results whose value is not listed.
    NotIn(Vec<String>),
    /// Property comparison.
    Property {
        /// The property name.
        name: String,
        /// The comparison operator.
        op: CompareOp,
        /// The value to compare against.
        value: String,
    },
}
263
/// Comparison operators used by property filter predicates.
///
/// Derives `PartialEq`, `Eq` and `Hash` like the other field-less enums in
/// this module, so operators can be compared and used as map/set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum CompareOp {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Less than or equal.
    Le,
    /// Greater than.
    Gt,
    /// Greater than or equal.
    Ge,
    /// Contains substring.
    Contains,
    /// Starts with prefix.
    StartsWith,
    /// Ends with suffix.
    EndsWith,
}
286
/// Direction for superlative queries.
///
/// Derives `PartialEq`, `Eq` and `Hash` like the other field-less enums in
/// this module, so directions can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SuperlativeDir {
    /// Find the maximum value (argmax).
    Max,
    /// Find the minimum value (argmin).
    Min,
}
295
/// Question type classification.
///
/// Produced by `QueryCoreExtractor::classify_question` and stored on
/// [`QueryCore`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum QuestionType {
    /// "What is X?" - Definition query
    Definition,
    /// "Where is X defined?" - Location query
    Location,
    /// "What uses X?" / "What does X depend on?" - Dependency query
    Dependency,
    /// "How many X?" - Count query
    Count,
    /// "Which X has the most Y?" - Superlative query
    Superlative,
    /// "List all X that Y" - Enumeration query
    Enumeration,
    /// "Does X relate to Y?" - Boolean query
    Boolean,
    /// Complex multi-hop query
    MultiHop,
    /// Unknown question type (no classification pattern matched)
    Unknown,
}
318
/// A complete query core: the structured expression plus metadata about the
/// question it was extracted from.
#[derive(Debug, Clone)]
pub struct QueryCore {
    /// The question type
    pub question_type: QuestionType,
    /// The root query expression
    pub root: QueryExpr,
    /// Entities mentioned in the query, as `(name, type)` pairs
    pub entities: Vec<(String, EntityType)>,
    /// Original question text
    pub original: String,
    /// Resolved question text (after coreference resolution), if different from original
    pub resolved: Option<String>,
    /// Confidence score (0.0 - 1.0); `QueryCore::new` starts it at 1.0
    pub confidence: f32,
}
335
336impl QueryCore {
337    /// Create a new query core
338    pub fn new(
339        question_type: QuestionType,
340        root: QueryExpr,
341        entities: Vec<(String, EntityType)>,
342        original: String,
343    ) -> Self {
344        Self {
345            question_type,
346            root,
347            entities,
348            original,
349            resolved: None,
350            confidence: 1.0,
351        }
352    }
353
354    /// Set the confidence score
355    pub fn with_confidence(mut self, confidence: f32) -> Self {
356        self.confidence = confidence;
357        self
358    }
359
360    /// Set the resolved query text (after coreference resolution)
361    pub fn with_resolved(mut self, resolved: String) -> Self {
362        self.resolved = Some(resolved);
363        self
364    }
365
366    /// Convert to a human-readable string representation
367    pub fn to_sexp(&self) -> String {
368        Self::expr_to_sexp(&self.root)
369    }
370
371    fn expr_to_sexp(expr: &QueryExpr) -> String {
372        match expr {
373            QueryExpr::Variable(name) => name.clone(),
374            QueryExpr::Constant(value, _) => format!("\"{}\"", value),
375            QueryExpr::Op(op) => match op {
376                QueryOp::Join {
377                    relation,
378                    subject,
379                    object,
380                } => {
381                    format!(
382                        "(JOIN {:?} {} {})",
383                        relation,
384                        Self::expr_to_sexp(subject),
385                        Self::expr_to_sexp(object)
386                    )
387                }
388                QueryOp::And(exprs) => {
389                    let inner: Vec<_> = exprs.iter().map(Self::expr_to_sexp).collect();
390                    format!("(AND {})", inner.join(" "))
391                }
392                QueryOp::Or(exprs) => {
393                    let inner: Vec<_> = exprs.iter().map(Self::expr_to_sexp).collect();
394                    format!("(OR {})", inner.join(" "))
395                }
396                QueryOp::Values(vals) => {
397                    format!("(VALUES {})", vals.join(" "))
398                }
399                QueryOp::Filter { source, predicate } => {
400                    format!("(FILTER {} {:?})", Self::expr_to_sexp(source), predicate)
401                }
402                QueryOp::Count(inner) => {
403                    format!("(COUNT {})", Self::expr_to_sexp(inner))
404                }
405                QueryOp::Superlative {
406                    source,
407                    property,
408                    direction,
409                } => {
410                    let dir = match direction {
411                        SuperlativeDir::Max => "ARGMAX",
412                        SuperlativeDir::Min => "ARGMIN",
413                    };
414                    format!("({} {} {})", dir, Self::expr_to_sexp(source), property)
415                }
416            },
417        }
418    }
419}
420
/// Result of executing a query core.
#[derive(Debug, Clone)]
pub struct QueryResult {
    /// Result values
    pub values: Vec<QueryResultValue>,
    /// Total count; `with_values` sets it to the number of values, while
    /// `empty` and `error` leave it as `None`
    pub count: Option<usize>,
    /// Whether the query succeeded
    pub success: bool,
    /// Error message (if any)
    pub error: Option<String>,
}
433
/// A single result value.
#[derive(Debug, Clone)]
pub struct QueryResultValue {
    /// The value (an entity name or a literal)
    pub value: String,
    /// Entity type (if known)
    pub entity_type: Option<EntityType>,
    /// Score/relevance
    pub score: f32,
    /// Additional metadata as string key/value pairs
    pub metadata: HashMap<String, String>,
}
446
447impl Default for QueryResult {
448    fn default() -> Self {
449        Self {
450            values: Vec::new(),
451            count: None,
452            success: true,
453            error: None,
454        }
455    }
456}
457
458impl QueryResult {
459    /// Create an empty successful result
460    pub fn empty() -> Self {
461        Self::default()
462    }
463
464    /// Create an error result
465    pub fn error(msg: &str) -> Self {
466        Self {
467            values: Vec::new(),
468            count: None,
469            success: false,
470            error: Some(msg.to_string()),
471        }
472    }
473
474    /// Create a result with values
475    pub fn with_values(values: Vec<QueryResultValue>) -> Self {
476        Self {
477            count: Some(values.len()),
478            values,
479            success: true,
480            error: None,
481        }
482    }
483}
484
/// Question pattern for classification: a regex paired with the question
/// type (and optional relation) reported when the regex matches.
struct QuestionPattern {
    // Compiled pattern tested against the question text.
    regex: &'static Regex,
    // Question type reported when `regex` matches.
    question_type: QuestionType,
    // Relation reported alongside the type, if the pattern implies one.
    relation: Option<RelationType>,
}
491
/// Query core extractor: classifies natural-language questions against a
/// pattern table and builds a `QueryCore` from them.
pub struct QueryCoreExtractor {
    /// Patterns for question classification, tried in order
    patterns: Vec<QuestionPattern>,
}
497
498impl QueryCoreExtractor {
499    /// Create a new query core extractor
500    pub fn new() -> Self {
501        Self {
502            patterns: Self::build_patterns(),
503        }
504    }
505
506    fn build_patterns() -> Vec<QuestionPattern> {
507        vec![
508            // Definition patterns
509            QuestionPattern {
510                regex: &RE_WHAT_IS,
511                question_type: QuestionType::Definition,
512                relation: Some(RelationType::Defines),
513            },
514            QuestionPattern {
515                regex: &RE_EXPLAIN,
516                question_type: QuestionType::Definition,
517                relation: Some(RelationType::Defines),
518            },
519            // Location patterns
520            QuestionPattern {
521                regex: &RE_WHERE_IS,
522                question_type: QuestionType::Location,
523                relation: Some(RelationType::Contains),
524            },
525            QuestionPattern {
526                regex: &RE_WHICH_FILE,
527                question_type: QuestionType::Location,
528                relation: Some(RelationType::Contains),
529            },
530            QuestionPattern {
531                regex: &RE_FIND_IN,
532                question_type: QuestionType::Location,
533                relation: Some(RelationType::Contains),
534            },
535            // Dependency patterns
536            QuestionPattern {
537                regex: &RE_WHAT_USES,
538                question_type: QuestionType::Dependency,
539                relation: Some(RelationType::DependsOn),
540            },
541            QuestionPattern {
542                regex: &RE_WHAT_DOES_USE,
543                question_type: QuestionType::Dependency,
544                relation: Some(RelationType::DependsOn),
545            },
546            QuestionPattern {
547                regex: &RE_SHOW_DEPS,
548                question_type: QuestionType::Dependency,
549                relation: Some(RelationType::DependsOn),
550            },
551            // Count patterns
552            QuestionPattern {
553                regex: &RE_HOW_MANY,
554                question_type: QuestionType::Count,
555                relation: None,
556            },
557            QuestionPattern {
558                regex: &RE_COUNT,
559                question_type: QuestionType::Count,
560                relation: None,
561            },
562            // Superlative patterns
563            QuestionPattern {
564                regex: &RE_WHICH_MOST,
565                question_type: QuestionType::Superlative,
566                relation: None,
567            },
568            QuestionPattern {
569                regex: &RE_LARGEST,
570                question_type: QuestionType::Superlative,
571                relation: None,
572            },
573            // Enumeration patterns
574            QuestionPattern {
575                regex: &RE_LIST,
576                question_type: QuestionType::Enumeration,
577                relation: None,
578            },
579            QuestionPattern {
580                regex: &RE_SHOW,
581                question_type: QuestionType::Enumeration,
582                relation: None,
583            },
584            // Boolean patterns
585            QuestionPattern {
586                regex: &RE_DOES_USE,
587                question_type: QuestionType::Boolean,
588                relation: Some(RelationType::DependsOn),
589            },
590            QuestionPattern {
591                regex: &RE_IS_USED_BY,
592                question_type: QuestionType::Boolean,
593                relation: Some(RelationType::DependsOn),
594            },
595        ]
596    }
597
598    /// Extract a query core from natural language
599    pub fn extract(&self, query: &str, entities: &[(String, EntityType)]) -> Option<QueryCore> {
600        // Classify the question
601        let (question_type, relation) = self.classify_question(query);
602
603        if question_type == QuestionType::Unknown {
604            return None;
605        }
606
607        // Find mentioned entities in the query
608        let mentioned: Vec<_> = entities
609            .iter()
610            .filter(|(name, _)| query.to_lowercase().contains(&name.to_lowercase()))
611            .cloned()
612            .collect();
613
614        // Build the query expression based on question type
615        let root = match question_type {
616            QuestionType::Definition => {
617                if let Some((name, entity_type)) = mentioned.first() {
618                    QueryExpr::join(
619                        RelationType::Defines,
620                        QueryExpr::var("definer"),
621                        QueryExpr::constant(name, entity_type.clone()),
622                    )
623                } else {
624                    return None;
625                }
626            }
627            QuestionType::Location => {
628                if let Some((name, entity_type)) = mentioned.first() {
629                    QueryExpr::join(
630                        RelationType::Contains,
631                        QueryExpr::var("container"),
632                        QueryExpr::constant(name, entity_type.clone()),
633                    )
634                } else {
635                    return None;
636                }
637            }
638            QuestionType::Dependency => {
639                let rel = relation.unwrap_or(RelationType::DependsOn);
640                if let Some((name, entity_type)) = mentioned.first() {
641                    // Determine direction based on query wording
642                    if query.to_lowercase().contains("what uses")
643                        || query.to_lowercase().contains("what depends on")
644                    {
645                        // X depends on <entity>
646                        QueryExpr::join(
647                            rel,
648                            QueryExpr::var("dependent"),
649                            QueryExpr::constant(name, entity_type.clone()),
650                        )
651                    } else {
652                        // <entity> depends on X
653                        QueryExpr::join(
654                            rel,
655                            QueryExpr::constant(name, entity_type.clone()),
656                            QueryExpr::var("dependency"),
657                        )
658                    }
659                } else {
660                    return None;
661                }
662            }
663            QuestionType::Count => {
664                // Count query - wrap in COUNT
665                if let Some((name, entity_type)) = mentioned.first() {
666                    QueryExpr::count(QueryExpr::join(
667                        RelationType::Contains,
668                        QueryExpr::var("container"),
669                        QueryExpr::constant(name, entity_type.clone()),
670                    ))
671                } else {
672                    // General count query
673                    QueryExpr::count(QueryExpr::var("entity"))
674                }
675            }
676            QuestionType::Superlative => {
677                // Superlative - determine property and direction
678                let direction = if query.to_lowercase().contains("most")
679                    || query.to_lowercase().contains("largest")
680                    || query.to_lowercase().contains("highest")
681                {
682                    SuperlativeDir::Max
683                } else {
684                    SuperlativeDir::Min
685                };
686
687                QueryExpr::Op(QueryOp::Superlative {
688                    source: Box::new(QueryExpr::var("entity")),
689                    property: "mention_count".to_string(),
690                    direction,
691                })
692            }
693            QuestionType::Enumeration => {
694                // List all entities of a type
695                if let Some((name, entity_type)) = mentioned.first() {
696                    QueryExpr::join(
697                        RelationType::Contains,
698                        QueryExpr::var("container"),
699                        QueryExpr::constant(name, entity_type.clone()),
700                    )
701                } else {
702                    QueryExpr::var("entity")
703                }
704            }
705            QuestionType::Boolean => {
706                if mentioned.len() >= 2 {
707                    let rel = relation.unwrap_or(RelationType::DependsOn);
708                    QueryExpr::join(
709                        rel,
710                        QueryExpr::constant(&mentioned[0].0, mentioned[0].1.clone()),
711                        QueryExpr::constant(&mentioned[1].0, mentioned[1].1.clone()),
712                    )
713                } else {
714                    return None;
715                }
716            }
717            QuestionType::MultiHop | QuestionType::Unknown => {
718                return None;
719            }
720        };
721
722        Some(QueryCore::new(
723            question_type,
724            root,
725            mentioned,
726            query.to_string(),
727        ))
728    }
729
730    /// Classify a question by type
731    pub fn classify_question(&self, query: &str) -> (QuestionType, Option<RelationType>) {
732        for pattern in &self.patterns {
733            if pattern.regex.is_match(query) {
734                return (pattern.question_type.clone(), pattern.relation.clone());
735            }
736        }
737        (QuestionType::Unknown, None)
738    }
739}
740
741impl Default for QueryCoreExtractor {
742    fn default() -> Self {
743        Self::new()
744    }
745}
746
/// Query executor for running query cores against a relationship graph.
///
/// Borrows the graph for the lifetime `'a`; it does not own it.
pub struct QueryExecutor<'a> {
    // Graph the queries are evaluated against.
    graph: &'a dyn RelationshipGraphT,
}
751
752impl<'a> QueryExecutor<'a> {
753    /// Create a new query executor
754    pub fn new(graph: &'a dyn RelationshipGraphT) -> Self {
755        Self { graph }
756    }
757
758    /// Execute a query core
759    pub fn execute(&self, query: &QueryCore) -> QueryResult {
760        self.execute_expr(&query.root)
761    }
762
763    fn execute_expr(&self, expr: &QueryExpr) -> QueryResult {
764        match expr {
765            QueryExpr::Variable(_) => {
766                // Return all entities as candidates
767                let values: Vec<_> = self
768                    .graph
769                    .search("", 100)
770                    .iter()
771                    .map(|node| QueryResultValue {
772                        value: node.entity_name.clone(),
773                        entity_type: Some(node.entity_type.clone()),
774                        score: node.importance,
775                        metadata: HashMap::new(),
776                    })
777                    .collect();
778                QueryResult::with_values(values)
779            }
780            QueryExpr::Constant(value, entity_type) => {
781                // Return the constant as a single result
782                QueryResult::with_values(vec![QueryResultValue {
783                    value: value.clone(),
784                    entity_type: Some(entity_type.clone()),
785                    score: 1.0,
786                    metadata: HashMap::new(),
787                }])
788            }
789            QueryExpr::Op(op) => self.execute_op(op),
790        }
791    }
792
793    fn execute_op(&self, op: &QueryOp) -> QueryResult {
794        match op {
795            QueryOp::Join {
796                relation,
797                subject,
798                object,
799            } => {
800                // Execute the join
801                let edge_type = relation.to_edge_type();
802
803                // Determine which side is the variable
804                if let QueryExpr::Constant(name, _) = object.as_ref() {
805                    // Find entities that have a relationship to this constant
806                    let neighbors = self.graph.get_neighbors(name);
807                    let edges = self.graph.get_edges(name);
808
809                    let values: Vec<_> = neighbors
810                        .iter()
811                        .zip(edges.iter())
812                        .filter(|(_, edge)| {
813                            edge_type.as_ref().is_none_or(|et| edge.edge_type == *et)
814                        })
815                        .map(|(node, edge)| QueryResultValue {
816                            value: node.entity_name.clone(),
817                            entity_type: Some(node.entity_type.clone()),
818                            score: edge.weight,
819                            metadata: HashMap::new(),
820                        })
821                        .collect();
822
823                    QueryResult::with_values(values)
824                } else if let QueryExpr::Constant(name, _) = subject.as_ref() {
825                    // Find entities that this constant has a relationship with
826                    let neighbors = self.graph.get_neighbors(name);
827                    let edges = self.graph.get_edges(name);
828
829                    let values: Vec<_> = neighbors
830                        .iter()
831                        .zip(edges.iter())
832                        .filter(|(_, edge)| {
833                            edge_type.as_ref().is_none_or(|et| edge.edge_type == *et)
834                        })
835                        .map(|(node, edge)| QueryResultValue {
836                            value: node.entity_name.clone(),
837                            entity_type: Some(node.entity_type.clone()),
838                            score: edge.weight,
839                            metadata: HashMap::new(),
840                        })
841                        .collect();
842
843                    QueryResult::with_values(values)
844                } else {
845                    // Both are variables - return all edges
846                    QueryResult::empty()
847                }
848            }
849            QueryOp::And(exprs) => {
850                // Intersection of results
851                let mut results: Option<Vec<QueryResultValue>> = None;
852
853                for expr in exprs {
854                    let result = self.execute_expr(expr);
855                    if !result.success {
856                        return result;
857                    }
858
859                    if let Some(ref mut existing) = results {
860                        let new_values: std::collections::HashSet<_> =
861                            result.values.iter().map(|v| v.value.clone()).collect();
862                        existing.retain(|v| new_values.contains(&v.value));
863                    } else {
864                        results = Some(result.values);
865                    }
866                }
867
868                QueryResult::with_values(results.unwrap_or_default())
869            }
870            QueryOp::Or(exprs) => {
871                // Union of results
872                let mut values = Vec::new();
873                let mut seen = std::collections::HashSet::new();
874
875                for expr in exprs {
876                    let result = self.execute_expr(expr);
877                    for v in result.values {
878                        if seen.insert(v.value.clone()) {
879                            values.push(v);
880                        }
881                    }
882                }
883
884                QueryResult::with_values(values)
885            }
886            QueryOp::Values(vals) => QueryResult::with_values(
887                vals.iter()
888                    .map(|v| QueryResultValue {
889                        value: v.clone(),
890                        entity_type: None,
891                        score: 1.0,
892                        metadata: HashMap::new(),
893                    })
894                    .collect(),
895            ),
896            QueryOp::Filter { source, predicate } => {
897                let mut result = self.execute_expr(source);
898
899                result.values.retain(|v| match predicate {
900                    FilterPredicate::HasType(t) => v.entity_type.as_ref() == Some(t),
901                    FilterPredicate::NameMatches(pattern) => Regex::new(pattern)
902                        .map(|r| r.is_match(&v.value))
903                        .unwrap_or(false),
904                    FilterPredicate::In(set) => set.contains(&v.value),
905                    FilterPredicate::NotIn(set) => !set.contains(&v.value),
906                    FilterPredicate::Property { name, op, value } => {
907                        if let Some(prop_value) = v.metadata.get(name) {
908                            match op {
909                                CompareOp::Eq => prop_value == value,
910                                CompareOp::Ne => prop_value != value,
911                                CompareOp::Contains => prop_value.contains(value),
912                                CompareOp::StartsWith => prop_value.starts_with(value),
913                                CompareOp::EndsWith => prop_value.ends_with(value),
914                                _ => false, // Lt, Le, Gt, Ge require numeric comparison
915                            }
916                        } else {
917                            false
918                        }
919                    }
920                });
921
922                result.count = Some(result.values.len());
923                result
924            }
925            QueryOp::Count(inner) => {
926                let result = self.execute_expr(inner);
927                QueryResult {
928                    values: Vec::new(),
929                    count: Some(result.values.len()),
930                    success: result.success,
931                    error: result.error,
932                }
933            }
934            QueryOp::Superlative {
935                source,
936                property: _,
937                direction,
938            } => {
939                let mut result = self.execute_expr(source);
940
941                // Sort by score
942                result.values.sort_by(|a, b| match direction {
943                    SuperlativeDir::Max => b
944                        .score
945                        .partial_cmp(&a.score)
946                        .unwrap_or(std::cmp::Ordering::Equal),
947                    SuperlativeDir::Min => a
948                        .score
949                        .partial_cmp(&b.score)
950                        .unwrap_or(std::cmp::Ordering::Equal),
951                });
952
953                // Take the top result
954                result.values.truncate(1);
955                result.count = Some(result.values.len());
956                result
957            }
958        }
959    }
960}
961
#[cfg(test)]
mod tests {
    use super::*;

    // --- Question classification ---

    /// A "What is X?" question is classified as a definition question.
    #[test]
    fn test_classify_definition_question() {
        let classifier = QueryCoreExtractor::new();
        let (kind, _relation) = classifier.classify_question("What is EntityStore?");
        assert_eq!(kind, QuestionType::Definition);
    }

    /// A "Where is X defined?" question is classified as a location question.
    #[test]
    fn test_classify_location_question() {
        let classifier = QueryCoreExtractor::new();
        let (kind, _relation) = classifier.classify_question("Where is main defined?");
        assert_eq!(kind, QuestionType::Location);
    }

    /// A "What uses X?" question is classified as a dependency question and
    /// carries the `DependsOn` relation.
    #[test]
    fn test_classify_dependency_question() {
        let classifier = QueryCoreExtractor::new();
        assert_eq!(
            classifier.classify_question("What uses EntityStore?"),
            (QuestionType::Dependency, Some(RelationType::DependsOn))
        );
    }

    /// A "How many ...?" question is classified as a count question.
    #[test]
    fn test_classify_count_question() {
        let classifier = QueryCoreExtractor::new();
        let (kind, _relation) = classifier.classify_question("How many functions are there?");
        assert_eq!(kind, QuestionType::Count);
    }

    // --- Query core extraction ---

    /// Extracting from a dependency question with a known file entity yields
    /// a dependency core whose S-expression mentions `JOIN` and `DependsOn`.
    #[test]
    fn test_extract_dependency_query() {
        let extractor = QueryCoreExtractor::new();
        let known_entities = vec![(String::from("main.rs"), EntityType::File)];

        let extracted = extractor.extract("What uses main.rs?", &known_entities);
        assert!(extracted.is_some());

        let query_core = extracted.unwrap();
        assert_eq!(query_core.question_type, QuestionType::Dependency);

        // The rendered S-expression must name both the operator and the relation.
        let rendered = query_core.to_sexp();
        assert!(rendered.contains("JOIN"));
        assert!(rendered.contains("DependsOn"));
    }

    /// Extracting from a location question with a known function entity
    /// yields a location core.
    #[test]
    fn test_extract_location_query() {
        let extractor = QueryCoreExtractor::new();
        let known_entities = vec![(String::from("process_data"), EntityType::Function)];

        let extracted = extractor.extract("Where is process_data defined?", &known_entities);
        assert!(extracted.is_some());

        let query_core = extracted.unwrap();
        assert_eq!(query_core.question_type, QuestionType::Location);
    }

    // --- Expression and result helpers ---

    /// `QueryExpr::var` produces a variable whose name is reported as `?file`
    /// for the input `file`; `QueryExpr::constant` produces a non-variable.
    #[test]
    fn test_query_expr_helpers() {
        let variable = QueryExpr::var("file");
        assert!(variable.is_variable());
        assert_eq!(variable.as_variable(), Some("?file"));

        let literal = QueryExpr::constant("main.rs", EntityType::File);
        assert!(!literal.is_variable());
        assert_eq!(literal.as_variable(), None);
    }

    /// `QueryResult::with_values` reports success and a count equal to the
    /// number of values it was given.
    #[test]
    fn test_query_result() {
        let file_hit = QueryResultValue {
            value: String::from("test1"),
            entity_type: Some(EntityType::File),
            score: 0.9,
            metadata: HashMap::new(),
        };
        let function_hit = QueryResultValue {
            value: String::from("test2"),
            entity_type: Some(EntityType::Function),
            score: 0.8,
            metadata: HashMap::new(),
        };

        let outcome = QueryResult::with_values(vec![file_hit, function_hit]);

        assert!(outcome.success);
        assert_eq!(outcome.count, Some(2));
        assert_eq!(outcome.values.len(), 2);
    }

    /// `QueryResult::error` reports failure and carries an error value.
    #[test]
    fn test_query_result_error() {
        let failure = QueryResult::error("Entity not found");
        assert!(!failure.success);
        assert!(failure.error.is_some());
    }

    /// `Contains` and `DependsOn` have an inverse relation; `CoOccurs` does not.
    #[test]
    fn test_relation_type_inverse() {
        let contains_inverse = RelationType::Contains.inverse();
        let depends_on_inverse = RelationType::DependsOn.inverse();
        let co_occurs_inverse = RelationType::CoOccurs.inverse();

        assert!(contains_inverse.is_some());
        assert!(depends_on_inverse.is_some());
        assert!(co_occurs_inverse.is_none());
    }
}