use crate::cli::completion::{CompletionContext, CompletionItem, CompletionType};
use oxirs_core::model::{Object, Predicate, Subject};
use oxirs_core::rdf_store::RdfStore;
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};
/// A cached value together with its creation time and time-to-live.
#[derive(Debug, Clone)]
struct CacheEntry<T> {
    /// The cached payload.
    data: T,
    /// Moment at which the entry was created.
    cached_at: Instant,
    /// How long after `cached_at` the payload is still handed out.
    ttl: Duration,
}

impl<T> CacheEntry<T> {
    /// Wraps `data` in an entry stamped with the current instant.
    fn new(data: T, ttl: Duration) -> Self {
        let cached_at = Instant::now();
        Self {
            data,
            cached_at,
            ttl,
        }
    }

    /// Returns `true` once strictly more than `ttl` has elapsed since creation.
    fn is_expired(&self) -> bool {
        let age = self.cached_at.elapsed();
        age > self.ttl
    }

    /// Borrows the payload, or yields `None` when the entry has expired.
    fn get(&self) -> Option<&T> {
        if self.is_expired() {
            return None;
        }
        Some(&self.data)
    }
}
/// Snapshot of the schema-level facts gathered from a dataset.
#[derive(Debug, Clone, Default)]
pub struct SchemaInfo {
    /// Names of all classes that were recorded.
    pub classes: HashSet<String>,
    /// Names of all properties (predicates) that were recorded.
    pub properties: HashSet<String>,
    /// For each property, the set of classes recorded as its domain.
    pub property_domains: HashMap<String, HashSet<String>>,
    /// For each property, the set of type labels recorded as its range.
    pub property_ranges: HashMap<String, HashSet<String>>,
    /// Usage counter keyed by `(property, class)`.
    pub property_class_freq: HashMap<(String, String), usize>,
    /// Number of triples that were analysed to build this snapshot.
    pub triple_count: usize,
}

impl SchemaInfo {
    /// Creates an empty schema snapshot.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the domain classes recorded for `property`, sorted by name.
    ///
    /// An unknown property yields an empty vector.
    pub fn get_domain_classes(&self, property: &str) -> Vec<String> {
        Self::sorted_members(self.property_domains.get(property))
    }

    /// Returns the range type labels recorded for `property`, sorted by name.
    ///
    /// An unknown property yields an empty vector.
    pub fn get_range_types(&self, property: &str) -> Vec<String> {
        Self::sorted_members(self.property_ranges.get(property))
    }

    /// Returns the `(property, frequency)` pairs recorded for `class`.
    ///
    /// The result is ordered by descending frequency; properties with the
    /// same frequency are ordered by name so the output does not depend on
    /// hash-map iteration order.
    pub fn get_class_properties(&self, class: &str) -> Vec<(String, usize)> {
        let mut props: Vec<(String, usize)> = self
            .property_class_freq
            .iter()
            .filter(|((_, c), _)| c.as_str() == class)
            .map(|((p, _), freq)| (p.clone(), *freq))
            .collect();
        props.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
        props
    }

    /// Number of distinct classes recorded.
    pub fn class_count(&self) -> usize {
        self.classes.len()
    }

    /// Number of distinct properties recorded.
    pub fn property_count(&self) -> usize {
        self.properties.len()
    }

    /// Copies the members of an optional set into a name-sorted vector.
    fn sorted_members(set: Option<&HashSet<String>>) -> Vec<String> {
        let mut members: Vec<String> = set
            .map(|values| values.iter().cloned().collect())
            .unwrap_or_default();
        members.sort_unstable();
        members
    }
}
/// Tunables for schema discovery and for caching its result.
#[derive(Debug, Clone)]
pub struct SchemaDiscoveryConfig {
    /// Upper bound on the number of triples analysed; `0` means no limit.
    pub max_triples: usize,
    /// How long a discovered schema stays valid in the cache.
    pub cache_ttl: Duration,
    /// Whether additional schema facts are inferred from the data.
    pub infer_schema: bool,
    /// Minimum usage count threshold.
    // NOTE(review): not read anywhere in this file — confirm where it applies.
    pub min_frequency: usize,
}

impl Default for SchemaDiscoveryConfig {
    /// General-purpose settings: 100 000 triples, five-minute cache, inference on.
    fn default() -> Self {
        let five_minutes = Duration::from_secs(300);
        Self {
            max_triples: 100_000,
            cache_ttl: five_minutes,
            infer_schema: true,
            min_frequency: 1,
        }
    }
}

impl SchemaDiscoveryConfig {
    /// Preset for small datasets: no triple limit and a ten-minute cache.
    pub fn for_small_dataset() -> Self {
        Self {
            max_triples: 0,
            cache_ttl: Duration::from_secs(600),
            ..Self::default()
        }
    }

    /// Preset for large datasets: 50 000 triples, three-minute cache,
    /// inference off and a minimum frequency of five.
    pub fn for_large_dataset() -> Self {
        let three_minutes = Duration::from_secs(180);
        Self {
            max_triples: 50_000,
            cache_ttl: three_minutes,
            infer_schema: false,
            min_frequency: 5,
        }
    }
}
/// Autocomplete provider that derives class and property suggestions from the
/// triples held in an RDF store.
pub struct SchemaAutocompleteProvider {
/// Shared handle to the store whose triples are scanned.
store: Arc<RwLock<RdfStore>>,
/// Most recently discovered schema, or `None` when nothing is cached.
schema_cache: Arc<RwLock<Option<CacheEntry<SchemaInfo>>>>,
/// Settings for the triple limit, cache lifetime and inference.
config: SchemaDiscoveryConfig,
}
impl SchemaAutocompleteProvider {
pub fn new(store: Arc<RwLock<RdfStore>>) -> Self {
Self {
store,
schema_cache: Arc::new(RwLock::new(None)),
config: SchemaDiscoveryConfig::default(),
}
}
/// Builds a provider over `store` with caller-supplied discovery settings.
///
/// The schema cache starts out empty.
pub fn with_config(store: Arc<RwLock<RdfStore>>, config: SchemaDiscoveryConfig) -> Self {
    let schema_cache = Arc::new(RwLock::new(None));
    Self {
        store,
        schema_cache,
        config,
    }
}
pub fn discover_schema(&self) -> Result<SchemaInfo, String> {
let store = self.store.read().map_err(|e| e.to_string())?;
let mut schema = SchemaInfo::new();
let mut triple_count = 0;
let all_triples = store.triples().map_err(|e| e.to_string())?;
let triples_to_analyze =
if self.config.max_triples > 0 && all_triples.len() > self.config.max_triples {
&all_triples[..self.config.max_triples]
} else {
&all_triples
};
for triple in triples_to_analyze {
triple_count += 1;
let _subject_str = Self::subject_to_string(triple.subject());
let predicate_str = Self::predicate_to_string(triple.predicate());
let object_str = Self::object_to_string(triple.object());
schema.properties.insert(predicate_str.clone());
if predicate_str == "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>"
|| predicate_str == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|| predicate_str == "rdf:type"
{
schema.classes.insert(object_str.clone());
let key = (predicate_str.clone(), object_str.clone());
*schema.property_class_freq.entry(key).or_insert(0) += 1;
}
if self.config.infer_schema {
if !object_str.is_empty() {
schema
.property_ranges
.entry(predicate_str.clone())
.or_default()
.insert(Self::infer_type(&object_str));
}
}
}
schema.triple_count = triple_count;
Ok(schema)
}
/// Returns the schema, served from the cache while the cached entry is fresh.
///
/// On a miss (nothing cached, or the entry expired) the schema is
/// rediscovered, stored with `config.cache_ttl`, and returned. The read lock
/// is released before discovery starts.
///
/// # Errors
///
/// Returns the error text when a cache lock is poisoned or discovery fails.
pub fn get_schema(&self) -> Result<SchemaInfo, String> {
    let cached = self
        .schema_cache
        .read()
        .map_err(|e| e.to_string())?
        .as_ref()
        .and_then(|entry| entry.get().cloned());
    if let Some(hit) = cached {
        return Ok(hit);
    }

    let fresh = self.discover_schema()?;
    {
        let mut slot = self.schema_cache.write().map_err(|e| e.to_string())?;
        *slot = Some(CacheEntry::new(fresh.clone(), self.config.cache_ttl));
    }
    Ok(fresh)
}
/// Drops the cached schema so the next lookup rediscovers it.
///
/// Best effort: when the cache lock is poisoned the call does nothing.
pub fn invalidate_cache(&self) {
    if let Ok(mut slot) = self.schema_cache.write() {
        let _ = slot.take();
    }
}
/// Suggests known classes whose name contains `prefix` (case-insensitive).
///
/// Classes for which either the full identifier or its local name *starts
/// with* `prefix` are listed first; within each group the items are ordered
/// by their full identifier. Ranking on the local name matters because full
/// identifiers are IRIs, which practically never start with what the user
/// typed.
///
/// # Errors
///
/// Propagates any error from obtaining the schema.
pub fn suggest_classes(&self, prefix: &str) -> Result<Vec<CompletionItem>, String> {
    let schema = self.get_schema()?;
    let needle = prefix.to_lowercase();

    // Lower-case each candidate once here rather than inside the comparator.
    let mut ranked: Vec<(bool, CompletionItem)> = schema
        .classes
        .iter()
        .filter_map(|class| {
            let class_lower = class.to_lowercase();
            if !class_lower.contains(&needle) {
                return None;
            }
            let display = Self::extract_local_name(class);
            let is_prefix_match = class_lower.starts_with(&needle)
                || display.to_lowercase().starts_with(&needle);
            Some((
                is_prefix_match,
                CompletionItem {
                    replacement: class.clone(),
                    display,
                    description: Some("Class".to_string()),
                    completion_type: CompletionType::Value,
                },
            ))
        })
        .collect();

    // `true` sorts before `false`, then alphabetical by replacement text.
    ranked.sort_by(|a, b| {
        b.0.cmp(&a.0)
            .then_with(|| a.1.replacement.cmp(&b.1.replacement))
    });
    Ok(ranked.into_iter().map(|(_, item)| item).collect())
}
/// Suggests properties whose name contains `prefix` (case-insensitive).
///
/// With `context_class` set, candidates are the properties recorded for that
/// class and each description carries the recorded usage count. Without it,
/// every known property is a candidate; no usage count is available in that
/// case, so the description is just `"Property"` instead of a made-up count.
///
/// Ordering: items whose full identifier or local name starts with `prefix`
/// come first, then higher usage counts, then the full identifier.
///
/// # Errors
///
/// Propagates any error from obtaining the schema.
pub fn suggest_properties(
    &self,
    prefix: &str,
    context_class: Option<&str>,
) -> Result<Vec<CompletionItem>, String> {
    let schema = self.get_schema()?;
    let needle = prefix.to_lowercase();

    // `None` as frequency means "unknown", not "zero".
    let candidates: Vec<(String, Option<usize>)> = match context_class {
        Some(class) => schema
            .get_class_properties(class)
            .into_iter()
            .map(|(prop, freq)| (prop, Some(freq)))
            .collect(),
        None => schema
            .properties
            .iter()
            .map(|prop| (prop.clone(), None))
            .collect(),
    };

    let mut ranked: Vec<(bool, Option<usize>, CompletionItem)> = candidates
        .into_iter()
        .filter_map(|(prop, freq)| {
            let prop_lower = prop.to_lowercase();
            if !prop_lower.contains(&needle) {
                return None;
            }
            let display = Self::extract_local_name(&prop);
            let is_prefix_match = prop_lower.starts_with(&needle)
                || display.to_lowercase().starts_with(&needle);
            let description = match freq {
                Some(count) => format!("Property (used {} times)", count),
                None => "Property".to_string(),
            };
            Some((
                is_prefix_match,
                freq,
                CompletionItem {
                    replacement: prop,
                    display,
                    description: Some(description),
                    completion_type: CompletionType::Variable,
                },
            ))
        })
        .collect();

    ranked.sort_by(|a, b| {
        b.0.cmp(&a.0)
            .then_with(|| b.1.cmp(&a.1))
            .then_with(|| a.2.replacement.cmp(&b.2.replacement))
    });
    Ok(ranked.into_iter().map(|(_, _, item)| item).collect())
}
/// Produces completions for the word currently being typed, choosing between
/// class and property suggestions based on the surrounding input.
///
/// Returns an empty list when the context calls for neither.
///
/// # Errors
///
/// Propagates any error from the underlying suggestion method.
pub fn get_contextual_suggestions(
    &self,
    context: &CompletionContext,
) -> Result<Vec<CompletionItem>, String> {
    let kind = self.determine_suggestion_type(context);
    let typed = &context.current_word;
    match kind {
        SuggestionType::None => Ok(Vec::new()),
        SuggestionType::Class => self.suggest_classes(typed),
        SuggestionType::Property => self.suggest_properties(typed, None),
        SuggestionType::PropertyWithContext(class) => {
            self.suggest_properties(typed, Some(class.as_str()))
        }
    }
}
/// Decides which kind of suggestion fits the input, using simple textual
/// heuristics on the arguments joined with single spaces.
///
/// * `Class` when the line contains `rdf:type` or ends with ` a `.
/// * `Property` when the line contains both `WHERE` and `?`, and its last two
///   whitespace-separated tokens are a `?variable` followed by a token that
///   is not a variable.
/// * `None` otherwise.
// NOTE(review): because the arguments are joined with single spaces, the
// line can end with ` a ` only when the final argument is an empty string —
// confirm that callers pass the word being typed that way.
fn determine_suggestion_type(&self, context: &CompletionContext) -> SuggestionType {
    let line = context.args.join(" ");

    if line.contains("rdf:type") || line.ends_with(" a ") {
        return SuggestionType::Class;
    }
    if !line.contains("WHERE") || !line.contains("?") {
        return SuggestionType::None;
    }

    // Walk the tokens from the end: first `next()` is the last token.
    let mut from_end = line.split_whitespace().rev();
    match (from_end.next(), from_end.next()) {
        (Some(last), Some(previous))
            if previous.starts_with('?') && !last.starts_with('?') =>
        {
            SuggestionType::Property
        }
        _ => SuggestionType::None,
    }
}
/// Renders a triple subject as the plain string used during discovery.
///
/// Named nodes use their `to_string()` form, blank nodes are prefixed with
/// `_:`, variables with `?`, and quoted triples become an empty string.
// NOTE(review): assumes the blank-node / variable `Display` output does not
// already include the `_:` / `?` prefix — confirm against oxirs_core.
fn subject_to_string(subject: &Subject) -> String {
    match subject {
        Subject::QuotedTriple(_) => String::new(),
        Subject::NamedNode(iri) => iri.to_string(),
        Subject::BlankNode(bnode) => format!("_:{}", bnode),
        Subject::Variable(name) => format!("?{}", name),
    }
}
/// Renders a triple predicate as the plain string used during discovery.
///
/// Named nodes use their `to_string()` form; variables are prefixed with `?`.
fn predicate_to_string(predicate: &Predicate) -> String {
    match predicate {
        Predicate::Variable(name) => format!("?{}", name),
        Predicate::NamedNode(iri) => iri.to_string(),
    }
}
/// Renders a triple object as the plain string used during discovery.
///
/// Named nodes use their `to_string()` form, literals contribute only their
/// value, blank nodes are prefixed with `_:`, variables with `?`, and quoted
/// triples become an empty string.
fn object_to_string(obj: &Object) -> String {
    match obj {
        Object::QuotedTriple(_) => String::new(),
        Object::Literal(literal) => literal.value().to_string(),
        Object::NamedNode(iri) => iri.to_string(),
        Object::BlankNode(bnode) => format!("_:{}", bnode),
        Object::Variable(name) => format!("?{}", name),
    }
}
/// Guesses a type label for an object value from its textual form.
///
/// Returns one of `"IRI"`, `"xsd:integer"`, `"xsd:decimal"`, `"xsd:boolean"`
/// or `"xsd:string"`:
///
/// * `IRI` — starts with `http://` or `https://`, optionally wrapped in
///   angle brackets (`<http://…>`).
/// * `xsd:integer` — an optional sign followed by ASCII digits, of any
///   length (not limited to the `i64` range).
/// * `xsd:decimal` — parses as a *finite* floating-point number. Rust's
///   float parser also accepts words such as `NaN`, `inf` and `infinity`;
///   those are ordinary text here and must not be labelled numeric.
/// * `xsd:boolean` — exactly `true` or `false`.
/// * `xsd:string` — everything else, including the empty string.
fn infer_type(value: &str) -> String {
    // Look through one pair of surrounding angle brackets, if present.
    let unbracketed = value
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'))
        .unwrap_or(value);
    let is_iri = unbracketed.starts_with("http://") || unbracketed.starts_with("https://");

    let digits = value.strip_prefix(&['+', '-'][..]).unwrap_or(value);
    let is_integer = !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit());

    let label = if is_iri {
        "IRI"
    } else if is_integer {
        "xsd:integer"
    } else if value.parse::<f64>().map_or(false, f64::is_finite) {
        "xsd:decimal"
    } else if value == "true" || value == "false" {
        "xsd:boolean"
    } else {
        "xsd:string"
    };
    label.to_string()
}
/// Extracts the short display name from an identifier: the text after the
/// last `/` or `#`.
///
/// One pair of surrounding angle brackets (`<…>`) is ignored, so
/// `<http://xmlns.com/foaf/0.1/Person>` yields `Person` rather than
/// `Person>`. When there is no separator, or nothing follows the last one
/// (for example `http://example.org/`), the identifier without brackets is
/// returned so the display name is never blank for a non-empty input.
fn extract_local_name(uri: &str) -> String {
    let bare = uri
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'))
        .unwrap_or(uri);

    // Both separators are single-byte ASCII, so `pos + 1` is a char boundary.
    let local = match bare.rfind(&['/', '#'][..]) {
        Some(pos) => &bare[pos + 1..],
        None => bare,
    };
    if local.is_empty() {
        bare.to_string()
    } else {
        local.to_string()
    }
}
pub fn get_cache_stats(&self) -> Result<CacheStats, String> {
let cache = self.schema_cache.read().map_err(|e| e.to_string())?;
if let Some(entry) = cache.as_ref() {
if let Some(schema) = entry.get() {
return Ok(CacheStats {
is_cached: true,
is_expired: false,
cached_at: Some(entry.cached_at),
class_count: schema.class_count(),
property_count: schema.property_count(),
triple_count: schema.triple_count,
});
} else {
return Ok(CacheStats {
is_cached: true,
is_expired: true,
cached_at: Some(entry.cached_at),
class_count: 0,
property_count: 0,
triple_count: 0,
});
}
}
Ok(CacheStats {
is_cached: false,
is_expired: false,
cached_at: None,
class_count: 0,
property_count: 0,
triple_count: 0,
})
}
}
/// Kind of completion that fits the current input position.
#[derive(Debug, Clone, PartialEq)]
// `PropertyWithContext` is handled when dispatching suggestions but is never
// constructed in this file, hence the dead-code allowance.
#[allow(dead_code)]
enum SuggestionType {
/// Suggest class names.
Class,
/// Suggest property names without restricting them to a class.
Property,
/// Suggest the properties recorded for the named class.
PropertyWithContext(String),
/// Nothing to suggest.
None,
}
/// Summary of the schema cache state.
#[derive(Debug, Clone)]
pub struct CacheStats {
/// Whether a cache entry exists at all (fresh or expired).
pub is_cached: bool,
/// Whether the existing entry has outlived its time-to-live.
pub is_expired: bool,
/// Creation time of the entry, if one exists.
pub cached_at: Option<Instant>,
/// Number of classes in the cached schema; zero when missing or expired.
pub class_count: usize,
/// Number of properties in the cached schema; zero when missing or expired.
pub property_count: usize,
/// Number of analysed triples in the cached schema; zero when missing or expired.
pub triple_count: usize,
}
// Unit tests for schema discovery, suggestions, caching and the string helpers.
#[cfg(test)]
mod tests {
use super::*;
use oxirs_core::model::{Literal, NamedNode, Object, Predicate, Subject};
// Builds a store holding two triples about `alice`: an rdf:type statement
// (foaf:Person) and a foaf:name literal. Insert errors are ignored here.
fn create_test_store() -> Arc<RwLock<RdfStore>> {
let mut store = RdfStore::new().expect("Failed to create store");
let rdf_type = Predicate::NamedNode(
NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap(),
);
let foaf_person =
Object::NamedNode(NamedNode::new("http://xmlns.com/foaf/0.1/Person").unwrap());
let foaf_name =
Predicate::NamedNode(NamedNode::new("http://xmlns.com/foaf/0.1/name").unwrap());
let alice = Subject::NamedNode(NamedNode::new("http://example.org/alice").unwrap());
use oxirs_core::model::Triple;
let triple1 = Triple::new(alice.clone(), rdf_type.clone(), foaf_person.clone());
let triple2 = Triple::new(
alice.clone(),
foaf_name.clone(),
Object::Literal(Literal::new_simple_literal("Alice")),
);
store.insert_triple(triple1).ok();
store.insert_triple(triple2).ok();
Arc::new(RwLock::new(store))
}
// Discovery on the sample store finds at least one class, property and triple.
#[test]
fn test_schema_discovery() {
let store = create_test_store();
let provider = SchemaAutocompleteProvider::new(store);
let schema = provider.discover_schema().unwrap();
// Diagnostic output to help when one of the assertions below fails.
eprintln!("Triple count: {}", schema.triple_count);
eprintln!("Classes found: {:?}", schema.classes);
eprintln!("Properties found: {:?}", schema.properties);
assert!(schema.class_count() > 0, "No classes discovered");
assert!(schema.property_count() > 0, "No properties discovered");
assert!(schema.triple_count > 0, "No triples found");
}
// The object of the rdf:type triple is recorded as a class.
#[test]
fn test_class_discovery() {
let store = create_test_store();
let provider = SchemaAutocompleteProvider::new(store);
let schema = provider.get_schema().unwrap();
assert!(schema.classes.iter().any(|c| c.contains("Person")));
}
// Both predicates of the sample store are recorded as properties.
#[test]
fn test_property_discovery() {
let store = create_test_store();
let provider = SchemaAutocompleteProvider::new(store);
let schema = provider.get_schema().unwrap();
assert!(schema.properties.iter().any(|p| p.contains("name")));
assert!(schema.properties.iter().any(|p| p.contains("type")));
}
// Typing "Per" yields a suggestion whose display text contains "Person".
#[test]
fn test_class_suggestions() {
let store = create_test_store();
let provider = SchemaAutocompleteProvider::new(store);
let suggestions = provider.suggest_classes("Per").unwrap();
assert!(!suggestions.is_empty());
assert!(suggestions.iter().any(|s| s.display.contains("Person")));
}
// Typing "name" without a class context yields the foaf:name property.
#[test]
fn test_property_suggestions() {
let store = create_test_store();
let provider = SchemaAutocompleteProvider::new(store);
let suggestions = provider.suggest_properties("name", None).unwrap();
assert!(!suggestions.is_empty());
assert!(suggestions.iter().any(|s| s.display.contains("name")));
}
// The first lookup fills the cache; invalidation empties it again.
#[test]
fn test_cache_functionality() {
let store = create_test_store();
let provider = SchemaAutocompleteProvider::new(store);
let schema1 = provider.get_schema().unwrap();
let stats1 = provider.get_cache_stats().unwrap();
assert!(stats1.is_cached);
let schema2 = provider.get_schema().unwrap();
assert_eq!(schema1.class_count(), schema2.class_count());
provider.invalidate_cache();
let stats2 = provider.get_cache_stats().unwrap();
assert!(!stats2.is_cached);
}
// With a 10 ms TTL, the entry still exists after a 20 ms sleep but is
// reported as expired.
#[test]
fn test_cache_expiration() {
let store = create_test_store();
let config = SchemaDiscoveryConfig {
cache_ttl: Duration::from_millis(10), ..Default::default()
};
let provider = SchemaAutocompleteProvider::with_config(store, config);
provider.get_schema().ok();
std::thread::sleep(Duration::from_millis(20));
let stats = provider.get_cache_stats().unwrap();
assert!(stats.is_cached);
assert!(stats.is_expired);
}
// An empty store produces an empty schema rather than an error.
#[test]
fn test_empty_store() {
let empty_store = Arc::new(RwLock::new(
RdfStore::new().expect("Failed to create store"),
));
let provider = SchemaAutocompleteProvider::new(empty_store);
let schema = provider.get_schema().unwrap();
assert_eq!(schema.class_count(), 0);
assert_eq!(schema.property_count(), 0);
}
// Local names are taken after the last `/` or `#`; plain names pass through.
#[test]
fn test_local_name_extraction() {
assert_eq!(
SchemaAutocompleteProvider::extract_local_name("http://xmlns.com/foaf/0.1/Person"),
"Person"
);
assert_eq!(
SchemaAutocompleteProvider::extract_local_name("http://example.org#name"),
"name"
);
assert_eq!(
SchemaAutocompleteProvider::extract_local_name("simple"),
"simple"
);
}
// One representative value for each label `infer_type` can return.
#[test]
fn test_type_inference() {
assert_eq!(
SchemaAutocompleteProvider::infer_type("http://example.org"),
"IRI"
);
assert_eq!(SchemaAutocompleteProvider::infer_type("42"), "xsd:integer");
assert_eq!(
SchemaAutocompleteProvider::infer_type("3.14"),
"xsd:decimal"
);
assert_eq!(
SchemaAutocompleteProvider::infer_type("true"),
"xsd:boolean"
);
assert_eq!(
SchemaAutocompleteProvider::infer_type("hello"),
"xsd:string"
);
}
// The small-dataset preset has no triple limit and keeps inference on.
#[test]
fn test_small_dataset_config() {
let config = SchemaDiscoveryConfig::for_small_dataset();
assert_eq!(config.max_triples, 0); assert!(config.infer_schema);
}
// The large-dataset preset caps the scan at 50 000 triples and turns inference off.
#[test]
fn test_large_dataset_config() {
let config = SchemaDiscoveryConfig::for_large_dataset();
assert_eq!(config.max_triples, 50_000);
assert!(!config.infer_schema); }
}