1pub mod model;
27pub mod parser;
28pub mod schema;
29
30use std::collections::HashMap;
31
32use kyu_extension::{Extension, ProcColumn, ProcParam, ProcRow, ProcedureSignature};
33use kyu_types::{LogicalType, TypedValue};
34use smol_str::SmolStr;
35
36pub use model::{RdfNodeTable, RdfObject, RdfRelTable, RdfSchema, Triple};
37pub use parser::parse_triples;
38pub use schema::infer_schema;
39
/// Stateless unit type that carries the RDF procedures' [`Extension`] implementation.
pub struct RdfExtension;

impl RdfExtension {
    /// Creates the extension. The type has no fields, so this is just the unit value.
    pub fn new() -> Self {
        RdfExtension
    }
}

impl Default for RdfExtension {
    /// Equivalent to [`RdfExtension::new`].
    fn default() -> Self {
        RdfExtension::new()
    }
}
54
55impl Extension for RdfExtension {
56 fn name(&self) -> &str {
57 "rdf"
58 }
59
60 fn procedures(&self) -> Vec<ProcedureSignature> {
61 vec![
62 ProcedureSignature {
63 name: "stats".into(),
64 params: vec![ProcParam {
65 name: "path".into(),
66 type_desc: "STRING".into(),
67 }],
68 columns: vec![
69 ProcColumn {
70 name: "triple_count".into(),
71 data_type: LogicalType::Int64,
72 },
73 ProcColumn {
74 name: "subject_count".into(),
75 data_type: LogicalType::Int64,
76 },
77 ProcColumn {
78 name: "predicate_count".into(),
79 data_type: LogicalType::Int64,
80 },
81 ProcColumn {
82 name: "type_count".into(),
83 data_type: LogicalType::Int64,
84 },
85 ],
86 },
87 ProcedureSignature {
88 name: "prefixes".into(),
89 params: vec![ProcParam {
90 name: "path".into(),
91 type_desc: "STRING".into(),
92 }],
93 columns: vec![
94 ProcColumn {
95 name: "prefix".into(),
96 data_type: LogicalType::String,
97 },
98 ProcColumn {
99 name: "namespace".into(),
100 data_type: LogicalType::String,
101 },
102 ],
103 },
104 ProcedureSignature {
105 name: "types".into(),
106 params: vec![ProcParam {
107 name: "path".into(),
108 type_desc: "STRING".into(),
109 }],
110 columns: vec![
111 ProcColumn {
112 name: "type_uri".into(),
113 data_type: LogicalType::String,
114 },
115 ProcColumn {
116 name: "local_name".into(),
117 data_type: LogicalType::String,
118 },
119 ProcColumn {
120 name: "count".into(),
121 data_type: LogicalType::Int64,
122 },
123 ],
124 },
125 ]
126 }
127
128 fn execute(
129 &self,
130 procedure: &str,
131 args: &[String],
132 _adjacency: &HashMap<i64, Vec<(i64, f64)>>,
133 ) -> Result<Vec<ProcRow>, String> {
134 let path = args.first().ok_or("rdf.* requires a file path argument")?;
135
136 match procedure {
137 "stats" => exec_stats(path),
138 "prefixes" => exec_prefixes(path),
139 "types" => exec_types(path),
140 _ => Err(format!("unknown procedure: {procedure}")),
141 }
142 }
143}
144
145fn exec_stats(path: &str) -> Result<Vec<ProcRow>, String> {
146 let triples = parse_triples(path).map_err(|e| e.to_string())?;
147
148 let mut subjects = std::collections::HashSet::new();
149 let mut predicates = std::collections::HashSet::new();
150 let mut type_count = 0u64;
151
152 for t in &triples {
153 subjects.insert(&t.subject);
154 predicates.insert(&t.predicate);
155 if t.predicate == model::RDF_TYPE {
156 type_count += 1;
157 }
158 }
159
160 Ok(vec![vec![
161 TypedValue::Int64(triples.len() as i64),
162 TypedValue::Int64(subjects.len() as i64),
163 TypedValue::Int64(predicates.len() as i64),
164 TypedValue::Int64(type_count as i64),
165 ]])
166}
167
168fn exec_prefixes(path: &str) -> Result<Vec<ProcRow>, String> {
169 let prefixes = parser::parse_prefixes(path).map_err(|e| e.to_string())?;
170 Ok(prefixes
171 .into_iter()
172 .map(|(prefix, ns)| {
173 vec![
174 TypedValue::String(SmolStr::new(prefix)),
175 TypedValue::String(SmolStr::new(ns)),
176 ]
177 })
178 .collect())
179}
180
181fn exec_types(path: &str) -> Result<Vec<ProcRow>, String> {
182 let triples = parse_triples(path).map_err(|e| e.to_string())?;
183
184 let mut type_counts: HashMap<String, i64> = HashMap::new();
185 for t in &triples {
186 if t.predicate == model::RDF_TYPE
187 && let RdfObject::Uri(ref uri) = t.object
188 {
189 *type_counts.entry(uri.clone()).or_insert(0) += 1;
190 }
191 }
192
193 let mut rows: Vec<ProcRow> = type_counts
194 .into_iter()
195 .map(|(uri, count)| {
196 let name = model::local_name(&uri).to_string();
197 vec![
198 TypedValue::String(SmolStr::new(&uri)),
199 TypedValue::String(SmolStr::new(name)),
200 TypedValue::Int64(count),
201 ]
202 })
203 .collect();
204 rows.sort_by(|a, b| {
205 let a_count = match &a[2] {
206 TypedValue::Int64(n) => *n,
207 _ => 0,
208 };
209 let b_count = match &b[2] {
210 TypedValue::Int64(n) => *n,
211 _ => 0,
212 };
213 b_count.cmp(&a_count)
214 });
215
216 Ok(rows)
217}