1use clap::Args;
23use ggen_utils::error::Result;
24use std::path::{Component, Path};
25
26#[derive(Args, Debug)]
27pub struct LoadArgs {
28 pub file: String,
30
31 #[arg(long)]
33 pub format: Option<String>,
34
35 #[arg(long)]
37 pub base: Option<String>,
38
39 #[arg(long)]
41 pub merge: bool,
42}
43
44#[cfg_attr(test, mockall::automock)]
45pub trait RdfLoader {
46 fn load(
47 &self, file: String, format: Option<String>, base: Option<String>, merge: bool,
48 ) -> Result<LoadStats>;
49}
50
/// Figures reported by an `RdfLoader` after a load; `run_with_deps` prints
/// them to the user.
#[derive(Clone, Debug)]
pub struct LoadStats {
    /// Number of triples read from the file (printed as "Loaded N triples").
    pub triples_loaded: usize,
    /// Number of triples in the graph afterwards (printed only for merges).
    pub total_triples: usize,
    /// Name of the format the loader reports for the file.
    pub format_detected: String,
}
57
58fn validate_file_path(file: &str) -> Result<()> {
60 if file.trim().is_empty() {
62 return Err(ggen_utils::error::Error::new("File path cannot be empty"));
63 }
64
65 if file.len() > 1000 {
67 return Err(ggen_utils::error::Error::new(
68 "File path too long (max 1000 characters)",
69 ));
70 }
71
72 let path = Path::new(file);
74 if path.components().any(|c| matches!(c, Component::ParentDir)) {
75 return Err(ggen_utils::error::Error::new(
76 "Path traversal detected: paths containing '..' are not allowed",
77 ));
78 }
79
80 if !file
82 .chars()
83 .all(|c| c.is_alphanumeric() || c == '.' || c == '/' || c == '-' || c == '_' || c == '\\')
84 {
85 return Err(ggen_utils::error::Error::new(
86 "Invalid file path format: only alphanumeric characters, dots, slashes, dashes, underscores, and backslashes allowed",
87 ));
88 }
89
90 Ok(())
91}
92
93fn validate_format(format: &Option<String>) -> Result<()> {
95 if let Some(format) = format {
96 if format.trim().is_empty() {
98 return Err(ggen_utils::error::Error::new("Format cannot be empty"));
99 }
100
101 if format.len() > 50 {
103 return Err(ggen_utils::error::Error::new(
104 "Format too long (max 50 characters)",
105 ));
106 }
107
108 if !format
110 .chars()
111 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
112 {
113 return Err(ggen_utils::error::Error::new(
114 "Invalid format: only alphanumeric characters, dashes, and underscores allowed",
115 ));
116 }
117
118 let valid_formats = ["turtle", "ntriples", "rdfxml", "jsonld", "n3"];
120 if !valid_formats.contains(&format.to_lowercase().as_str()) {
121 return Err(ggen_utils::error::Error::new(
122 "Unsupported format: supported formats are turtle, ntriples, rdfxml, jsonld, n3",
123 ));
124 }
125 }
126
127 Ok(())
128}
129
130fn validate_base_iri(base: &Option<String>) -> Result<()> {
132 if let Some(base) = base {
133 if base.trim().is_empty() {
135 return Err(ggen_utils::error::Error::new("Base IRI cannot be empty"));
136 }
137
138 if base.len() > 500 {
140 return Err(ggen_utils::error::Error::new(
141 "Base IRI too long (max 500 characters)",
142 ));
143 }
144
145 if !base.starts_with("http://")
147 && !base.starts_with("https://")
148 && !base.starts_with("file://")
149 {
150 return Err(ggen_utils::error::Error::new(
151 "Invalid base IRI: must start with http://, https://, or file://",
152 ));
153 }
154 }
155
156 Ok(())
157}
158
/// Guesses the RDF format name from the extension of `filename`.
///
/// The extension is compared case-insensitively (ASCII), so `DATA.TTL` and
/// `data.ttl` are treated alike. Recognised extensions:
///
/// * `ttl`, `turtle` -> `"turtle"`
/// * `nt`, `ntriples` -> `"ntriples"`
/// * `rdf`, `xml` -> `"rdfxml"`
/// * `jsonld`, `json` -> `"jsonld"`
/// * `n3` -> `"n3"`
///
/// Returns `"turtle"` when the extension is missing, not valid UTF-8, or not
/// in the list above.
fn detect_format_from_extension(filename: &str) -> &'static str {
    // Lower-case first: file names are often upper-cased and an unrecognised
    // extension would otherwise silently fall through to the default.
    let extension = std::path::Path::new(filename)
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);

    match extension.as_deref() {
        Some("ttl") | Some("turtle") => "turtle",
        Some("nt") | Some("ntriples") => "ntriples",
        Some("rdf") | Some("xml") => "rdfxml",
        Some("jsonld") | Some("json") => "jsonld",
        Some("n3") => "n3",
        _ => "turtle",
    }
}
171
172pub async fn run(args: &LoadArgs) -> Result<()> {
173 validate_file_path(&args.file)?;
175 validate_format(&args.format)?;
176 validate_base_iri(&args.base)?;
177
178 println!("📊 Loading RDF graph...");
179
180 let file_path = std::path::Path::new(&args.file);
182 if !file_path.exists() {
183 return Err(ggen_utils::error::Error::new(&format!(
184 "File not found: {}",
185 args.file
186 )));
187 }
188
189 let format = args
191 .format
192 .as_deref()
193 .unwrap_or_else(|| detect_format_from_extension(&args.file));
194
195 println!("📁 Loading file: {}", args.file);
196 println!("🔍 Format: {}", format);
197
198 if let Some(base) = &args.base {
199 println!("🌐 Base IRI: {}", base);
200 }
201
202 let graph = ggen_core::Graph::load_from_file(&args.file)
204 .map_err(|e| ggen_utils::error::Error::new(&format!("Failed to load RDF file: {}", e)))?;
205
206 let triples_count = graph.len();
208
209 if args.merge {
210 println!(
211 "✅ Merged {} triples from {} ({})",
212 triples_count, args.file, format
213 );
214 println!("📊 Total triples in graph: {}", triples_count);
215 } else {
216 println!(
217 "✅ Loaded {} triples from {} ({})",
218 triples_count, args.file, format
219 );
220 }
221
222 Ok(())
223}
224
225pub async fn run_with_deps(args: &LoadArgs, loader: &dyn RdfLoader) -> Result<()> {
226 validate_file_path(&args.file)?;
228 validate_format(&args.format)?;
229 validate_base_iri(&args.base)?;
230
231 println!("🔍 Loading RDF file...");
233
234 let stats = loader.load(
235 args.file.clone(),
236 args.format.clone(),
237 args.base.clone(),
238 args.merge,
239 )?;
240
241 if args.merge {
242 println!(
243 "✅ Loaded {} triples from {} ({})",
244 stats.triples_loaded, args.file, stats.format_detected
245 );
246 println!("📊 Total triples in graph: {}", stats.total_triples);
247 } else {
248 println!(
249 "✅ Loaded {} triples from {} ({})",
250 stats.triples_loaded, args.file, stats.format_detected
251 );
252 }
253
254 Ok(())
255}
256
#[cfg(test)]
mod tests {
    use super::*;
    use mockall::predicate::*;

    /// Builds the statistics a mocked loader hands back; every test here
    /// reports the format as "Turtle".
    fn turtle_stats(triples_loaded: usize, total_triples: usize) -> LoadStats {
        LoadStats {
            triples_loaded,
            total_triples,
            format_detected: "Turtle".to_string(),
        }
    }

    /// Runs the command against `loader` and asserts that it succeeds. The
    /// mock's call-count expectations are checked when it is dropped.
    async fn assert_load_succeeds(args: LoadArgs, loader: MockRdfLoader) {
        let outcome = run_with_deps(&args, &loader).await;
        assert!(outcome.is_ok());
    }

    /// An explicit format and no base IRI are forwarded to the loader as given.
    #[tokio::test]
    async fn test_load_rdf_file() {
        let mut loader = MockRdfLoader::new();
        loader
            .expect_load()
            .with(
                eq("data.ttl".to_string()),
                eq(Some("turtle".to_string())),
                eq(None::<String>),
                eq(false),
            )
            .times(1)
            .returning(|_, _, _, _| Ok(turtle_stats(100, 100)));

        let args = LoadArgs {
            file: "data.ttl".to_string(),
            format: Some("turtle".to_string()),
            base: None,
            merge: false,
        };

        assert_load_succeeds(args, loader).await;
    }

    /// The merge flag reaches the loader as `true`.
    #[tokio::test]
    async fn test_load_with_merge() {
        let mut loader = MockRdfLoader::new();
        loader
            .expect_load()
            .with(
                eq("additional.ttl".to_string()),
                always(),
                always(),
                eq(true),
            )
            .times(1)
            .returning(|_, _, _, _| Ok(turtle_stats(50, 150)));

        let args = LoadArgs {
            file: "additional.ttl".to_string(),
            format: None,
            base: None,
            merge: true,
        };

        assert_load_succeeds(args, loader).await;
    }

    /// A valid base IRI is forwarded to the loader unchanged.
    #[tokio::test]
    async fn test_load_with_base_iri() {
        let mut loader = MockRdfLoader::new();
        loader
            .expect_load()
            .with(
                eq("relative.ttl".to_string()),
                always(),
                eq(Some("http://example.org/".to_string())),
                eq(false),
            )
            .times(1)
            .returning(|_, _, _, _| Ok(turtle_stats(25, 25)));

        let args = LoadArgs {
            file: "relative.ttl".to_string(),
            format: None,
            base: Some("http://example.org/".to_string()),
            merge: false,
        };

        assert_load_succeeds(args, loader).await;
    }
}