oxirs 0.2.4

Command-line interface for OxiRS - import, export, migration, and benchmarking tools
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
//! TDB Dump Tool
//!
//! Export RDF data from TDB databases to various serialization formats.
//! Supports exporting entire datasets or specific named graphs.

use super::{ToolResult, ToolStats};
use colored::Colorize;
use oxirs_tdb::{TdbConfig, TdbStore};
use std::fs::File;
use std::io::{self, Write};
use std::path::PathBuf;
use std::str::FromStr;
use std::time::Instant;

/// RDF serialization formats that the dump tool can emit.
///
/// Values are normally obtained from user input through the [`FromStr`]
/// implementation, which accepts the names and aliases listed on each
/// variant, case-insensitively and ignoring surrounding whitespace.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DumpFormat {
    /// N-Triples (`ntriples`, `nt`)
    NTriples,
    /// N-Quads (`nquads`, `nq`)
    NQuads,
    /// Turtle (`turtle`, `ttl`)
    Turtle,
    /// TriG (`trig`)
    TriG,
    /// RDF/XML (`rdfxml`, `rdf`, `xml`)
    RdfXml,
    /// JSON-LD (`jsonld`, `json`)
    JsonLd,
}

impl FromStr for DumpFormat {
    type Err = String;

    /// Parse a format name or alias.
    ///
    /// Matching is case-insensitive and tolerant of leading/trailing
    /// whitespace.
    ///
    /// # Errors
    ///
    /// Returns a message that echoes the rejected input (as given) and lists
    /// the supported format names.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Normalise before matching so that " Turtle " and "TTL" are accepted.
        match s.trim().to_lowercase().as_str() {
            "ntriples" | "nt" => Ok(Self::NTriples),
            "nquads" | "nq" => Ok(Self::NQuads),
            "turtle" | "ttl" => Ok(Self::Turtle),
            "trig" => Ok(Self::TriG),
            "rdfxml" | "rdf" | "xml" => Ok(Self::RdfXml),
            "jsonld" | "json" => Ok(Self::JsonLd),
            _ => Err(format!(
                "Unknown format: {}. Supported: ntriples, nquads, turtle, trig, rdfxml, jsonld",
                s
            )),
        }
    }
}

impl DumpFormat {
    /// File extension (without the leading dot) for this format.
    pub fn extension(&self) -> &'static str {
        match self {
            Self::NTriples => "nt",
            Self::NQuads => "nq",
            Self::Turtle => "ttl",
            Self::TriG => "trig",
            Self::RdfXml => "rdf",
            Self::JsonLd => "jsonld",
        }
    }

    /// MIME type string for this format.
    pub fn mime_type(&self) -> &'static str {
        match self {
            Self::NTriples => "application/n-triples",
            Self::NQuads => "application/n-quads",
            Self::Turtle => "text/turtle",
            Self::TriG => "application/trig",
            Self::RdfXml => "application/rdf+xml",
            Self::JsonLd => "application/ld+json",
        }
    }
}

/// Run TDB dump command
pub async fn run(
    location: PathBuf,
    output: Option<PathBuf>,
    format: String,
    graph: Option<String>,
) -> ToolResult {
    let mut stats = ToolStats::new();

    println!("{}", "".repeat(70).bright_blue());
    println!("{}", "  OxiRS TDB Database Export".bright_green().bold());
    println!("{}", "".repeat(70).bright_blue());
    println!();

    // Parse dump format
    let dump_format: DumpFormat = format
        .parse()
        .map_err(|e| Box::new(io::Error::new(io::ErrorKind::InvalidInput, e)))?;

    // Validate location exists
    if !location.exists() {
        return Err(format!("Dataset location does not exist: {}", location.display()).into());
    }

    if !location.is_dir() {
        return Err(format!(
            "Dataset location must be a directory: {}",
            location.display()
        )
        .into());
    }

    println!(
        "Dataset Location: {}",
        location.display().to_string().cyan()
    );
    println!(
        "Output Format:    {}",
        format!("{} ({})", format.to_uppercase(), dump_format.mime_type()).bright_white()
    );

    if let Some(ref g) = graph {
        println!("Target Graph:     {}", g.cyan());
    } else {
        println!(
            "Target:           {}",
            "All graphs (default graph + named graphs)".dimmed()
        );
    }

    if let Some(ref out) = output {
        println!("Output File:      {}", out.display().to_string().cyan());
    } else {
        println!("Output:           {}", "stdout".dimmed());
    }
    println!();

    // Open store
    println!("{}", "Opening database...".bright_yellow());
    let config = TdbConfig::new(&location);
    let store =
        TdbStore::open_with_config(config).map_err(|e| format!("Failed to open store: {}", e))?;

    println!("{}", "Database opened".green());
    println!();

    // Start export
    println!("{}", "Exporting data...".bright_yellow());
    let export_start = Instant::now();

    // Get triples count for progress
    let store_stats = store.stats();
    let total_triples = store_stats.triple_count;

    if total_triples == 0 {
        println!("{}", "No triples found in database".yellow());
        println!();
        println!("{}", "".repeat(70).bright_blue());
        return Ok(());
    }

    println!(
        "  Found {} triples to export",
        format_number(total_triples).bright_white()
    );

    // Create output writer
    let mut writer: Box<dyn Write> = if let Some(ref out_path) = output {
        Box::new(File::create(out_path)?)
    } else {
        Box::new(io::stdout())
    };

    // Export based on format
    let exported_count = match dump_format {
        DumpFormat::NQuads | DumpFormat::NTriples => {
            // Simple line-based format
            export_ntriples_nquads(&store, &mut writer, graph.as_deref(), dump_format)?
        }
        DumpFormat::Turtle | DumpFormat::TriG => {
            // Turtle/TriG serialization using oxirs-core
            export_turtle_trig(&store, &mut writer, graph.as_deref(), dump_format)?
        }
        DumpFormat::RdfXml => {
            // RDF/XML serialization using oxirs-core
            export_rdfxml(&store, &mut writer, graph.as_deref())?
        }
        DumpFormat::JsonLd => {
            // JSON-LD serialization using oxirs-core
            export_jsonld(&store, &mut writer, graph.as_deref())?
        }
    };

    writer.flush()?;

    let export_duration = export_start.elapsed();

    println!(
        "{}",
        format!("Exported {} triples", format_number(exported_count))
            .green()
            .bold()
    );
    println!();

    // Display summary
    println!("{}", "Export Summary".bright_yellow().bold());
    println!("{}", "".repeat(70));
    println!(
        "  Triples Exported: {}",
        format_number(exported_count).bright_white().bold()
    );
    println!(
        "  Duration:         {}s",
        format!("{:.2}", export_duration.as_secs_f64()).bright_white()
    );

    let throughput = exported_count as f64 / export_duration.as_secs_f64();
    println!(
        "  Throughput:       {} triples/sec",
        format_number(throughput as usize).bright_white()
    );

    if let Some(ref out_path) = output {
        let file_size = std::fs::metadata(out_path)?.len();
        println!(
            "  Output Size:      {}",
            format_bytes(file_size).bright_white()
        );
        println!(
            "  Output File:      {}",
            out_path.display().to_string().cyan()
        );
    }

    println!();
    println!("{}", "".repeat(70).bright_blue());

    stats.items_processed = exported_count;
    stats.finish();

    Ok(())
}

/// Write every triple of `store` through the N-Triples serializer.
///
/// A `#`-comment preamble (tool name, format label, optional graph name and
/// export timestamp) is emitted first, followed by one serialized statement
/// per stored triple.
///
/// `graph` only appears in the preamble: the store query is unfiltered, so
/// the whole store is written regardless of its value.
///
/// Returns the number of triples serialized.
fn export_ntriples_nquads(
    store: &TdbStore,
    writer: &mut dyn Write,
    graph: Option<&str>,
    format: DumpFormat,
) -> Result<usize, Box<dyn std::error::Error>> {
    use oxirs_core::format::ntriples::NTriplesSerializer;
    use oxirs_core::model::Triple;

    // Human-readable label for the preamble
    let format_label = match format {
        DumpFormat::NTriples => "N-Triples",
        DumpFormat::NQuads => "N-Quads",
        _ => "Unknown",
    };

    // Comment preamble
    writeln!(writer, "# OxiRS TDB Export")?;
    writeln!(writer, "# Format: {}", format_label)?;
    if let Some(graph_name) = graph {
        writeln!(writer, "# Graph: {}", graph_name)?;
    }
    let exported_at = chrono::Utc::now().to_rfc3339();
    writeln!(writer, "# Exported: {}\n", exported_at)?;

    // Fetch everything: TDB is a triple store, not a quad store, so there is
    // no per-graph selection here. For true named graph support, use a
    // quad-based storage backend like oxirs-cluster.
    let stored = store.query_triples(None, None, None)?;

    let mut sink = NTriplesSerializer::new().for_writer(writer);
    let mut written = 0;

    for (subj, pred, obj) in stored {
        // TDB dictionary terms -> oxirs-core model terms
        let statement = Triple::new(
            term_to_subject(&subj)?,
            term_to_named_node(&pred)?,
            term_to_object(&obj)?,
        );

        sink.serialize_triple(statement.as_ref())?;
        written += 1;
    }

    sink.finish()?;

    Ok(written)
}

/// Write every triple of `store` through the Turtle serializer.
///
/// The same serializer is used for both Turtle and TriG requests; `_graph`
/// and `_format` are accepted but not consulted.
///
/// Returns the number of triples serialized.
fn export_turtle_trig(
    store: &TdbStore,
    writer: &mut dyn Write,
    _graph: Option<&str>,
    _format: DumpFormat,
) -> Result<usize, Box<dyn std::error::Error>> {
    use oxirs_core::format::turtle::TurtleSerializer;
    use oxirs_core::model::Triple;

    // Unfiltered fetch: TDB is a triple store, not a quad store.
    let stored = store.query_triples(None, None, None)?;

    let mut sink = TurtleSerializer::new().for_writer(writer);
    let mut written = 0;

    for (subj, pred, obj) in stored {
        // TDB dictionary terms -> oxirs-core model terms
        let statement = Triple::new(
            term_to_subject(&subj)?,
            term_to_named_node(&pred)?,
            term_to_object(&obj)?,
        );

        sink.serialize_triple(statement.as_ref())?;
        written += 1;
    }

    sink.finish()?;

    Ok(written)
}

/// Write every triple of `store` as a single RDF/XML document.
///
/// All triples are converted and collected first, serialized to one string,
/// and that string is then written to `writer`. `_graph` is accepted but not
/// consulted.
///
/// Returns the number of triples serialized.
fn export_rdfxml(
    store: &TdbStore,
    writer: &mut dyn Write,
    _graph: Option<&str>,
) -> Result<usize, Box<dyn std::error::Error>> {
    use oxirs_core::format::rdfxml::RdfXmlSerializer;
    use oxirs_core::model::Triple;

    // Unfiltered fetch (TDB is a triple store, not a quad store), converted
    // term by term; the first conversion failure aborts the export.
    let converted = store
        .query_triples(None, None, None)?
        .into_iter()
        .map(|(subj, pred, obj)| -> Result<Triple, Box<dyn std::error::Error>> {
            Ok(Triple::new(
                term_to_subject(&subj)?,
                term_to_named_node(&pred)?,
                term_to_object(&obj)?,
            ))
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Serialize the whole document in memory, then emit it in one write
    let document = RdfXmlSerializer::new().serialize_to_string(&converted)?;
    writer.write_all(document.as_bytes())?;

    Ok(converted.len())
}

/// Write every triple of `store` through the JSON-LD serializer.
///
/// Each triple is wrapped as a quad in the default graph before being handed
/// to the serializer. `_graph` is accepted but not consulted.
///
/// Returns the number of quads serialized.
fn export_jsonld(
    store: &TdbStore,
    writer: &mut dyn Write,
    _graph: Option<&str>,
) -> Result<usize, Box<dyn std::error::Error>> {
    use oxirs_core::format::jsonld::JsonLdSerializer;
    use oxirs_core::model::{GraphName, Quad};

    // Unfiltered fetch: TDB is a triple store, not a quad store, so the same
    // data is returned whatever graph was requested. For true named graph
    // support, use a quad-based storage backend like oxirs-cluster.
    let stored = store.query_triples(None, None, None)?;

    let mut sink = JsonLdSerializer::new().for_writer(writer);
    let mut written = 0;

    for (subj, pred, obj) in stored {
        // The serializer consumes quads, so place each triple in the default graph
        let statement = Quad::new(
            term_to_subject(&subj)?,
            term_to_named_node(&pred)?,
            term_to_object(&obj)?,
            GraphName::DefaultGraph,
        );

        sink.serialize_quad(statement.as_ref())?;
        written += 1;
    }

    sink.finish()?;

    Ok(written)
}

/// Turn a TDB dictionary term into an oxirs-core `Subject`.
///
/// IRIs and blank nodes are converted; a literal is rejected with an error
/// because it cannot appear in subject position.
fn term_to_subject(
    term: &oxirs_tdb::dictionary::Term,
) -> Result<oxirs_core::model::Subject, Box<dyn std::error::Error>> {
    use oxirs_core::model::{BlankNode, NamedNode, Subject};
    use oxirs_tdb::dictionary::Term;

    let subject = match term {
        Term::Iri(iri) => Subject::NamedNode(NamedNode::new(iri.as_str())?),
        Term::BlankNode(id) => Subject::BlankNode(BlankNode::new(id.as_str())?),
        Term::Literal { .. } => return Err("Literal cannot be used as subject".into()),
    };

    Ok(subject)
}

/// Turn a TDB dictionary term into an oxirs-core `NamedNode` for use as a
/// predicate.
///
/// Only IRIs are accepted; any other term kind yields an error.
fn term_to_named_node(
    term: &oxirs_tdb::dictionary::Term,
) -> Result<oxirs_core::model::NamedNode, Box<dyn std::error::Error>> {
    use oxirs_core::model::NamedNode;
    use oxirs_tdb::dictionary::Term;

    if let Term::Iri(iri) = term {
        let node = NamedNode::new(iri.as_str())?;
        Ok(node)
    } else {
        Err("Only IRI can be used as predicate".into())
    }
}

/// Convert TDB Term to oxirs-core Object
fn term_to_object(
    term: &oxirs_tdb::dictionary::Term,
) -> Result<oxirs_core::model::Object, Box<dyn std::error::Error>> {
    use oxirs_core::model::{BlankNode, Literal, NamedNode, Object};
    use oxirs_tdb::dictionary::Term;

    match term {
        Term::Iri(iri) => Ok(Object::NamedNode(NamedNode::new(iri.as_str())?)),
        Term::BlankNode(id) => Ok(Object::BlankNode(BlankNode::new(id.as_str())?)),
        Term::Literal {
            value,
            language,
            datatype,
        } => {
            // Convert TDB literal to oxirs-core literal
            if let Some(lang) = language {
                Ok(Object::Literal(Literal::new_language_tagged_literal(
                    value, lang,
                )?))
            } else if let Some(dt) = datatype {
                let datatype_node = NamedNode::new(dt.as_str())?;
                Ok(Object::Literal(Literal::new_typed_literal(
                    value,
                    datatype_node,
                )))
            } else {
                // Plain literal (xsd:string)
                Ok(Object::Literal(Literal::new_simple_literal(value)))
            }
        }
    }
}

/// Render `n` in decimal with a comma between each group of three digits,
/// e.g. `1234567` becomes `"1,234,567"`.
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut grouped = String::with_capacity(len + len / 3);

    for (idx, digit) in digits.chars().enumerate() {
        // A separator goes before every digit that starts a full group of
        // three counted from the right, except at the very front.
        if idx != 0 && (len - idx) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}

/// Format a byte count in human-readable form using 1024-based units
/// (`B`, `KB`, `MB`, `GB`, `TB`, `PB`).
///
/// Sizes below 1024 are printed as whole bytes (`"512 B"`). Larger sizes are
/// scaled to the largest unit that keeps the displayed value below 1024 and
/// printed with roughly three significant digits (`"1.00 KB"`, `"10.0 KB"`,
/// `"100 KB"`). Anything beyond the `PB` range stays in `PB`.
fn format_bytes(bytes: u64) -> String {
    const UNITS: [&str; 6] = ["B", "KB", "MB", "GB", "TB", "PB"];
    const STEP: f64 = 1024.0;
    // A value at or above this would be rounded to "1024" by the
    // zero-decimal format, so it is promoted to the next unit instead.
    const PROMOTE_AT: f64 = 1023.5;

    // Byte counts are integral; never show fractional bytes.
    if bytes < 1024 {
        return format!("{} B", bytes);
    }

    // Dividing by a power of two is exact in floating point, so repeated
    // division picks the unit without the rounding hazards of log2().
    let mut value = bytes as f64;
    let mut unit_index = 0;
    while value >= PROMOTE_AT && unit_index < UNITS.len() - 1 {
        value /= STEP;
        unit_index += 1;
    }

    let unit = UNITS[unit_index];
    if value < 10.0 {
        format!("{:.2} {}", value, unit)
    } else if value < 100.0 {
        format!("{:.1} {}", value, unit)
    } else {
        format!("{:.0} {}", value, unit)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Full names and short aliases resolve to the same variant; unknown
    /// names are rejected.
    #[test]
    fn test_dump_format_parse() {
        for name in ["ntriples", "nt"] {
            let parsed = name.parse::<DumpFormat>();
            assert!(matches!(parsed, Ok(DumpFormat::NTriples)));
        }
        for name in ["turtle", "ttl"] {
            let parsed = name.parse::<DumpFormat>();
            assert!(matches!(parsed, Ok(DumpFormat::Turtle)));
        }

        let rejected = "invalid".parse::<DumpFormat>();
        assert!(rejected.is_err());
    }

    /// Each format reports its conventional file extension.
    #[test]
    fn test_dump_format_extension() {
        let cases = [
            (DumpFormat::NTriples, "nt"),
            (DumpFormat::Turtle, "ttl"),
            (DumpFormat::JsonLd, "jsonld"),
        ];
        for (format, expected) in cases {
            assert_eq!(format.extension(), expected);
        }
    }

    /// Each format reports its MIME type.
    #[test]
    fn test_dump_format_mime_type() {
        let cases = [
            (DumpFormat::NTriples, "application/n-triples"),
            (DumpFormat::Turtle, "text/turtle"),
        ];
        for (format, expected) in cases {
            assert_eq!(format.mime_type(), expected);
        }
    }

    /// Thousands separators are inserted every three digits.
    #[test]
    fn test_format_number() {
        let cases = [(0, "0"), (1000, "1,000"), (1234567, "1,234,567")];
        for (input, expected) in cases {
            assert_eq!(format_number(input), expected);
        }
    }

    /// Byte counts are scaled to 1024-based units.
    #[test]
    fn test_format_bytes() {
        let cases = [(0, "0 B"), (1024, "1.00 KB"), (1048576, "1.00 MB")];
        for (input, expected) in cases {
            assert_eq!(format_bytes(input), expected);
        }
    }
}