1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
/// *This module is available only if HDT is built with the `"cli"` feature.*
/// Under development, parameters may change.
use bytesize::ByteSize;
use clap::{Parser, Subcommand};
use color_eyre::config::HookBuilder;
use color_eyre::eyre::{Report, WrapErr};
//use log::info;
use fs_err::{File, metadata};
use hdt::Hdt;
use hdt::containers::ControlInfo;
use hdt::header::Header;
use sophia::api::graph::Graph;
use sophia::api::prelude::{TripleSerializer, TripleSource};
//use sophia::api::prelude::Stringifier;
use sophia::inmem::graph::LightGraph;
use sophia::turtle::parser::{nt, turtle};
use sophia::turtle::serializer::nt::NtSerializer;
use sophia::turtle::serializer::turtle::{TurtleConfig, TurtleSerializer};
use std::ffi::OsStr;
use std::io::{BufReader, BufWriter};
use std::path::PathBuf;
use std::time::Instant;
//use std::io::{BufReader, stdin};
/*enum Format {
NTriples,
RdfXml,
Turtle,
}*/
/// convert HDT to N-Triples
#[derive(Parser)]
#[command(version, about, long_about = None)]
struct Args {
#[command(subcommand)]
command: Command,
// /// RDF Format of the output
//format: Format,
// /// verbose output
//verbose: bool,
// disable std reading for now because of usability downside for new users when started with no parameter // the HDT file to load from, if not given it is read from stdin
}
#[derive(Subcommand)]
enum Command {
Info {
input_path: PathBuf,
},
Convert {
// #[arg(short, long)]
// /// export as RDF Turtle, default is N-Triples
// turtle: bool,
/*
#[arg(short, long)]
/// Count triples only, do not print them
count: bool,
*/
/// the HDT file to load from
input_path: PathBuf,
// /// the RDF file to create, if not given it is written to stdout
// rdf_output_path: Option<String>,
output_path: PathBuf,
},
}
fn main() -> Result<(), Report> {
HookBuilder::default().display_env_section(false).install()?;
//env_logger::init();
//env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
let args = Args::parse();
/*let hdt = match args.input_path {
Some(filename) => {
let file =
File::open(filename.clone()).wrap_err_with(|| format!("Error opening input file {}", filename))?;
Hdt::read(BufReader::new(file))
.wrap_err_with(|| format!("Error loading HDT from {}", filename))?
//info!("Loaded from file {filename} {hdt:?}");
}
None => {
let reader = BufReader::new(stdin());
Hdt::read(reader).wrap_err("Error loading HDT from standard input")?
//info!("Loaded from stdin {hdt:?}");
}
};*/
match args.command {
Command::Info { input_path } => {
let file = File::open(input_path.clone())
.with_context(|| format!("Error opening input HDT file {input_path:?}"))?;
let mut reader = BufReader::new(file);
match input_path.extension().and_then(OsStr::to_str) {
Some("nt") => {
let triples = nt::parse_bufread(reader).collect_triples();
let g: LightGraph = triples.unwrap();
println!("RDF N-Triples File with ~{} triples", g.triples().size_hint().0);
}
Some("ttl") => {
let triples = turtle::parse_bufread(reader).collect_triples();
let g: LightGraph = triples.unwrap();
println!("RDF Turtle with ~{} triples", g.triples().size_hint().0);
}
Some("hdt") => {
ControlInfo::read(&mut reader)?;
let header = Header::read(&mut reader)?;
//println!("{}",ByteSize(hdt.size_in_bytes() as u64).to_string());
println!("HDT File: {:#?}", header.body);
}
Some(x) => {
eprintln!("Unknown RDF extension {x:?}, aborting.");
}
None => {
println!("File has no extension, RDF format cannot be determined, aborting.");
}
}
}
Command::Convert { input_path, output_path /* turtle*/ } => {
let t = Instant::now();
let file = File::open(input_path.clone())
.with_context(|| format!("Error opening input HDT file {input_path:?}"))?;
let reader = BufReader::new(file);
let hdt = match input_path.extension().and_then(OsStr::to_str) {
Some("hdt") => {
Hdt::read(reader).with_context(|| format!("Error loading input HDT from {input_path:?}"))?
}
Some("nt") => Hdt::read_nt(&input_path)
.with_context(|| format!("Error loading input N-Triples file from {input_path:?}"))?,
_ => {
panic!(
"Input file has unsupported or no extension, RDF format cannot be determined, aborting."
);
}
};
// let count = hdt.triples.len();
/*if args.count {
println!("Parsing returned {} triples", count);
return Ok(());
}*/
let output_file = File::create(&output_path)?;
let mut writer = BufWriter::new(output_file);
match output_path.extension().and_then(OsStr::to_str) {
Some("ttl") => {
let config = TurtleConfig::new().with_pretty(true);
//.with_own_prefix_map(prefixes().clone());
//TurtleSerializer::new_stringifier_with_config(config)
TurtleSerializer::new_with_config(writer, config)
.serialize_graph(&hdt)
.wrap_err("error serializing graph as RDF Turtle")?;
//.to_string()
}
Some("nt") => {
// Default: export the complete graph as N-Triples.
//NtSerializer::new_stringifier()
NtSerializer::new(writer)
.serialize_graph(&hdt)
.wrap_err("error serializing graph as N-Triples")?;
//.to_string()
}
Some("hdt") => {
hdt.write(&mut writer)?;
}
_ => {
panic!(
"Output file has no extension or one signifying an unsupported export format, aborting."
);
}
};
let in_size = ByteSize(metadata(&input_path)?.len());
let out_size = ByteSize(metadata(&output_path)?.len());
println!(
"Successfully converted {input_path:?} ({in_size}) to {output_path:?} ({out_size}) in {:.2}s",
t.elapsed().as_secs_f32()
);
// println!("{s}");
}
}
Ok(())
}