1use std::collections::HashMap;
30use std::hash::BuildHasher;
31
32use panproto_gat::Theory;
33use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
34
35use crate::error::ProtocolError;
36use crate::theories;
37
38#[must_use]
40pub fn protocol() -> Protocol {
41 Protocol {
42 name: "raw_file".into(),
43 schema_theory: "ThRawFileSchema".into(),
44 instance_theory: "ThRawFileInstance".into(),
45 edge_rules: vec![
46 EdgeRule {
47 edge_kind: "line-of".into(),
48 src_kinds: vec!["file".into()],
49 tgt_kinds: vec!["line".into()],
50 },
51 EdgeRule {
52 edge_kind: "chunk-of".into(),
53 src_kinds: vec!["file".into()],
54 tgt_kinds: vec!["chunk".into()],
55 },
56 ],
57 obj_kinds: vec!["file".into(), "line".into(), "chunk".into()],
58 constraint_sorts: vec![
59 "mime-type".into(),
60 "encoding".into(),
61 "line-number".into(),
62 "content".into(),
63 "content-length".into(),
64 "content-hash".into(),
65 ],
66 has_order: true,
67 has_coproducts: false,
68 has_recursion: false,
69 has_causal: false,
70 nominal_identity: false,
71 has_defaults: false,
72 has_coercions: false,
73 has_mergers: false,
74 has_policies: false,
75 }
76}
77
78pub fn register_theories<S: BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
83 theories::register_constrained_multigraph_wtype(
84 registry,
85 "ThRawFileSchema",
86 "ThRawFileInstance",
87 );
88}
89
90pub fn parse_text(input: &str, file_path: &str) -> Result<Schema, ProtocolError> {
99 let proto = protocol();
100 let mut builder = SchemaBuilder::new(&proto);
101
102 let file_id = file_path;
104 builder = builder
105 .vertex(file_id, "file", None)
106 .map_err(|e| ProtocolError::Parse(format!("file vertex: {e}")))?;
107
108 let mime = mime_from_path(file_path);
110 builder = builder.constraint(file_id, "mime-type", &mime);
111 builder = builder.constraint(file_id, "encoding", "utf-8");
112
113 for (i, line_text) in input.lines().enumerate() {
115 let line_id = format!("{file_id}::line_{i}");
116 builder = builder
117 .vertex(&line_id, "line", None)
118 .map_err(|e| ProtocolError::Parse(format!("line {i}: {e}")))?;
119
120 builder = builder
121 .edge(file_id, &line_id, "line-of", None)
122 .map_err(|e| ProtocolError::Parse(format!("line-of edge {i}: {e}")))?;
123
124 builder = builder.constraint(&line_id, "content", line_text);
125 builder = builder.constraint(&line_id, "line-number", &i.to_string());
126 }
127
128 builder
129 .build()
130 .map_err(|e| ProtocolError::Parse(format!("build: {e}")))
131}
132
133pub fn parse_binary(file_path: &str, content: &[u8]) -> Result<Schema, ProtocolError> {
142 let proto = protocol();
143 let mut builder = SchemaBuilder::new(&proto);
144
145 let file_id = file_path;
146 builder = builder
147 .vertex(file_id, "file", None)
148 .map_err(|e| ProtocolError::Parse(format!("file vertex: {e}")))?;
149
150 let mime = mime_from_path(file_path);
151 builder = builder.constraint(file_id, "mime-type", &mime);
152 builder = builder.constraint(file_id, "encoding", "binary");
153 builder = builder.constraint(file_id, "content-length", &content.len().to_string());
154
155 let chunk_id = format!("{file_id}::chunk_0");
156 builder = builder
157 .vertex(&chunk_id, "chunk", None)
158 .map_err(|e| ProtocolError::Parse(format!("chunk vertex: {e}")))?;
159
160 builder = builder
161 .edge(file_id, &chunk_id, "chunk-of", None)
162 .map_err(|e| ProtocolError::Parse(format!("chunk-of edge: {e}")))?;
163
164 let hash = blake3::hash(content);
168 let hex = hash.to_hex();
169 builder = builder.constraint(&chunk_id, "content-hash", hex.as_str());
170
171 builder
172 .build()
173 .map_err(|e| ProtocolError::Parse(format!("build: {e}")))
174}
175
176pub fn emit_text(schema: &Schema) -> Result<String, ProtocolError> {
184 let mut lines: Vec<(usize, String)> = Vec::new();
186
187 for (name, vertex) in &schema.vertices {
188 if vertex.kind.as_ref() == "line" {
189 let line_num = schema
190 .constraints
191 .get(name)
192 .and_then(|cs| {
193 cs.iter()
194 .find(|c| c.sort.as_ref() == "line-number")
195 .and_then(|c| c.value.parse::<usize>().ok())
196 })
197 .unwrap_or(lines.len());
198
199 let content = schema
200 .constraints
201 .get(name)
202 .and_then(|cs| {
203 cs.iter()
204 .find(|c| c.sort.as_ref() == "content")
205 .map(|c| c.value.clone())
206 })
207 .unwrap_or_default();
208
209 lines.push((line_num, content));
210 }
211 }
212
213 lines.sort_by_key(|(num, _)| *num);
215
216 let text: Vec<&str> = lines.iter().map(|(_, content)| content.as_str()).collect();
217 let mut result = text.join("\n");
218 if !result.is_empty() {
219 result.push('\n');
220 }
221 Ok(result)
222}
223
/// Guesses a MIME type from the extension of `path`.
///
/// The extension is whatever follows the last `.` in `path`, compared
/// case-insensitively. A path without any `.` has no extension, so a bare name
/// such as `Dockerfile` maps to the fallback. Unrecognised or missing
/// extensions yield `application/octet-stream`.
fn mime_from_path(path: &str) -> String {
    let extension = path
        .rsplit_once('.')
        .map_or_else(String::new, |(_, tail)| tail.to_lowercase());

    let mime = match extension.as_str() {
        // Plain-text families.
        "txt" | "gitignore" | "env" | "lock" | "cfg" | "ini" | "log" => "text/plain",
        "md" | "markdown" => "text/markdown",
        "yaml" | "yml" => "text/yaml",
        "toml" => "text/toml",
        "html" | "htm" => "text/html",
        "css" => "text/css",
        "csv" => "text/csv",
        "tsv" => "text/tab-separated-values",
        "sh" | "bash" => "text/x-shellscript",
        "dockerfile" => "text/x-dockerfile",
        "makefile" => "text/x-makefile",
        // Structured data and application payloads.
        "json" => "application/json",
        "xml" => "application/xml",
        "pdf" => "application/pdf",
        "zip" => "application/zip",
        "tar" => "application/x-tar",
        "gz" => "application/gzip",
        "wasm" => "application/wasm",
        // Images.
        "svg" => "image/svg+xml",
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        _ => "application/octet-stream",
    };

    String::from(mime)
}
266
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Returns the value of the first constraint of sort `sort` attached to
    /// `vertex`, panicking if the vertex has no constraints or none of that sort.
    fn constraint_of(schema: &Schema, vertex: &'static str, sort: &'static str) -> String {
        let key: panproto_gat::Name = vertex.into();
        schema
            .constraints
            .get(&key)
            .unwrap()
            .iter()
            .find(|c| c.sort.as_ref() == sort)
            .unwrap()
            .value
            .clone()
    }

    /// The descriptor reports the expected name, kind count and rule count.
    #[test]
    fn protocol_def() {
        let descriptor = protocol();
        assert_eq!(descriptor.name, "raw_file");
        assert_eq!(descriptor.obj_kinds.len(), 3);
        assert_eq!(descriptor.edge_rules.len(), 2);
        assert!(descriptor.has_order);
    }

    /// Both theory names end up in the registry.
    #[test]
    fn register_theories_works() {
        let mut registry = HashMap::new();
        register_theories(&mut registry);
        for theory in ["ThRawFileSchema", "ThRawFileInstance"] {
            assert!(registry.contains_key(theory));
        }
    }

    /// Three lines of text produce one file vertex plus three line vertices.
    #[test]
    fn parse_text_file() {
        let schema = parse_text("Hello World\nSecond line\nThird line", "README.md").unwrap();

        assert_eq!(schema.vertices.len(), 4);
        assert_eq!(
            constraint_of(&schema, "README.md", "mime-type"),
            "text/markdown"
        );
    }

    /// Newline-terminated text survives a parse/emit round trip unchanged.
    #[test]
    fn parse_and_emit_roundtrip() {
        let original = "line one\nline two\nline three\n";
        let emitted = emit_text(&parse_text(original, "test.txt").unwrap()).unwrap();
        assert_eq!(emitted, original);
    }

    /// Empty input produces only the file vertex.
    #[test]
    fn parse_empty_file() {
        let schema = parse_text("", "empty.txt").unwrap();
        assert_eq!(schema.vertices.len(), 1);
    }

    /// Binary input produces a file vertex and a single chunk vertex.
    #[test]
    fn parse_binary_file() {
        let schema = parse_binary("image.png", &[0x89, 0x50, 0x4E, 0x47]).unwrap();
        assert_eq!(schema.vertices.len(), 2);

        assert_eq!(constraint_of(&schema, "image.png", "mime-type"), "image/png");
        assert_eq!(constraint_of(&schema, "image.png", "encoding"), "binary");
    }

    /// Extension lookup, including the bare-name fallback for `Dockerfile`.
    #[test]
    fn mime_detection() {
        let cases = [
            ("README.md", "text/markdown"),
            ("data.json", "application/json"),
            ("photo.jpg", "image/jpeg"),
            ("unknown.xyz", "application/octet-stream"),
            // No `.` in the name means no extension, hence the fallback type.
            ("Dockerfile", "application/octet-stream"),
            ("app.dockerfile", "text/x-dockerfile"),
        ];
        for (path, expected) in cases {
            assert_eq!(mime_from_path(path), expected);
        }
    }
}