1use std::collections::HashMap;
30use std::hash::BuildHasher;
31
32use panproto_gat::Theory;
33use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
34
35use crate::error::ProtocolError;
36use crate::theories;
37
38#[must_use]
40pub fn protocol() -> Protocol {
41 Protocol {
42 name: "raw_file".into(),
43 schema_theory: "ThRawFileSchema".into(),
44 instance_theory: "ThRawFileInstance".into(),
45 schema_composition: None,
46 instance_composition: None,
47 edge_rules: vec![
48 EdgeRule {
49 edge_kind: "line-of".into(),
50 src_kinds: vec!["file".into()],
51 tgt_kinds: vec!["line".into()],
52 },
53 EdgeRule {
54 edge_kind: "chunk-of".into(),
55 src_kinds: vec!["file".into()],
56 tgt_kinds: vec!["chunk".into()],
57 },
58 ],
59 obj_kinds: vec!["file".into(), "line".into(), "chunk".into()],
60 constraint_sorts: vec![
61 "mime-type".into(),
62 "encoding".into(),
63 "line-number".into(),
64 "content".into(),
65 "content-length".into(),
66 "content-hash".into(),
67 ],
68 has_order: true,
69 has_coproducts: false,
70 has_recursion: false,
71 has_causal: false,
72 nominal_identity: false,
73 has_defaults: false,
74 has_coercions: false,
75 has_mergers: false,
76 has_policies: false,
77 }
78}
79
80pub fn register_theories<S: BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
85 theories::register_constrained_multigraph_wtype(
86 registry,
87 "ThRawFileSchema",
88 "ThRawFileInstance",
89 );
90}
91
92pub fn parse_text(input: &str, file_path: &str) -> Result<Schema, ProtocolError> {
101 let proto = protocol();
102 let mut builder = SchemaBuilder::new(&proto);
103
104 let file_id = file_path;
106 builder = builder
107 .vertex(file_id, "file", None)
108 .map_err(|e| ProtocolError::Parse(format!("file vertex: {e}")))?;
109
110 let mime = mime_from_path(file_path);
112 builder = builder.constraint(file_id, "mime-type", &mime);
113 builder = builder.constraint(file_id, "encoding", "utf-8");
114
115 for (i, line_text) in input.lines().enumerate() {
117 let line_id = format!("{file_id}::line_{i}");
118 builder = builder
119 .vertex(&line_id, "line", None)
120 .map_err(|e| ProtocolError::Parse(format!("line {i}: {e}")))?;
121
122 builder = builder
123 .edge(file_id, &line_id, "line-of", None)
124 .map_err(|e| ProtocolError::Parse(format!("line-of edge {i}: {e}")))?;
125
126 builder = builder.constraint(&line_id, "content", line_text);
127 builder = builder.constraint(&line_id, "line-number", &i.to_string());
128 }
129
130 builder
131 .build()
132 .map_err(|e| ProtocolError::Parse(format!("build: {e}")))
133}
134
135pub fn parse_binary(file_path: &str, content: &[u8]) -> Result<Schema, ProtocolError> {
144 let proto = protocol();
145 let mut builder = SchemaBuilder::new(&proto);
146
147 let file_id = file_path;
148 builder = builder
149 .vertex(file_id, "file", None)
150 .map_err(|e| ProtocolError::Parse(format!("file vertex: {e}")))?;
151
152 let mime = mime_from_path(file_path);
153 builder = builder.constraint(file_id, "mime-type", &mime);
154 builder = builder.constraint(file_id, "encoding", "binary");
155 builder = builder.constraint(file_id, "content-length", &content.len().to_string());
156
157 let chunk_id = format!("{file_id}::chunk_0");
158 builder = builder
159 .vertex(&chunk_id, "chunk", None)
160 .map_err(|e| ProtocolError::Parse(format!("chunk vertex: {e}")))?;
161
162 builder = builder
163 .edge(file_id, &chunk_id, "chunk-of", None)
164 .map_err(|e| ProtocolError::Parse(format!("chunk-of edge: {e}")))?;
165
166 let hash = blake3::hash(content);
170 let hex = hash.to_hex();
171 builder = builder.constraint(&chunk_id, "content-hash", hex.as_str());
172
173 builder
174 .build()
175 .map_err(|e| ProtocolError::Parse(format!("build: {e}")))
176}
177
178pub fn emit_text(schema: &Schema) -> Result<String, ProtocolError> {
186 let mut lines: Vec<(usize, String)> = Vec::new();
188
189 for (name, vertex) in &schema.vertices {
190 if vertex.kind.as_ref() == "line" {
191 let line_num = schema
192 .constraints
193 .get(name)
194 .and_then(|cs| {
195 cs.iter()
196 .find(|c| c.sort.as_ref() == "line-number")
197 .and_then(|c| c.value.parse::<usize>().ok())
198 })
199 .unwrap_or(lines.len());
200
201 let content = schema
202 .constraints
203 .get(name)
204 .and_then(|cs| {
205 cs.iter()
206 .find(|c| c.sort.as_ref() == "content")
207 .map(|c| c.value.clone())
208 })
209 .unwrap_or_default();
210
211 lines.push((line_num, content));
212 }
213 }
214
215 lines.sort_by_key(|(num, _)| *num);
217
218 let text: Vec<&str> = lines.iter().map(|(_, content)| content.as_str()).collect();
219 let mut result = text.join("\n");
220 if !result.is_empty() {
221 result.push('\n');
222 }
223 Ok(result)
224}
225
/// Guesses a MIME type from the text after the last `.` in `path`.
///
/// The suffix is lower-cased before matching. The whole path string is
/// examined, not just its final component, and a path without any `.` has an
/// empty suffix. Consequently a bare `Dockerfile` or `Makefile` falls through
/// to the default, while `.gitignore` and `app.dockerfile` are recognized.
/// Anything unrecognized maps to `application/octet-stream`.
fn mime_from_path(path: &str) -> String {
    let suffix = path
        .rsplit_once('.')
        .map_or("", |(_, tail)| tail)
        .to_lowercase();

    let mime = match suffix.as_str() {
        // Plain-text formats and configuration-like files.
        "txt" | "gitignore" | "env" | "lock" | "cfg" | "ini" | "log" => "text/plain",
        "md" | "markdown" => "text/markdown",
        "yaml" | "yml" => "text/yaml",
        "toml" => "text/toml",
        "html" | "htm" => "text/html",
        "css" => "text/css",
        "csv" => "text/csv",
        "tsv" => "text/tab-separated-values",
        "sh" | "bash" => "text/x-shellscript",
        "dockerfile" => "text/x-dockerfile",
        "makefile" => "text/x-makefile",
        // Structured data, documents and archives.
        "json" => "application/json",
        "xml" => "application/xml",
        "pdf" => "application/pdf",
        "zip" => "application/zip",
        "tar" => "application/x-tar",
        "gz" => "application/gzip",
        "wasm" => "application/wasm",
        // Images.
        "svg" => "image/svg+xml",
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        _ => "application/octet-stream",
    };

    mime.to_owned()
}
268
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Returns the value of the first constraint of sort `sort` on `vertex`.
    ///
    /// Panics if the vertex has no constraints or none of the requested sort.
    fn constraint_value<'a>(
        schema: &'a Schema,
        vertex: &panproto_gat::Name,
        sort: &str,
    ) -> &'a str {
        schema
            .constraints
            .get(vertex)
            .unwrap()
            .iter()
            .find(|c| c.sort.as_ref() == sort)
            .unwrap()
            .value
            .as_str()
    }

    #[test]
    fn protocol_def() {
        let proto = protocol();
        assert_eq!(proto.name, "raw_file");
        assert_eq!(proto.obj_kinds.len(), 3);
        assert_eq!(proto.edge_rules.len(), 2);
        assert!(proto.has_order);
    }

    #[test]
    fn register_theories_works() {
        let mut registry = HashMap::new();
        register_theories(&mut registry);
        for theory in ["ThRawFileSchema", "ThRawFileInstance"] {
            assert!(registry.contains_key(theory));
        }
    }

    #[test]
    fn parse_text_file() {
        let schema = parse_text("Hello World\nSecond line\nThird line", "README.md").unwrap();

        // One file vertex plus three line vertices.
        assert_eq!(schema.vertices.len(), 4);

        let file: panproto_gat::Name = "README.md".into();
        assert_eq!(constraint_value(&schema, &file, "mime-type"), "text/markdown");
    }

    #[test]
    fn parse_and_emit_roundtrip() {
        let input = "line one\nline two\nline three\n";
        let emitted = emit_text(&parse_text(input, "test.txt").unwrap()).unwrap();
        assert_eq!(emitted, input);
    }

    #[test]
    fn parse_empty_file() {
        // Only the file vertex exists when there are no lines.
        let schema = parse_text("", "empty.txt").unwrap();
        assert_eq!(schema.vertices.len(), 1);
    }

    #[test]
    fn parse_binary_file() {
        let schema = parse_binary("image.png", &[0x89, 0x50, 0x4E, 0x47]).unwrap();

        // One file vertex plus one chunk vertex.
        assert_eq!(schema.vertices.len(), 2);

        let file: panproto_gat::Name = "image.png".into();
        assert_eq!(constraint_value(&schema, &file, "mime-type"), "image/png");
        assert_eq!(constraint_value(&schema, &file, "encoding"), "binary");
    }

    #[test]
    fn mime_detection() {
        let expectations = [
            ("README.md", "text/markdown"),
            ("data.json", "application/json"),
            ("photo.jpg", "image/jpeg"),
            ("unknown.xyz", "application/octet-stream"),
            // No `.` in the path, so there is no suffix to match on.
            ("Dockerfile", "application/octet-stream"),
            ("app.dockerfile", "text/x-dockerfile"),
        ];
        for (path, expected) in expectations {
            assert_eq!(mime_from_path(path), expected);
        }
    }
}