// grex_core/import.rs

//! `grex import` — ingest a legacy flat `REPOS.json` into a manifest.
//!
//! Parses `[{url, path}, …]`, validates each `path` as a bare child name,
//! classifies each entry by a small heuristic (git-looking URL → `scripted`,
//! empty or any other value → `declarative`), and dispatches equivalent add
//! registrations into the target manifest. Entries whose path already exists
//! in the manifest, or repeats an earlier entry of the same input, are
//! skipped; entries with an invalid path are reported as failures.
//!
//! Scope (feat-m7-4a):
//! * Only the flat `REPOS.json` schema from the meta-repo.
//! * Skip-on-collision (never overwrite).
//! * `--dry-run` short-circuits before any manifest write.
//!
//! Dispatch: each accepted entry is dispatched through
//! [`crate::add::add_pack`], which is the single shared registration
//! path used by `grex add` and the MCP edge — so `import` and `add`
//! emit byte-identical `Event::Add` rows for the same input.
17
18use crate::add::{add_pack, AddError, AddOpts, AddRequest};
19use crate::manifest;
20use crate::pack::validate::child_path::reject_reason;
21use serde::Deserialize;
22use std::path::{Path, PathBuf};
23use thiserror::Error;
24
/// Pack kind chosen for an imported entry by the [`classify`] heuristic.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImportedKind {
    /// The entry's URL looks like a git remote.
    Scripted,
    /// The entry's URL is empty or does not look like a git remote.
    Declarative,
}

impl ImportedKind {
    /// Returns the lowercase name of this kind (`"scripted"` or
    /// `"declarative"`), which is the string placed in the `pack_type`
    /// field of the add request built for an imported entry.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Declarative => "declarative",
            Self::Scripted => "scripted",
        }
    }
}
40
41/// One entry from a `REPOS.json` array.
42#[derive(Debug, Clone, Deserialize)]
43struct RawEntry {
44    #[serde(default)]
45    url: String,
46    path: String,
47}
48
49#[derive(Debug, Clone, PartialEq, Eq)]
50pub struct ImportEntry {
51    pub path: String,
52    pub url: String,
53    pub kind: ImportedKind,
54    pub would_dispatch: bool,
55}
56
/// Why a valid input entry was left out of the import.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkipReason {
    /// An earlier entry of the same input already claimed this path.
    DuplicateInInput,
    /// The target manifest already contains this path.
    PathCollision,
}
62
63#[derive(Debug, Clone, PartialEq, Eq)]
64pub struct ImportSkip {
65    pub path: String,
66    pub reason: SkipReason,
67}
68
/// Record of one input entry that was rejected during planning.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImportFailure {
    /// Path of the rejected entry, as given in the input.
    pub path: String,
    /// Human-readable explanation of the rejection.
    pub error: String,
}
74
75#[derive(Debug, Clone, Default, PartialEq, Eq)]
76pub struct ImportPlan {
77    pub imported: Vec<ImportEntry>,
78    pub skipped: Vec<ImportSkip>,
79    pub failed: Vec<ImportFailure>,
80}
81
/// Options controlling an import run.
#[derive(Debug, Clone, Copy, Default)]
pub struct ImportOpts {
    /// When `true`, the plan is computed and returned but nothing is
    /// written to the manifest. Defaults to `false`.
    pub dry_run: bool,
}
86
87#[derive(Debug, Error)]
88pub enum ImportError {
89    #[error("cannot read {path}: {source}")]
90    Io {
91        path: PathBuf,
92        #[source]
93        source: std::io::Error,
94    },
95    #[error("malformed REPOS.json at {path}: {source}")]
96    Parse {
97        path: PathBuf,
98        #[source]
99        source: serde_json::Error,
100    },
101    #[error("manifest write failed: {0}")]
102    Manifest(#[from] manifest::ManifestError),
103}
104
105/// Classify a `REPOS.json` entry into a pack kind.
106pub fn classify(url: &str) -> ImportedKind {
107    let trimmed = url.trim();
108    if trimmed.is_empty() {
109        return ImportedKind::Declarative;
110    }
111    let low = trimmed.to_ascii_lowercase();
112    let looks_git = low.starts_with("http://")
113        || low.starts_with("https://")
114        || low.starts_with("git@")
115        || low.starts_with("ssh://")
116        || low.starts_with("git://")
117        || low.ends_with(".git");
118    if looks_git {
119        ImportedKind::Scripted
120    } else {
121        ImportedKind::Declarative
122    }
123}
124
125fn parse_repos_json(repos_json: &Path) -> Result<Vec<RawEntry>, ImportError> {
126    let bytes = std::fs::read(repos_json)
127        .map_err(|source| ImportError::Io { path: repos_json.to_path_buf(), source })?;
128    let parsed: Vec<RawEntry> = serde_json::from_slice(&bytes)
129        .map_err(|source| ImportError::Parse { path: repos_json.to_path_buf(), source })?;
130    Ok(parsed)
131}
132
133fn existing_paths(manifest_path: &Path) -> Result<std::collections::HashSet<String>, ImportError> {
134    let events = manifest::read_all(manifest_path)?;
135    let state = manifest::fold(events);
136    Ok(state.values().map(|s| s.path.clone()).collect())
137}
138
139/// Ingest a flat `REPOS.json` into the manifest at `manifest_path`.
140pub fn import_from_repos_json(
141    repos_json: &Path,
142    manifest_path: &Path,
143    opts: ImportOpts,
144) -> Result<ImportPlan, ImportError> {
145    let raw = parse_repos_json(repos_json)?;
146    let existing = existing_paths(manifest_path)?;
147
148    let mut plan = ImportPlan::default();
149    let mut seen_in_input: std::collections::HashSet<String> = std::collections::HashSet::new();
150
151    for entry in raw {
152        let path = entry.path.clone();
153        // Bare-name validation BEFORE any manifest write — refuses to
154        // ingest a `path` that would later trip
155        // `ChildPathValidator` (separators, `.` / `..`, regex
156        // mismatch, empty). Without this gate, `migration.md`'s
157        // promise that import "validates" was untrue: bad rows
158        // landed as `Event::Add` rows that only failed at sync
159        // time. Fail-fast at import is a much friendlier signal.
160        if let Some(reason) = reject_reason(&path) {
161            plan.failed.push(ImportFailure { path, error: format!("invalid `path`: {reason}") });
162            continue;
163        }
164        if existing.contains(&path) {
165            plan.skipped.push(ImportSkip { path, reason: SkipReason::PathCollision });
166            continue;
167        }
168        if !seen_in_input.insert(path.clone()) {
169            plan.skipped.push(ImportSkip { path, reason: SkipReason::DuplicateInInput });
170            continue;
171        }
172        let kind = classify(&entry.url);
173        plan.imported.push(ImportEntry {
174            path,
175            url: entry.url,
176            kind,
177            would_dispatch: opts.dry_run,
178        });
179    }
180
181    if !opts.dry_run {
182        commit_plan(&plan, manifest_path)?;
183    }
184
185    Ok(plan)
186}
187
188fn commit_plan(plan: &ImportPlan, manifest_path: &Path) -> Result<(), ImportError> {
189    for entry in &plan.imported {
190        add_pack(
191            manifest_path,
192            AddRequest {
193                url: entry.url.clone(),
194                path: entry.path.clone(),
195                pack_type: entry.kind.as_str().to_string(),
196            },
197            AddOpts { dry_run: false },
198        )
199        .map_err(add_error_to_import_error)?;
200    }
201    Ok(())
202}
203
204fn add_error_to_import_error(err: AddError) -> ImportError {
205    match err {
206        AddError::Manifest(err) => ImportError::Manifest(err),
207    }
208}
209
#[cfg(test)]
mod tests {
    use super::*;
    use crate::manifest::{Event, SCHEMA_VERSION};
    use chrono::Utc;
    use tempfile::{tempdir, TempDir};

    // ---- helpers -------------------------------------------------------

    /// Writes `body` to `path`, panicking on any I/O error.
    fn write_json(path: &Path, body: &str) {
        std::fs::write(path, body).unwrap();
    }

    /// Fresh scratch directory plus the conventional input and manifest
    /// paths inside it. Keep the returned guard bound for the whole test.
    fn scratch() -> (TempDir, PathBuf, PathBuf) {
        let dir = tempdir().unwrap();
        let input = dir.path().join("REPOS.json");
        let manifest_path = dir.path().join(".grex/events.jsonl");
        (dir, input, manifest_path)
    }

    /// Writes `body` to a scratch file called `file_name` and parses it.
    fn parse_body(file_name: &str, body: &str) -> Result<Vec<RawEntry>, ImportError> {
        let dir = tempdir().unwrap();
        let file = dir.path().join(file_name);
        write_json(&file, body);
        parse_repos_json(&file)
    }

    /// Runs an import that is expected to succeed and returns its plan.
    fn run_import(input: &Path, manifest_path: &Path, dry_run: bool) -> ImportPlan {
        import_from_repos_json(input, manifest_path, ImportOpts { dry_run }).unwrap()
    }

    /// Asserts that `event` is an `Add` row with the given path and type.
    fn assert_add(event: &Event, want_path: &str, want_type: &str) {
        match event {
            Event::Add { path, pack_type, .. } => {
                assert_eq!(path, want_path);
                assert_eq!(pack_type, want_type);
            }
            _ => panic!("expected Add"),
        }
    }

    // ---- classify ------------------------------------------------------

    #[test]
    fn classify_https_git_url_is_scripted() {
        assert_eq!(classify("https://github.com/x/y.git"), ImportedKind::Scripted);
    }

    #[test]
    fn classify_http_git_url_is_scripted() {
        assert_eq!(classify("http://example.com/x.git"), ImportedKind::Scripted);
    }

    #[test]
    fn classify_ssh_git_url_is_scripted() {
        for url in ["git@github.com:x/y.git", "ssh://git@host/x.git"] {
            assert_eq!(classify(url), ImportedKind::Scripted);
        }
    }

    #[test]
    fn classify_git_protocol_is_scripted() {
        assert_eq!(classify("git://host/x.git"), ImportedKind::Scripted);
    }

    #[test]
    fn classify_dot_git_suffix_is_scripted() {
        assert_eq!(classify("some-weird-host/x.git"), ImportedKind::Scripted);
    }

    #[test]
    fn classify_empty_url_is_declarative() {
        assert_eq!(classify(""), ImportedKind::Declarative);
    }

    #[test]
    fn classify_whitespace_url_is_declarative() {
        assert_eq!(classify("   "), ImportedKind::Declarative);
    }

    #[test]
    fn classify_bare_path_is_declarative() {
        for url in ["foo/bar", "my-pack"] {
            assert_eq!(classify(url), ImportedKind::Declarative);
        }
    }

    #[test]
    fn classify_case_insensitive() {
        assert_eq!(classify("HTTPS://X/Y.GIT"), ImportedKind::Scripted);
    }

    #[test]
    fn imported_kind_str_is_stable() {
        assert_eq!(ImportedKind::Scripted.as_str(), "scripted");
        assert_eq!(ImportedKind::Declarative.as_str(), "declarative");
    }

    // ---- parse_repos_json ----------------------------------------------

    #[test]
    fn parse_rejects_missing_file() {
        let dir = tempdir().unwrap();
        let absent = dir.path().join("absent.json");
        let err = parse_repos_json(&absent).unwrap_err();
        assert!(matches!(err, ImportError::Io { .. }));
    }

    #[test]
    fn parse_rejects_malformed_json() {
        let err = parse_body("bad.json", r#"[{"url": "x", "path": "a",}]"#).unwrap_err();
        assert!(matches!(err, ImportError::Parse { .. }));
    }

    #[test]
    fn parse_rejects_non_array_shape() {
        let err = parse_body("bad.json", r#"{"url": "x", "path": "a"}"#).unwrap_err();
        assert!(matches!(err, ImportError::Parse { .. }));
    }

    #[test]
    fn parse_rejects_entry_missing_path_field() {
        let err = parse_body("bad.json", r#"[{"url": "x"}]"#).unwrap_err();
        assert!(matches!(err, ImportError::Parse { .. }));
    }

    #[test]
    fn parse_rejects_array_of_strings() {
        let err = parse_body("bad.json", r#"["foo", "bar"]"#).unwrap_err();
        assert!(matches!(err, ImportError::Parse { .. }));
    }

    #[test]
    fn parse_accepts_empty_array() {
        let entries = parse_body("empty.json", "[]").unwrap();
        assert!(entries.is_empty());
    }

    #[test]
    fn parse_accepts_missing_url_field() {
        let entries = parse_body("ok.json", r#"[{"path": "foo"}]"#).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].url, "");
        assert_eq!(entries[0].path, "foo");
    }

    // ---- import_from_repos_json ----------------------------------------

    #[test]
    fn import_parses_flat_repos_json_three_entries() {
        let (_dir, input, manifest_path) = scratch();
        write_json(
            &input,
            r#"[
                {"url": "https://github.com/a/a.git", "path": "a"},
                {"url": "git@github.com:b/b.git", "path": "b"},
                {"url": "", "path": "c"}
            ]"#,
        );
        let plan = run_import(&input, &manifest_path, true);
        assert_eq!(plan.imported.len(), 3);
        assert!(plan.skipped.is_empty());
        assert!(plan.failed.is_empty());
        let kinds: Vec<ImportedKind> = plan.imported.iter().map(|e| e.kind).collect();
        assert_eq!(
            kinds,
            [ImportedKind::Scripted, ImportedKind::Scripted, ImportedKind::Declarative]
        );
        assert!(plan.imported.iter().all(|e| e.would_dispatch));
    }

    #[test]
    fn import_dry_run_does_not_write_manifest() {
        let (_dir, input, manifest_path) = scratch();
        write_json(&input, r#"[{"url": "https://x/y.git", "path": "foo"}]"#);
        let _ = run_import(&input, &manifest_path, true);
        assert!(!manifest_path.exists());
    }

    #[test]
    fn import_real_run_appends_one_row_per_entry() {
        let (_dir, input, manifest_path) = scratch();
        write_json(
            &input,
            r#"[
                {"url": "https://github.com/a/a.git", "path": "a"},
                {"url": "", "path": "b"}
            ]"#,
        );
        let plan = run_import(&input, &manifest_path, false);
        assert_eq!(plan.imported.len(), 2);
        let events = manifest::read_all(&manifest_path).unwrap();
        assert_eq!(events.len(), 2);
        assert_add(&events[0], "a", "scripted");
        assert_add(&events[1], "b", "declarative");
    }

    #[test]
    fn import_real_run_matches_shared_add_event_fields() {
        let dir = tempdir().unwrap();
        let input = dir.path().join("REPOS.json");
        let import_manifest = dir.path().join("grex-import.jsonl");
        let add_manifest = dir.path().join("grex-add.jsonl");
        write_json(&input, r#"[{"url": "https://x/y.git", "path": "foo"}]"#);

        run_import(&input, &import_manifest, false);
        let request = AddRequest {
            url: "https://x/y.git".into(),
            path: "foo".into(),
            pack_type: "scripted".into(),
        };
        add_pack(&add_manifest, request, AddOpts { dry_run: false }).unwrap();

        let import_events = manifest::read_all(&import_manifest).unwrap();
        let add_events = manifest::read_all(&add_manifest).unwrap();
        assert_eq!(add_event_fields(&import_events[0]), add_event_fields(&add_events[0]));
    }

    /// Projects an `Add` event onto the fields that must match between
    /// `import` and `add` (everything named here; the rest is ignored).
    fn add_event_fields(event: &Event) -> (&str, &str, &str, &str, &str) {
        match event {
            Event::Add { id, url, path, pack_type, schema_version, .. } => {
                (id, url, path, pack_type, schema_version)
            }
            _ => panic!("expected Add"),
        }
    }

    #[test]
    fn import_skips_existing_manifest_row() {
        let (_dir, input, manifest_path) = scratch();
        manifest::append_event(
            &manifest_path,
            &Event::Add {
                ts: Utc::now(),
                id: "a".into(),
                url: "pre".into(),
                path: "a".into(),
                pack_type: "declarative".into(),
                schema_version: SCHEMA_VERSION.into(),
            },
        )
        .unwrap();
        write_json(
            &input,
            r#"[
                {"url": "https://x/a.git", "path": "a"},
                {"url": "", "path": "b"}
            ]"#,
        );
        let plan = run_import(&input, &manifest_path, false);
        assert_eq!(plan.imported.len(), 1);
        assert_eq!(plan.imported[0].path, "b");
        assert_eq!(
            plan.skipped,
            [ImportSkip { path: "a".into(), reason: SkipReason::PathCollision }]
        );
    }

    #[test]
    fn import_is_idempotent_on_second_run() {
        let (_dir, input, manifest_path) = scratch();
        write_json(&input, r#"[{"url": "https://x/y.git", "path": "foo"}]"#);
        let first = run_import(&input, &manifest_path, false);
        assert_eq!(first.imported.len(), 1);
        let second = run_import(&input, &manifest_path, false);
        assert_eq!(second.imported.len(), 0);
        assert_eq!(second.skipped.len(), 1);
        let events = manifest::read_all(&manifest_path).unwrap();
        assert_eq!(events.len(), 1);
    }

    #[test]
    fn import_detects_duplicate_paths_within_input() {
        let (_dir, input, manifest_path) = scratch();
        write_json(
            &input,
            r#"[
                {"url": "https://x/y.git", "path": "foo"},
                {"url": "https://other.git", "path": "foo"}
            ]"#,
        );
        let plan = run_import(&input, &manifest_path, true);
        assert_eq!(plan.imported.len(), 1);
        assert_eq!(plan.skipped.len(), 1);
        assert_eq!(plan.skipped[0].reason, SkipReason::DuplicateInInput);
    }

    #[test]
    fn import_empty_array_produces_empty_plan() {
        let (_dir, input, manifest_path) = scratch();
        write_json(&input, "[]");
        let plan = run_import(&input, &manifest_path, false);
        assert!(plan.imported.is_empty());
        assert!(plan.skipped.is_empty());
        assert!(!manifest_path.exists());
    }

    #[test]
    fn import_missing_input_surfaces_io_error() {
        let (dir, _, manifest_path) = scratch();
        let missing = dir.path().join("no-such.json");
        let err =
            import_from_repos_json(&missing, &manifest_path, ImportOpts::default()).unwrap_err();
        assert!(matches!(err, ImportError::Io { .. }));
    }

    #[test]
    fn import_malformed_surfaces_parse_error() {
        let (_dir, input, manifest_path) = scratch();
        write_json(&input, "not json at all");
        let err =
            import_from_repos_json(&input, &manifest_path, ImportOpts::default()).unwrap_err();
        assert!(matches!(err, ImportError::Parse { .. }));
    }

    #[test]
    fn import_rejects_path_with_separator_into_failed() {
        let (_dir, input, manifest_path) = scratch();
        write_json(
            &input,
            r#"[
                {"url": "https://x/a.git", "path": "../escape"},
                {"url": "https://x/b.git", "path": "good"}
            ]"#,
        );
        let plan = run_import(&input, &manifest_path, false);
        assert_eq!(plan.imported.len(), 1, "only the good row imports");
        assert_eq!(plan.imported[0].path, "good");
        assert_eq!(plan.failed.len(), 1, "the traversal-bearing row goes to failed");
        let rejected = &plan.failed[0];
        assert_eq!(rejected.path, "../escape");
        assert!(
            rejected.error.contains("separator"),
            "error must explain the rejection: {}",
            rejected.error,
        );
        // Manifest must NOT contain a row for `../escape`.
        let events = manifest::read_all(&manifest_path).unwrap();
        let leaked =
            events.iter().any(|e| matches!(e, Event::Add { path, .. } if path == "../escape"));
        assert!(!leaked, "no Event::Add may be written for a rejected path");
    }

    #[test]
    fn import_rejects_dot_dotdot_uppercase_empty() {
        let (_dir, input, manifest_path) = scratch();
        write_json(
            &input,
            r#"[
                {"url": "u", "path": "."},
                {"url": "u", "path": ".."},
                {"url": "u", "path": "Foo"},
                {"url": "u", "path": ""},
                {"url": "u", "path": "foo\\bar"}
            ]"#,
        );
        let plan = run_import(&input, &manifest_path, true);
        assert_eq!(plan.imported.len(), 0);
        assert_eq!(plan.failed.len(), 5);
    }

    #[test]
    fn property_every_imported_entry_matches_classify() {
        let (_dir, input, manifest_path) = scratch();
        write_json(
            &input,
            r#"[
                {"url": "https://a/a.git", "path": "a"},
                {"url": "git@b:b/b.git", "path": "b"},
                {"url": "not-a-url", "path": "c"},
                {"url": "", "path": "d"},
                {"url": "git://h/x.git", "path": "e"}
            ]"#,
        );
        let plan = run_import(&input, &manifest_path, true);
        assert_eq!(plan.imported.len(), 5);
        for entry in &plan.imported {
            assert_eq!(entry.kind, classify(&entry.url));
        }
    }
}