Skip to main content

knowdit_project/
moves.rs

1//! Move-specific project types: platform identifiers, snapshot
2//! discovery, and the audit-report shape that comes with Move projects.
3//!
4//! Lifted from `knowdit_kg::project_loader` so all project-related
5//! data lives in one crate. The LLM-driven analysis methods that
6//! previously hung off these types stay in `knowdit-kg` — this
7//! module is data + filesystem discovery only.
8
9use std::{
10    collections::HashMap,
11    path::{Path, PathBuf},
12};
13
14use color_eyre::eyre::{Result, WrapErr};
15use serde::Deserialize;
16
/// Identifies the Move ecosystem a project targets.
///
/// The platform selects which subdirectories of `moves/` hold the
/// project's source snapshots and its audit findings.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum MovePlatform {
    Aptos,
    Sui,
}

impl MovePlatform {
    /// Per-platform constants as `(identifier, codebase dir, vulnerability dir)`.
    ///
    /// Keeping all three in one exhaustive `match` means a new variant
    /// cannot be added without supplying every value.
    fn layout(self) -> (&'static str, &'static str, &'static str) {
        match self {
            Self::Aptos => ("aptos", "_codebase_apt", "_vun_apt"),
            Self::Sui => ("sui", "_codebase_sui", "_vun_sui/vulnerability_snippets"),
        }
    }

    /// Lowercase identifier of the platform (`"aptos"` or `"sui"`).
    pub fn as_str(self) -> &'static str {
        let (identifier, _, _) = self.layout();
        identifier
    }

    /// Path, relative to `moves/`, of the directory holding this
    /// platform's source snapshots.
    pub fn codebase_dir(self) -> &'static str {
        let (_, codebase, _) = self.layout();
        codebase
    }

    /// Path, relative to `moves/`, of the directory holding this
    /// platform's vulnerability JSON files.
    pub fn vulnerability_dir(self) -> &'static str {
        let (_, _, vulnerabilities) = self.layout();
        vulnerabilities
    }
}
52
53/// One discoverable Move project snapshot, before any source loading.
54/// `snapshot_sort_key` is used to pick the most recent snapshot when
55/// multiple share a commit hash; it's not exposed publicly because
56/// callers only ever sort by it indirectly through
57/// [`list_move_projects`].
58#[derive(Debug, Clone)]
59pub struct MoveProjectDescriptor {
60    pub platform: MovePlatform,
61    pub commit_hash: String,
62    pub name: String,
63    pub root_dir: PathBuf,
64    snapshot_sort_key: String,
65}
66
67impl MoveProjectDescriptor {
68    pub fn snapshot_sort_key(&self) -> &str {
69        &self.snapshot_sort_key
70    }
71}
72
73/// Parsed contents of a `moves/_vun_*/<id>.json` file: one finding.
74/// Aligned with the upstream JSON schema; unknown fields are
75/// permitted via serde's default behaviour for missing keys (see
76/// each `#[serde(default)]`).
77#[derive(Debug, Clone, Deserialize)]
78pub struct MoveVulnerabilityFinding {
79    pub id: u32,
80    pub commit: String,
81    #[serde(default)]
82    pub project_id: Option<u32>,
83    #[serde(default)]
84    pub number: Option<String>,
85    #[serde(default)]
86    pub title: String,
87    #[serde(rename = "type", default)]
88    pub finding_type: Option<String>,
89    #[serde(default)]
90    pub severity: Option<String>,
91    #[serde(default)]
92    pub confidence: Option<String>,
93    #[serde(default)]
94    pub status: Option<String>,
95    #[serde(default)]
96    pub description: String,
97    #[serde(default)]
98    pub suggestion: Option<String>,
99    #[serde(default)]
100    pub resolution: Option<String>,
101    #[serde(default)]
102    pub notes: Option<String>,
103    #[serde(default)]
104    pub files: Vec<MoveVulnerabilitySnippetFile>,
105}
106
107#[derive(Debug, Clone, Deserialize)]
108pub struct MoveVulnerabilitySnippetFile {
109    pub filename: String,
110    #[serde(default)]
111    pub commit: String,
112    #[serde(default)]
113    pub snippets: Vec<String>,
114}
115
116/// One Move project's worth of vulnerability findings, keyed by
117/// commit hash so it can be paired with a snapshot.
118#[derive(Debug, Clone)]
119pub struct MoveVulnerabilitySnippet {
120    pub commit: String,
121    pub findings: Vec<MoveVulnerabilityFinding>,
122}
123
124impl MoveVulnerabilitySnippet {
125    pub fn new(commit: impl Into<String>, mut findings: Vec<MoveVulnerabilityFinding>) -> Self {
126        findings.sort_by_key(|finding| finding.id);
127        Self {
128            commit: commit.into(),
129            findings,
130        }
131    }
132
133    /// Render the snippet as markdown for prompt construction.
134    pub fn render(&self) -> String {
135        if self.findings.is_empty() {
136            return String::new();
137        }
138        let mut out = String::from("## Move Audit Finding Material\n\n");
139        for finding in &self.findings {
140            finding.render_into(&mut out);
141        }
142        out
143    }
144}
145
146impl MoveVulnerabilityFinding {
147    fn render_into(&self, out: &mut String) {
148        out.push_str(&format!("### Finding {}: {}\n\n", self.id, self.title));
149        out.push_str(&format!("- Commit: {}\n", self.commit));
150        if let Some(number) = self
151            .number
152            .as_deref()
153            .filter(|value| !value.trim().is_empty())
154        {
155            out.push_str(&format!("- Finding Number: {}\n", number));
156        }
157        if let Some(kind) = self
158            .finding_type
159            .as_deref()
160            .filter(|value| !value.trim().is_empty())
161        {
162            out.push_str(&format!("- Type: {}\n", kind));
163        }
164        if let Some(severity) = self
165            .severity
166            .as_deref()
167            .filter(|value| !value.trim().is_empty())
168        {
169            out.push_str(&format!("- Original Severity: {}\n", severity));
170        }
171        if let Some(confidence) = self
172            .confidence
173            .as_deref()
174            .filter(|value| !value.trim().is_empty())
175        {
176            out.push_str(&format!("- Confidence: {}\n", confidence));
177        }
178        if let Some(status) = self
179            .status
180            .as_deref()
181            .filter(|value| !value.trim().is_empty())
182        {
183            out.push_str(&format!("- Status: {}\n", status));
184        }
185        out.push('\n');
186
187        append_section(out, "Description", &self.description);
188        append_optional_section(out, "Suggestion", self.suggestion.as_deref());
189        append_optional_section(out, "Resolution", self.resolution.as_deref());
190        append_optional_section(out, "Notes", self.notes.as_deref());
191
192        if !self.files.is_empty() {
193            out.push_str("#### Referenced Files\n\n");
194            for file in &self.files {
195                out.push_str(&format!("##### {}\n\n", file.filename));
196                if !file.commit.trim().is_empty() && file.commit != self.commit {
197                    out.push_str(&format!("- File Commit: {}\n\n", file.commit));
198                }
199                if !file.snippets.is_empty() {
200                    out.push_str("```move\n");
201                    out.push_str(&file.snippets.join("\n"));
202                    out.push_str("\n```\n\n");
203                }
204            }
205        }
206    }
207}
208
209// ---------------------------------------------------------------------------
210// Discovery helpers (filesystem-only — no LLM).
211// ---------------------------------------------------------------------------
212
213/// Enumerate Move project snapshots under `moves/`, deduplicating by
214/// commit hash so each project is represented by its most recent
215/// snapshot.
216pub fn list_move_projects(
217    moves_dir: &Path,
218    platforms: &[MovePlatform],
219) -> Result<Vec<MoveProjectDescriptor>> {
220    let mut by_commit = HashMap::<String, MoveProjectDescriptor>::new();
221
222    for platform in requested_platforms(platforms) {
223        let codebase_root = moves_dir.join(platform.codebase_dir());
224        if !codebase_root.exists() {
225            tracing::warn!(
226                "Move codebase directory not found: {}",
227                codebase_root.display()
228            );
229            continue;
230        }
231
232        for entry in std::fs::read_dir(&codebase_root)
233            .wrap_err_with(|| format!("failed to read move codebase {}", codebase_root.display()))?
234        {
235            let entry = entry?;
236            if !entry.file_type()?.is_dir() {
237                continue;
238            }
239
240            let snapshot_path = entry.path();
241            let snapshot_name = entry.file_name().to_string_lossy().to_string();
242            let Some((commit_hash, root_dir)) = discover_move_snapshot(&snapshot_path)? else {
243                tracing::warn!(
244                    "Skipping move snapshot without a commit dir: {}",
245                    snapshot_path.display()
246                );
247                continue;
248            };
249
250            let name = load_move_package_name(&root_dir).unwrap_or_else(|| commit_hash.clone());
251            let descriptor = MoveProjectDescriptor {
252                platform,
253                commit_hash: commit_hash.clone(),
254                name,
255                root_dir,
256                snapshot_sort_key: snapshot_sort_key(&snapshot_name),
257            };
258
259            match by_commit.get_mut(&commit_hash) {
260                Some(existing) if descriptor.snapshot_sort_key > existing.snapshot_sort_key => {
261                    *existing = descriptor;
262                }
263                None => {
264                    by_commit.insert(commit_hash, descriptor);
265                }
266                Some(_) => {}
267            }
268        }
269    }
270
271    let mut projects: Vec<_> = by_commit.into_values().collect();
272    projects.sort_by(|a, b| {
273        b.snapshot_sort_key
274            .cmp(&a.snapshot_sort_key)
275            .then_with(|| a.commit_hash.cmp(&b.commit_hash))
276    });
277    Ok(projects)
278}
279
280/// Load every Move vulnerability JSON found under `moves/_vun_*/` and
281/// group them by commit hash. Returns a `commit -> snippet` map ready
282/// to be paired with snapshots from [`list_move_projects`].
283pub fn load_move_audit_reports(
284    moves_dir: &Path,
285    platforms: &[MovePlatform],
286) -> Result<HashMap<String, MoveVulnerabilitySnippet>> {
287    let mut vulnerabilities_by_commit: HashMap<String, Vec<MoveVulnerabilityFinding>> =
288        HashMap::new();
289
290    for platform in requested_platforms(platforms) {
291        let vuln_root = moves_dir.join(platform.vulnerability_dir());
292        if !vuln_root.exists() {
293            tracing::warn!(
294                "Move vulnerability directory not found: {}",
295                vuln_root.display()
296            );
297            continue;
298        }
299
300        for entry in std::fs::read_dir(&vuln_root)
301            .wrap_err_with(|| format!("failed to read {}", vuln_root.display()))?
302        {
303            let entry = entry?;
304            let path = entry.path();
305            if !entry.file_type()?.is_file()
306                || path.extension().and_then(|ext| ext.to_str()) != Some("json")
307            {
308                continue;
309            }
310
311            let text = std::fs::read_to_string(&path)
312                .wrap_err_with(|| format!("failed to read {}", path.display()))?;
313            let snippet: MoveVulnerabilityFinding =
314                serde_json::from_str(&text).wrap_err_with(|| {
315                    format!("failed to parse move vulnerability JSON {}", path.display())
316                })?;
317            vulnerabilities_by_commit
318                .entry(snippet.commit.clone())
319                .or_default()
320                .push(snippet);
321        }
322    }
323
324    let mut reports = HashMap::new();
325    for (commit_hash, snippets) in vulnerabilities_by_commit {
326        if !snippets.is_empty() {
327            reports.insert(
328                commit_hash.clone(),
329                MoveVulnerabilitySnippet::new(commit_hash, snippets),
330            );
331        }
332    }
333    Ok(reports)
334}
335
336/// Enumerate Code4rena contest IDs under `<dataset>/contracts/`.
337/// Useful for batch-processing — the audit and report files for each
338/// contest are loaded via [`crate::C4PairedProjectData::from_dataset_dir`].
339pub fn list_contest_ids(dataset_dir: &Path) -> Result<Vec<u32>> {
340    let contracts_dir = dataset_dir.join("contracts");
341    let mut ids = Vec::new();
342    for entry in std::fs::read_dir(&contracts_dir)
343        .wrap_err_with(|| format!("failed to read {}", contracts_dir.display()))?
344    {
345        let entry = entry?;
346        if entry.file_type()?.is_dir()
347            && let Some(name) = entry.file_name().to_str()
348            && let Ok(id) = name.parse::<u32>()
349        {
350            ids.push(id);
351        }
352    }
353    ids.sort();
354    Ok(ids)
355}
356
357// ---------------------------------------------------------------------------
358// Internals
359// ---------------------------------------------------------------------------
360
361fn requested_platforms(platforms: &[MovePlatform]) -> Vec<MovePlatform> {
362    if platforms.is_empty() {
363        vec![MovePlatform::Aptos, MovePlatform::Sui]
364    } else {
365        let mut out = Vec::new();
366        for platform in platforms {
367            if !out.contains(platform) {
368                out.push(*platform);
369            }
370        }
371        out
372    }
373}
374
375fn discover_move_snapshot(snapshot_dir: &Path) -> Result<Option<(String, PathBuf)>> {
376    let mut commit_dirs = Vec::new();
377    for entry in std::fs::read_dir(snapshot_dir)? {
378        let entry = entry?;
379        if !entry.file_type()?.is_dir() {
380            continue;
381        }
382        let name = entry.file_name().to_string_lossy().to_string();
383        if is_hex_commit_hash(&name) {
384            commit_dirs.push((name, entry.path()));
385        }
386    }
387    commit_dirs.sort_by(|a, b| a.0.cmp(&b.0));
388    Ok(commit_dirs.into_iter().next())
389}
390
/// Report whether `text` looks like a full commit hash: exactly 40
/// ASCII hexadecimal digits, in either case.
fn is_hex_commit_hash(text: &str) -> bool {
    const COMMIT_HASH_LEN: usize = 40;

    let bytes = text.as_bytes();
    // Any byte of a multi-byte UTF-8 character fails the hex-digit
    // check, so working on bytes rejects non-ASCII input too.
    bytes.len() == COMMIT_HASH_LEN && bytes.iter().all(u8::is_ascii_hexdigit)
}
394
/// Derive the ordering key for a snapshot directory name.
///
/// When the name ends in `<8 digits>_<6 digits>` (preceded by `_` or by
/// nothing at all), the key is exactly that `date_time` suffix. Any
/// other name is used unchanged as its own key.
fn snapshot_sort_key(snapshot_name: &str) -> String {
    /// True when `text` consists of exactly `len` ASCII digits.
    fn is_digits(text: &str, len: usize) -> bool {
        text.len() == len && text.bytes().all(|byte| byte.is_ascii_digit())
    }

    // Walk the `_`-separated fields from the right: time first, then date.
    let mut fields = snapshot_name.rsplit('_');
    let time = fields.next().unwrap_or_default();
    let date = fields.next().unwrap_or_default();

    if !(is_digits(date, 8) && is_digits(time, 6)) {
        return snapshot_name.to_string();
    }
    format!("{date}_{time}")
}
409
410fn load_move_package_name(root_dir: &Path) -> Option<String> {
411    let contents = std::fs::read_to_string(root_dir.join("Move.toml")).ok()?;
412    parse_move_package_name(&contents)
413}
414
/// Extract the `name` value from the `[package]` table of a `Move.toml`.
///
/// This is a small line-oriented scanner, not a full TOML parser. It
/// tracks the current table header, and inside `[package]` returns the
/// first non-empty `name = …` value. The value may be a basic string
/// (`"foo"`), a literal string (`'foo'`), or — leniently — unquoted.
/// Comments are removed first, but a `#` inside a quoted value is kept
/// as part of the value.
///
/// Returns `None` when no such entry exists.
fn parse_move_package_name(contents: &str) -> Option<String> {
    let mut in_package_section = false;
    for raw_line in contents.lines() {
        let line = strip_toml_comment(raw_line).trim();
        if line.is_empty() {
            continue;
        }
        if line.starts_with('[') && line.ends_with(']') {
            in_package_section = line == "[package]";
            continue;
        }
        if !in_package_section {
            continue;
        }

        // Requiring `=` right after the key rejects longer keys that
        // merely start with "name".
        let Some(value) = line
            .strip_prefix("name")
            .and_then(|rest| rest.trim_start().strip_prefix('='))
        else {
            continue;
        };

        let value = value.trim();
        let value = match value
            .strip_prefix('\'')
            .and_then(|rest| rest.strip_suffix('\''))
        {
            // TOML literal string: take what is between the single quotes.
            Some(literal) => literal,
            // Basic or unquoted value: drop any surrounding double quotes.
            None => value.trim_matches('"'),
        };
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}

/// Cut a trailing `# comment` off one line of TOML.
///
/// A `#` only starts a comment outside of quotes, so the scan tracks
/// whether it is inside a `"…"` or `'…'` string. Backslash escapes in
/// basic strings are not interpreted.
fn strip_toml_comment(line: &str) -> &str {
    let mut open_quote: Option<char> = None;
    for (index, ch) in line.char_indices() {
        match open_quote {
            None if ch == '#' => return &line[..index],
            None if ch == '"' || ch == '\'' => open_quote = Some(ch),
            Some(quote) if quote == ch => open_quote = None,
            _ => {}
        }
    }
    line
}
441
/// Append a `#### heading` markdown section containing the trimmed
/// `value`, followed by a blank line. Writes nothing when `value` is
/// empty or whitespace-only.
fn append_section(out: &mut String, heading: &str, value: &str) {
    match value.trim() {
        "" => {}
        value => out.push_str(&format!("#### {heading}\n\n{value}\n\n")),
    }
}
449
450fn append_optional_section(out: &mut String, heading: &str, value: Option<&str>) {
451    if let Some(value) = value {
452        append_section(out, heading, value);
453    }
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed commit hash: 40 lowercase hex digits.
    const VALID_COMMIT: &str = "0123456789abcdef0123456789abcdef01234567";

    /// The `name` entry of the `[package]` table is returned.
    #[test]
    fn parse_move_package_name_finds_name() {
        let manifest = "\n[package]\nname = \"foo_bar\"\nversion = \"0.1\"\n";
        let parsed = parse_move_package_name(manifest);
        assert_eq!(parsed, Some(String::from("foo_bar")));
    }

    /// A `name` key outside `[package]` must not be picked up.
    #[test]
    fn parse_move_package_name_other_section_ignored() {
        let manifest = "\n[addresses]\nname = \"ignored\"\n";
        assert!(parse_move_package_name(manifest).is_none());
    }

    /// Only strings of exactly 40 hex digits count as commit hashes.
    #[test]
    fn is_hex_commit_recognises_40_hex_chars() {
        assert!(is_hex_commit_hash(VALID_COMMIT));

        let rejected = ["short", "g123456789abcdef0123456789abcdef01234567"];
        for candidate in rejected {
            assert!(!is_hex_commit_hash(candidate), "accepted {candidate:?}");
        }
    }
}