Skip to main content

knowdit_project/
c4.rs

//! Code4rena-paired project view.
//!
//! Bundles a [`ProjectData`] with an optional audit-report payload.
//! The same shape is reused for all project sources — C4 contests
//! (where `meta` is populated and `audit` is populated whenever a
//! non-empty report could be read), Move snapshots
//! (where `audit` may be a [`AuditReportMaterial::MoveVulnerabilitySnippet`]
//! and `meta` is `None`), and freshly-cloned audit checkouts
//! (where both are `None`). Keeping a single struct here lets the
//! rest of the pipeline take "project + optional audit" without
//! having to discriminate on dataset origin.
11
12use std::path::Path;
13
14use color_eyre::eyre::{Result, WrapErr, eyre};
15use serde::Deserialize;
16
17use crate::{
18    data::{ProjectData, SourceLanguage},
19    moves::MoveVulnerabilitySnippet,
20    scope::ProjectScope,
21};
22
23/// `audits/<id>.json` payload for a Code4rena contest. Only the
24/// fields downstream callers currently inspect are listed; serde will
25/// ignore unknown fields so we don't break when the source schema
26/// grows.
27#[derive(Debug, Clone, Deserialize)]
28pub struct AuditMeta {
29    #[serde(rename = "contestId")]
30    pub contest_id: u32,
31    pub title: String,
32    pub slug: Option<String>,
33    #[serde(rename = "startTime")]
34    pub start_time: Option<String>,
35    #[serde(rename = "endTime")]
36    pub end_time: Option<String>,
37    pub details: Option<String>,
38}
39
40/// Renderable audit-report payload. Plain text for C4; structured
41/// Move vulnerability snippets for the Move dataset. Both render to
42/// markdown for prompt inclusion.
43#[derive(Debug, Clone)]
44pub enum AuditReportMaterial {
45    Text(String),
46    MoveVulnerabilitySnippet(MoveVulnerabilitySnippet),
47}
48
49impl AuditReportMaterial {
50    /// Render the payload as markdown for prompt construction.
51    pub fn render(&self) -> String {
52        match self {
53            Self::Text(s) => s.clone(),
54            Self::MoveVulnerabilitySnippet(snippet) => snippet.render(),
55        }
56    }
57
58    /// Load a markdown audit report from `report_path` and wrap it as
59    /// [`AuditReportMaterial::Text`]. Returns `None` when the file is
60    /// missing, empty after trimming, or unreadable — those cases are
61    /// logged as warnings but never an error, mirroring the
62    /// "report-is-optional" assumption the C4 ingest tools make.
63    pub fn from_optional_text_file(report_path: &Path) -> Option<Self> {
64        if !report_path.is_file() {
65            tracing::warn!("audit report not found: {}", report_path.display());
66            return None;
67        }
68        match std::fs::read_to_string(report_path) {
69            Ok(text) if !text.trim().is_empty() => Some(Self::Text(text)),
70            Ok(_) => {
71                tracing::warn!("audit report is empty: {}", report_path.display());
72                None
73            }
74            Err(err) => {
75                tracing::warn!(
76                    "failed to read audit report {}: {err}",
77                    report_path.display()
78                );
79                None
80            }
81        }
82    }
83}
84
85/// A project bundle with an optional audit-side payload.
86///
87/// * For Code4rena contests, both `meta` and `audit` are populated by
88///   [`Self::from_dataset_dir`].
89/// * For Move snapshots paired with a vulnerability JSON,
90///   [`Self::from_move_pair`] sets `audit` to the snippet variant
91///   and leaves `meta` as `None`.
92/// * For a freshly-cloned audit checkout with no contest metadata,
93///   wrap a [`ProjectData`] via [`Self::audit_only`] or
94///   [`Self::bare`].
95///
96/// The struct intentionally keeps both `meta` and `audit` as
97/// `Option`s so the legacy `knowdit_kg::project_loader::ProjectData`
98/// can wrap one of these directly regardless of dataset origin.
99#[derive(Debug, Clone)]
100pub struct C4PairedProjectData {
101    pub project: ProjectData,
102    pub meta: Option<AuditMeta>,
103    pub audit: Option<AuditReportMaterial>,
104}
105
106impl C4PairedProjectData {
107    /// Wrap a bare [`ProjectData`] with no audit material. Used by
108    /// adapters that don't have any audit context to attach (e.g.
109    /// when loading from a generic `from_dir`).
110    pub fn bare(project: ProjectData) -> Self {
111        Self {
112            project,
113            meta: None,
114            audit: None,
115        }
116    }
117
118    /// Wrap a [`ProjectData`] alongside an audit-report payload that
119    /// the caller already has in hand. Useful for Move pairings
120    /// where the snippet is built from a separately-loaded JSON map.
121    pub fn audit_only(project: ProjectData, audit: AuditReportMaterial) -> Self {
122        Self {
123            project,
124            meta: None,
125            audit: Some(audit),
126        }
127    }
128
129    /// Pair a Move-language [`ProjectData`] with its vulnerability
130    /// snippet (typically pulled out of
131    /// [`crate::load_move_audit_reports`] keyed on commit hash).
132    pub fn from_move_pair(project: ProjectData, snippet: MoveVulnerabilitySnippet) -> Self {
133        Self::audit_only(
134            project,
135            AuditReportMaterial::MoveVulnerabilitySnippet(snippet),
136        )
137    }
138
139    /// Load a `(project, audit_meta, audit_report)` triple from the
140    /// `out_train/` (or `out_git/`) layout that the C4 ingest scripts
141    /// produce:
142    ///
143    /// * `audits/<id>.json` — contest metadata. Required.
144    /// * `contracts/<id>/` — Solidity sources. Required.
145    /// * `reports/<id>.md` — markdown audit report. Optional; logged
146    ///   when missing but not an error.
147    pub async fn from_dataset_dir(dataset_dir: &Path, contest_id: u32) -> Result<Self> {
148        let audit_path = dataset_dir
149            .join("audits")
150            .join(format!("{contest_id}.json"));
151        if !audit_path.is_file() {
152            return Err(eyre!("audit metadata not found: {}", audit_path.display()));
153        }
154        let audit_text = std::fs::read_to_string(&audit_path)
155            .wrap_err_with(|| format!("failed to read {}", audit_path.display()))?;
156        let meta: AuditMeta = serde_json::from_str(&audit_text)
157            .wrap_err_with(|| format!("failed to parse audit JSON at {}", audit_path.display()))?;
158
159        let contracts_dir = dataset_dir.join("contracts").join(contest_id.to_string());
160        if !contracts_dir.is_dir() {
161            return Err(eyre!(
162                "contracts directory not found: {}",
163                contracts_dir.display()
164            ));
165        }
166
167        let report = AuditReportMaterial::from_optional_text_file(
168            &dataset_dir.join("reports").join(format!("{contest_id}.md")),
169        );
170
171        let scope = ProjectScope::build_from_patterns(
172            contracts_dir,
173            &["**/*.sol"],
174            &crate::data::DEFAULT_VENDORED_EXCLUDES,
175        )?;
176        let content = scope.read().await?;
177        let project = ProjectData::new(
178            meta.title.clone(),
179            Some(format!("c4-{contest_id}")),
180            SourceLanguage::Solidity,
181            content,
182        );
183        Ok(Self {
184            project,
185            meta: Some(meta),
186            audit: report,
187        })
188    }
189
190    /// Lower-level constructor for callers that have already
191    /// constructed a [`ProjectScope`] (e.g. with scope.txt filters).
192    /// `report_path` is read if present; pass `None` to skip.
193    pub async fn from_scoped(
194        meta: AuditMeta,
195        contest_id: u32,
196        scope: ProjectScope,
197        report_path: Option<&Path>,
198    ) -> Result<Self> {
199        let content = scope.read().await?;
200        let project = ProjectData::new(
201            meta.title.clone(),
202            Some(format!("c4-{contest_id}")),
203            SourceLanguage::Solidity,
204            content,
205        );
206        let report = report_path.and_then(AuditReportMaterial::from_optional_text_file);
207        Ok(Self {
208            project,
209            meta: Some(meta),
210            audit: report,
211        })
212    }
213}