knowdit_project/c4.rs
1//! Code4rena-paired project view.
2//!
3//! Bundles a [`ProjectData`] with an optional audit-report payload.
//! The same shape is reused for all project sources — C4 contests
//! (where `meta` is populated and `audit` is populated whenever a
//! readable, non-empty report file exists), Move snapshots
6//! (where `audit` may be a [`AuditReportMaterial::MoveVulnerabilitySnippet`]
7//! and `meta` is `None`), and freshly-cloned audit checkouts
8//! (where both are `None`). Keeping a single struct here lets the
9//! rest of the pipeline take "project + optional audit" without
10//! having to discriminate on dataset origin.
11
12use std::path::Path;
13
14use color_eyre::eyre::{Result, WrapErr, eyre};
15use serde::Deserialize;
16
17use crate::{
18 data::{ProjectData, SourceLanguage},
19 moves::MoveVulnerabilitySnippet,
20 scope::ProjectScope,
21};
22
23/// `audits/<id>.json` payload for a Code4rena contest. Only the
24/// fields downstream callers currently inspect are listed; serde will
25/// ignore unknown fields so we don't break when the source schema
26/// grows.
27#[derive(Debug, Clone, Deserialize)]
28pub struct AuditMeta {
29 #[serde(rename = "contestId")]
30 pub contest_id: u32,
31 pub title: String,
32 pub slug: Option<String>,
33 #[serde(rename = "startTime")]
34 pub start_time: Option<String>,
35 #[serde(rename = "endTime")]
36 pub end_time: Option<String>,
37 pub details: Option<String>,
38}
39
/// Renderable audit-report payload. Plain text for C4; structured
/// Move vulnerability snippets for the Move dataset. Both render to
/// markdown for prompt inclusion.
#[derive(Debug, Clone)]
pub enum AuditReportMaterial {
    /// Report text held verbatim; rendering returns a copy of this
    /// string unchanged.
    Text(String),
    /// Structured Move vulnerability snippet; rendering is delegated
    /// to the snippet's own `render` method.
    MoveVulnerabilitySnippet(MoveVulnerabilitySnippet),
}
48
49impl AuditReportMaterial {
50 /// Render the payload as markdown for prompt construction.
51 pub fn render(&self) -> String {
52 match self {
53 Self::Text(s) => s.clone(),
54 Self::MoveVulnerabilitySnippet(snippet) => snippet.render(),
55 }
56 }
57
58 /// Load a markdown audit report from `report_path` and wrap it as
59 /// [`AuditReportMaterial::Text`]. Returns `None` when the file is
60 /// missing, empty after trimming, or unreadable — those cases are
61 /// logged as warnings but never an error, mirroring the
62 /// "report-is-optional" assumption the C4 ingest tools make.
63 pub fn from_optional_text_file(report_path: &Path) -> Option<Self> {
64 if !report_path.is_file() {
65 tracing::warn!("audit report not found: {}", report_path.display());
66 return None;
67 }
68 match std::fs::read_to_string(report_path) {
69 Ok(text) if !text.trim().is_empty() => Some(Self::Text(text)),
70 Ok(_) => {
71 tracing::warn!("audit report is empty: {}", report_path.display());
72 None
73 }
74 Err(err) => {
75 tracing::warn!(
76 "failed to read audit report {}: {err}",
77 report_path.display()
78 );
79 None
80 }
81 }
82 }
83}
84
/// A project bundle with an optional audit-side payload.
///
/// * For Code4rena contests, [`Self::from_dataset_dir`] always
///   populates `meta`; `audit` is populated only when the markdown
///   report could be loaded (the report is optional).
/// * For Move snapshots paired with a vulnerability JSON,
///   [`Self::from_move_pair`] sets `audit` to the snippet variant
///   and leaves `meta` as `None`.
/// * For a freshly-cloned audit checkout with no contest metadata,
///   wrap a [`ProjectData`] via [`Self::audit_only`] or
///   [`Self::bare`].
///
/// The struct intentionally keeps both `meta` and `audit` as
/// `Option`s so the legacy `knowdit_kg::project_loader::ProjectData`
/// can wrap one of these directly regardless of dataset origin.
#[derive(Debug, Clone)]
pub struct C4PairedProjectData {
    /// The loaded project itself.
    pub project: ProjectData,
    /// Contest metadata. `Some` when built by
    /// [`Self::from_dataset_dir`] or [`Self::from_scoped`]; `None`
    /// when built by [`Self::bare`], [`Self::audit_only`] or
    /// [`Self::from_move_pair`].
    pub meta: Option<AuditMeta>,
    /// Audit-report payload, if one is attached.
    pub audit: Option<AuditReportMaterial>,
}
105
106impl C4PairedProjectData {
107 /// Wrap a bare [`ProjectData`] with no audit material. Used by
108 /// adapters that don't have any audit context to attach (e.g.
109 /// when loading from a generic `from_dir`).
110 pub fn bare(project: ProjectData) -> Self {
111 Self {
112 project,
113 meta: None,
114 audit: None,
115 }
116 }
117
118 /// Wrap a [`ProjectData`] alongside an audit-report payload that
119 /// the caller already has in hand. Useful for Move pairings
120 /// where the snippet is built from a separately-loaded JSON map.
121 pub fn audit_only(project: ProjectData, audit: AuditReportMaterial) -> Self {
122 Self {
123 project,
124 meta: None,
125 audit: Some(audit),
126 }
127 }
128
129 /// Pair a Move-language [`ProjectData`] with its vulnerability
130 /// snippet (typically pulled out of
131 /// [`crate::load_move_audit_reports`] keyed on commit hash).
132 pub fn from_move_pair(project: ProjectData, snippet: MoveVulnerabilitySnippet) -> Self {
133 Self::audit_only(
134 project,
135 AuditReportMaterial::MoveVulnerabilitySnippet(snippet),
136 )
137 }
138
139 /// Load a `(project, audit_meta, audit_report)` triple from the
140 /// `out_train/` (or `out_git/`) layout that the C4 ingest scripts
141 /// produce:
142 ///
143 /// * `audits/<id>.json` — contest metadata. Required.
144 /// * `contracts/<id>/` — Solidity sources. Required.
145 /// * `reports/<id>.md` — markdown audit report. Optional; logged
146 /// when missing but not an error.
147 pub async fn from_dataset_dir(dataset_dir: &Path, contest_id: u32) -> Result<Self> {
148 let audit_path = dataset_dir
149 .join("audits")
150 .join(format!("{contest_id}.json"));
151 if !audit_path.is_file() {
152 return Err(eyre!("audit metadata not found: {}", audit_path.display()));
153 }
154 let audit_text = std::fs::read_to_string(&audit_path)
155 .wrap_err_with(|| format!("failed to read {}", audit_path.display()))?;
156 let meta: AuditMeta = serde_json::from_str(&audit_text)
157 .wrap_err_with(|| format!("failed to parse audit JSON at {}", audit_path.display()))?;
158
159 let contracts_dir = dataset_dir.join("contracts").join(contest_id.to_string());
160 if !contracts_dir.is_dir() {
161 return Err(eyre!(
162 "contracts directory not found: {}",
163 contracts_dir.display()
164 ));
165 }
166
167 let report = AuditReportMaterial::from_optional_text_file(
168 &dataset_dir.join("reports").join(format!("{contest_id}.md")),
169 );
170
171 let scope = ProjectScope::build_from_patterns(
172 contracts_dir,
173 &["**/*.sol"],
174 &crate::data::DEFAULT_VENDORED_EXCLUDES,
175 )?;
176 let content = scope.read().await?;
177 let project = ProjectData::new(
178 meta.title.clone(),
179 Some(format!("c4-{contest_id}")),
180 SourceLanguage::Solidity,
181 content,
182 );
183 Ok(Self {
184 project,
185 meta: Some(meta),
186 audit: report,
187 })
188 }
189
190 /// Lower-level constructor for callers that have already
191 /// constructed a [`ProjectScope`] (e.g. with scope.txt filters).
192 /// `report_path` is read if present; pass `None` to skip.
193 pub async fn from_scoped(
194 meta: AuditMeta,
195 contest_id: u32,
196 scope: ProjectScope,
197 report_path: Option<&Path>,
198 ) -> Result<Self> {
199 let content = scope.read().await?;
200 let project = ProjectData::new(
201 meta.title.clone(),
202 Some(format!("c4-{contest_id}")),
203 SourceLanguage::Solidity,
204 content,
205 );
206 let report = report_path.and_then(AuditReportMaterial::from_optional_text_file);
207 Ok(Self {
208 project,
209 meta: Some(meta),
210 audit: report,
211 })
212 }
213}