Skip to main content

chaincodec_registry/
csdl.rs

1//! CSDL (ChainCodec Schema Definition Language) parser.
2//!
3//! CSDL is a YAML-based DSL for defining blockchain event schemas.
4//! This parser converts raw YAML text into `chaincodec_core::Schema`.
5//!
6//! A single `.csdl` file may contain multiple schema documents separated
7//! by `---`. Use `parse_all()` to get every schema, or `parse()` to get
8//! only the first one.
9
10use chaincodec_core::{
11    error::RegistryError,
12    event::EventFingerprint,
13    schema::{FieldDef, Schema, SchemaMeta, TrustLevel},
14    types::CanonicalType,
15};
16use indexmap::IndexMap;
17use serde::Deserialize;
18
19// ─── Raw CSDL serde types ─────────────────────────────────────────────────────
20
21#[derive(Debug, Deserialize)]
22struct CsdlRaw {
23    schema: String,
24    version: u32,
25    #[serde(default)]
26    description: Option<String>,
27    chains: Vec<String>,
28    #[serde(default)]
29    address: Option<serde_yaml::Value>,
30    event: String,
31    #[serde(default)]
32    fingerprint: Option<String>,
33    #[serde(default)]
34    supersedes: Option<String>,
35    #[serde(default)]
36    superseded_by: Option<String>,
37    #[serde(default)]
38    deprecated: bool,
39    // IndexMap preserves YAML insertion order — critical for ABI decode field ordering.
40    fields: IndexMap<String, CsdlFieldRaw>,
41    #[serde(default)]
42    meta: CsdlMetaRaw,
43}
44
45#[derive(Debug, Deserialize, Default)]
46struct CsdlMetaRaw {
47    #[serde(default)]
48    protocol: Option<String>,
49    #[serde(default)]
50    category: Option<String>,
51    #[serde(default)]
52    verified: bool,
53    #[serde(default)]
54    trust_level: Option<String>,
55    #[serde(default)]
56    provenance_sig: Option<String>,
57}
58
59#[derive(Debug, Deserialize)]
60struct CsdlFieldRaw {
61    #[serde(rename = "type")]
62    ty: String,
63    #[serde(default)]
64    indexed: bool,
65    #[serde(default)]
66    nullable: bool,
67    #[serde(default)]
68    description: Option<String>,
69}
70
71// ─── Parser ───────────────────────────────────────────────────────────────────
72
73pub struct CsdlParser;
74
75impl CsdlParser {
76    /// Parse the first schema document from a CSDL YAML string.
77    /// For files with multiple schemas (`---` separator), use `parse_all()`.
78    pub fn parse(yaml: &str) -> Result<Schema, RegistryError> {
79        let mut schemas = Self::parse_all(yaml)?;
80        if schemas.is_empty() {
81            return Err(RegistryError::ParseError("empty CSDL file".into()));
82        }
83        Ok(schemas.remove(0))
84    }
85
86    /// Parse all schema documents from a CSDL YAML string.
87    ///
88    /// `.csdl` files may contain multiple schemas separated by `---`.
89    /// Each document is parsed independently and returned in file order.
90    pub fn parse_all(yaml: &str) -> Result<Vec<Schema>, RegistryError> {
91        use serde::de::Deserialize as _;
92
93        let mut schemas = Vec::new();
94        for doc in serde_yaml::Deserializer::from_str(yaml) {
95            let value = serde_yaml::Value::deserialize(doc)
96                .map_err(|e| RegistryError::ParseError(e.to_string()))?;
97            // Skip null/empty documents (e.g. trailing `---`)
98            if value.is_null() {
99                continue;
100            }
101            schemas.push(Self::parse_value(value)?);
102        }
103        Ok(schemas)
104    }
105
106    /// Parse a single schema from a `serde_yaml::Value` (one YAML document).
107    fn parse_value(value: serde_yaml::Value) -> Result<Schema, RegistryError> {
108        // CSDL documents look like:
109        //   schema UniswapV3Swap:
110        //     version: 2
111        //     ...
112        // The top-level key is "schema <Name>".
113        let mapping = match &value {
114            serde_yaml::Value::Mapping(m) => m,
115            _ => {
116                return Err(RegistryError::ParseError(
117                    "CSDL document must be a YAML mapping".into(),
118                ))
119            }
120        };
121
122        let (schema_key, schema_body) = mapping
123            .iter()
124            .find(|(k, _)| {
125                k.as_str()
126                    .map(|s| s.starts_with("schema "))
127                    .unwrap_or(false)
128            })
129            .ok_or_else(|| RegistryError::ParseError("missing 'schema <Name>' key".into()))?;
130
131        let schema_name = schema_key
132            .as_str()
133            .unwrap()
134            .strip_prefix("schema ")
135            .unwrap()
136            .trim()
137            .to_string();
138
139        // Re-inject the `schema` field so CsdlRaw can deserialize it
140        let body: CsdlRaw = {
141            let mut m = serde_yaml::Mapping::new();
142            if let serde_yaml::Value::Mapping(map) = schema_body.clone() {
143                for (k, v) in map {
144                    m.insert(k, v);
145                }
146            }
147            m.insert(
148                serde_yaml::Value::String("schema".into()),
149                serde_yaml::Value::String(schema_name.clone()),
150            );
151            serde_yaml::from_value(serde_yaml::Value::Mapping(m))
152                .map_err(|e| RegistryError::ParseError(e.to_string()))?
153        };
154
155        // Parse fields — IndexMap preserves YAML insertion order
156        let mut fields: Vec<(String, FieldDef)> = Vec::with_capacity(body.fields.len());
157        for (name, raw_field) in &body.fields {
158            let ty = parse_type(&raw_field.ty).map_err(|e| {
159                RegistryError::ParseError(format!("field '{}': {}", name, e))
160            })?;
161            fields.push((
162                name.clone(),
163                FieldDef {
164                    ty,
165                    indexed: raw_field.indexed,
166                    nullable: raw_field.nullable,
167                    description: raw_field.description.clone(),
168                },
169            ));
170        }
171
172        // Parse addresses
173        let address = match &body.address {
174            None => None,
175            Some(serde_yaml::Value::String(s)) => Some(vec![s.clone()]),
176            Some(serde_yaml::Value::Sequence(seq)) => Some(
177                seq.iter()
178                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
179                    .collect(),
180            ),
181            Some(serde_yaml::Value::Null) | Some(_) => None,
182        };
183
184        // Parse meta
185        let trust_level = match body.meta.trust_level.as_deref() {
186            Some("community_verified") => TrustLevel::CommunityVerified,
187            Some("maintainer_verified") => TrustLevel::MaintainerVerified,
188            Some("protocol_verified") => TrustLevel::ProtocolVerified,
189            _ => TrustLevel::Unverified,
190        };
191        let meta = SchemaMeta {
192            protocol: body.meta.protocol,
193            category: body.meta.category,
194            verified: body.meta.verified,
195            trust_level,
196            provenance_sig: body.meta.provenance_sig,
197        };
198
199        let fingerprint = body
200            .fingerprint
201            .map(EventFingerprint::new)
202            .unwrap_or_else(|| EventFingerprint::new("0x".to_string()));
203
204        Ok(Schema {
205            name: schema_name,
206            version: body.version,
207            chains: body.chains,
208            address,
209            event: body.event,
210            fingerprint,
211            supersedes: body.supersedes,
212            superseded_by: body.superseded_by,
213            deprecated: body.deprecated,
214            fields,
215            meta,
216        })
217    }
218}
219
220/// Parse a ChainCodec canonical type string into a `CanonicalType`.
221fn parse_type(s: &str) -> Result<CanonicalType, String> {
222    let s = s.trim();
223    match s {
224        "bool" => Ok(CanonicalType::Bool),
225        "address" => Ok(CanonicalType::Address),
226        "pubkey" => Ok(CanonicalType::Pubkey),
227        "bech32" => Ok(CanonicalType::Bech32Address),
228        "bytes" => Ok(CanonicalType::BytesVec),
229        "string" => Ok(CanonicalType::Str),
230        "hash256" => Ok(CanonicalType::Hash256),
231        "timestamp" => Ok(CanonicalType::Timestamp),
232        _ if s.starts_with("uint") && s[4..].parse::<u16>().is_ok() => {
233            Ok(CanonicalType::Uint(s[4..].parse().unwrap()))
234        }
235        _ if s.starts_with("int") && s[3..].parse::<u16>().is_ok() => {
236            Ok(CanonicalType::Int(s[3..].parse().unwrap()))
237        }
238        _ if s.starts_with("bytes") && s[5..].parse::<u8>().is_ok() => {
239            Ok(CanonicalType::Bytes(s[5..].parse().unwrap()))
240        }
241        _ if s.ends_with("[]") => {
242            let inner = parse_type(&s[..s.len() - 2])?;
243            Ok(CanonicalType::Vec(Box::new(inner)))
244        }
245        _ => Err(format!("unknown type: '{s}'")),
246    }
247}
248
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-document sample with an explicit `address: null` and a
    /// populated `meta` section.
    const SAMPLE_CSDL: &str = r#"
schema ERC20Transfer:
  version: 1
  chains: [ethereum, arbitrum, polygon, base]
  address: null
  event: Transfer
  fingerprint: "0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef"
  fields:
    from:   { type: address, indexed: true  }
    to:     { type: address, indexed: true  }
    value:  { type: uint256, indexed: false }
  meta:
    protocol: erc20
    category: token
    verified: true
    trust_level: maintainer_verified
"#;

    /// Two schema documents separated by `---`.
    const MULTI_DOC_CSDL: &str = r#"
schema ERC20Transfer:
  version: 1
  chains: [ethereum]
  event: Transfer
  fingerprint: "0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef"
  fields:
    from:  { type: address, indexed: true  }
    to:    { type: address, indexed: true  }
    value: { type: uint256, indexed: false }
  meta: {}
---
schema ERC20Approval:
  version: 1
  chains: [ethereum]
  event: Approval
  fingerprint: "0x8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b925"
  fields:
    owner:   { type: address, indexed: true  }
    spender: { type: address, indexed: true  }
    value:   { type: uint256, indexed: false }
  meta: {}
"#;

    #[test]
    fn parse_erc20_transfer() {
        let schema = CsdlParser::parse(SAMPLE_CSDL).unwrap();
        assert_eq!(schema.name, "ERC20Transfer");
        assert_eq!(schema.version, 1);
        assert_eq!(schema.fields.len(), 3);
        assert_eq!(schema.event, "Transfer");
        assert_eq!(schema.meta.trust_level, TrustLevel::MaintainerVerified);
    }

    #[test]
    fn null_address_becomes_none() {
        let schema = CsdlParser::parse(SAMPLE_CSDL).unwrap();
        assert!(schema.address.is_none());
    }

    #[test]
    fn field_order_preserved() {
        let schema = CsdlParser::parse(SAMPLE_CSDL).unwrap();
        // Fields must be in YAML declaration order: from, to, value
        assert_eq!(schema.fields[0].0, "from");
        assert_eq!(schema.fields[1].0, "to");
        assert_eq!(schema.fields[2].0, "value");
    }

    #[test]
    fn parse_multi_doc_csdl() {
        let schemas = CsdlParser::parse_all(MULTI_DOC_CSDL).unwrap();
        assert_eq!(schemas.len(), 2);
        assert_eq!(schemas[0].name, "ERC20Transfer");
        assert_eq!(schemas[1].name, "ERC20Approval");
    }

    #[test]
    fn parse_returns_first_of_multi_doc() {
        let schema = CsdlParser::parse(MULTI_DOC_CSDL).unwrap();
        assert_eq!(schema.name, "ERC20Transfer");
    }

    #[test]
    fn parse_rejects_empty_input() {
        assert!(CsdlParser::parse("").is_err());
    }

    #[test]
    fn parse_rejects_non_mapping_document() {
        assert!(CsdlParser::parse("just a string").is_err());
    }

    #[test]
    fn parse_rejects_missing_schema_key() {
        assert!(CsdlParser::parse("version: 1\n").is_err());
    }

    #[test]
    fn parse_type_uint256() {
        let t = parse_type("uint256").unwrap();
        assert!(matches!(t, CanonicalType::Uint(256)));
    }

    #[test]
    fn parse_type_int24() {
        let t = parse_type("int24").unwrap();
        assert!(matches!(t, CanonicalType::Int(24)));
    }

    #[test]
    fn parse_type_uint160() {
        let t = parse_type("uint160").unwrap();
        assert!(matches!(t, CanonicalType::Uint(160)));
    }

    #[test]
    fn parse_type_address_array() {
        // Check the element type too, not just that a Vec came back.
        match parse_type("address[]").unwrap() {
            CanonicalType::Vec(element) => {
                assert!(matches!(*element, CanonicalType::Address))
            }
            _ => panic!("expected CanonicalType::Vec"),
        }
    }

    #[test]
    fn parse_type_bytes32() {
        let t = parse_type("bytes32").unwrap();
        assert!(matches!(t, CanonicalType::Bytes(32)));
    }

    #[test]
    fn parse_type_plain_bytes_is_dynamic() {
        let t = parse_type("bytes").unwrap();
        assert!(matches!(t, CanonicalType::BytesVec));
    }

    #[test]
    fn parse_type_trims_whitespace() {
        let t = parse_type("  bool ").unwrap();
        assert!(matches!(t, CanonicalType::Bool));
    }

    #[test]
    fn parse_type_rejects_unknown() {
        assert!(parse_type("float64").is_err());
        assert!(parse_type("uint").is_err());
        assert!(parse_type("").is_err());
    }
}
351}