Skip to main content

oxihuman_core/
avro_codec.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
5//! Apache Avro codec stub.
6
7/// An Avro primitive type.
8#[derive(Debug, Clone, PartialEq)]
9pub enum AvroType {
10    Null,
11    Boolean,
12    Int,
13    Long,
14    Float,
15    Double,
16    Bytes,
17    String,
18    Record {
19        name: String,
20        fields: Vec<AvroField>,
21    },
22    Array {
23        items: Box<AvroType>,
24    },
25    Union(Vec<AvroType>),
26}
27
28/// A record field in an Avro schema.
29#[derive(Debug, Clone, PartialEq)]
30pub struct AvroField {
31    pub name: String,
32    pub schema: AvroType,
33}
34
/// A concrete Avro datum.
#[derive(Debug, Clone, PartialEq)]
pub enum AvroValue {
    /// The null value.
    Null,
    /// A boolean.
    Boolean(bool),
    /// An `int` value.
    Int(i32),
    /// A `long` value.
    Long(i64),
    /// A `float` value.
    Float(f32),
    /// A `double` value.
    Double(f64),
    /// Raw bytes.
    Bytes(Vec<u8>),
    /// A string.
    String(String),
    /// An ordered list of values.
    Array(Vec<AvroValue>),
    /// A record, stored as ordered `(field name, value)` pairs.
    Record(Vec<(String, AvroValue)>),
}
49
/// Errors reported by the Avro codec.
#[derive(Debug, Clone, PartialEq)]
pub enum AvroError {
    /// A value did not match the schema it was checked against.
    SchemaMismatch,
    /// The input ended before a complete value had been read.
    UnexpectedEnd,
    /// The input bytes do not form a valid Avro encoding.
    InvalidEncoding,
}

impl std::fmt::Display for AvroError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::SchemaMismatch => "Avro schema mismatch",
            Self::UnexpectedEnd => "unexpected end of Avro buffer",
            Self::InvalidEncoding => "invalid Avro encoding",
        };
        f.write_str(message)
    }
}

// Implementing `Error` lets callers box this type as `Box<dyn Error>` and
// propagate it with `?` into generic error types.
impl std::error::Error for AvroError {}
67
/// Append the Avro encoding of a `long` to `buf`.
///
/// The value is first zigzag-mapped to an unsigned integer (so values of
/// small magnitude, positive or negative, stay small) and then written as a
/// little-endian base-128 varint: seven payload bits per byte, with the high
/// bit set on every byte except the last.
pub fn encode_long(value: i64, buf: &mut Vec<u8>) {
    let mut remaining = ((value << 1) ^ (value >> 63)) as u64;
    // Emit continuation bytes while more than seven bits are left.
    while remaining >= 0x80 {
        buf.push((remaining & 0x7F) as u8 | 0x80);
        remaining >>= 7;
    }
    // The final byte has its high bit clear.
    buf.push(remaining as u8);
}
82
83/// Decode an Avro zigzag long.
84pub fn decode_long(buf: &[u8]) -> Result<(i64, usize), AvroError> {
85    let mut n: u64 = 0;
86    let mut shift = 0u32;
87    for (i, &b) in buf.iter().enumerate() {
88        n |= ((b & 0x7F) as u64) << shift;
89        if b & 0x80 == 0 {
90            let value = ((n >> 1) as i64) ^ (-((n & 1) as i64));
91            return Ok((value, i + 1));
92        }
93        shift += 7;
94        if shift >= 64 {
95            return Err(AvroError::InvalidEncoding);
96        }
97    }
98    Err(AvroError::UnexpectedEnd)
99}
100
101/// Encode an Avro bytes field (length-prefixed).
102pub fn encode_bytes(data: &[u8], buf: &mut Vec<u8>) {
103    encode_long(data.len() as i64, buf);
104    buf.extend_from_slice(data);
105}
106
107/// Return the number of fields in an Avro record value.
108pub fn record_field_count(val: &AvroValue) -> usize {
109    if let AvroValue::Record(fields) = val {
110        fields.len()
111    } else {
112        0
113    }
114}
115
116/// Return `true` if the Avro type is a union.
117pub fn is_union(t: &AvroType) -> bool {
118    matches!(t, AvroType::Union(_))
119}
120
121/// Return the name of an Avro record type, or `None`.
122pub fn type_name(t: &AvroType) -> Option<&str> {
123    if let AvroType::Record { name, .. } = t {
124        Some(name.as_str())
125    } else {
126        None
127    }
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `value` into a fresh buffer and return it.
    fn encoded(value: i64) -> Vec<u8> {
        let mut buf = Vec::new();
        encode_long(value, &mut buf);
        buf
    }

    #[test]
    fn test_encode_zero() {
        // Zero encodes to the single byte 0x00.
        assert_eq!(encoded(0), [0x00]);
    }

    #[test]
    fn test_encode_negative_one() {
        // Zigzag maps -1 to 1, which fits in one byte.
        assert_eq!(encoded(-1), [0x01]);
    }

    #[test]
    fn test_encode_multi_byte() {
        // 64 zigzags to 128, the first value needing a continuation byte.
        assert_eq!(encoded(64), [0x80, 0x01]);
    }

    #[test]
    fn test_decode_zero() {
        // Decoding 0x00 gives 0 and consumes one byte.
        let (v, n) = decode_long(&[0x00]).expect("should succeed");
        assert_eq!(v, 0);
        assert_eq!(n, 1);
    }

    #[test]
    fn test_roundtrip_positive() {
        let buf = encoded(12345);
        let (v, _) = decode_long(&buf).expect("should succeed");
        assert_eq!(v, 12345);
    }

    #[test]
    fn test_roundtrip_negative() {
        let buf = encoded(-999);
        let (v, _) = decode_long(&buf).expect("should succeed");
        assert_eq!(v, -999);
    }

    #[test]
    fn test_roundtrip_boundaries() {
        // Extremes and the values on either side of a byte-length change.
        for &v in &[i64::MIN, i64::MAX, -65, -64, -1, 0, 1, 63, 64] {
            let buf = encoded(v);
            assert_eq!(decode_long(&buf), Ok((v, buf.len())), "value {v}");
        }
    }

    #[test]
    fn test_decode_ignores_trailing_bytes() {
        // Only the bytes of the first varint are consumed.
        assert_eq!(decode_long(&[0x02, 0xFF]), Ok((1, 1)));
    }

    #[test]
    fn test_encode_bytes() {
        // Length 3 zigzags to 6, followed by the payload verbatim.
        let mut buf = vec![];
        encode_bytes(&[1, 2, 3], &mut buf);
        assert_eq!(buf, [0x06, 1, 2, 3]);
    }

    #[test]
    fn test_encode_bytes_empty() {
        // An empty payload is just the zero length prefix.
        let mut buf = vec![];
        encode_bytes(&[], &mut buf);
        assert_eq!(buf, [0x00]);
    }

    #[test]
    fn test_record_field_count() {
        let v = AvroValue::Record(vec![
            ("a".to_string(), AvroValue::Int(1)),
            ("b".to_string(), AvroValue::Boolean(false)),
        ]);
        assert_eq!(record_field_count(&v), 2);
    }

    #[test]
    fn test_record_field_count_non_record() {
        // Non-record values report zero fields.
        assert_eq!(record_field_count(&AvroValue::Null), 0);
    }

    #[test]
    fn test_is_union_true() {
        let t = AvroType::Union(vec![AvroType::Null, AvroType::String]);
        assert!(is_union(&t));
    }

    #[test]
    fn test_is_union_false() {
        assert!(!is_union(&AvroType::Long));
    }

    #[test]
    fn test_type_name() {
        let t = AvroType::Record {
            name: "Foo".to_string(),
            fields: vec![],
        };
        assert_eq!(type_name(&t), Some("Foo"));
    }

    #[test]
    fn test_type_name_non_record() {
        assert_eq!(type_name(&AvroType::Bytes), None);
    }

    #[test]
    fn test_unexpected_end() {
        // A lone continuation byte is a truncated varint.
        assert_eq!(decode_long(&[0x80]).unwrap_err(), AvroError::UnexpectedEnd);
    }

    #[test]
    fn test_decode_empty() {
        assert_eq!(decode_long(&[]).unwrap_err(), AvroError::UnexpectedEnd);
    }

    #[test]
    fn test_varint_too_long() {
        // Ten continuation bytes cannot fit in 64 bits.
        assert_eq!(
            decode_long(&[0x80; 10]).unwrap_err(),
            AvroError::InvalidEncoding
        );
    }
}
216}