rain_metadata/cli/
build.rs

1use clap::Parser;
2use anyhow::anyhow;
3use itertools::izip;
4use std::path::PathBuf;
5use crate::cli::output::SupportedOutputEncoding;
6use crate::meta::{
7    RainMetaDocumentV1Item, KnownMeta, ContentType, ContentEncoding, ContentLanguage,
8    magic::KnownMagic,
9};
10
11/// command for building rain meta
12#[derive(Parser)]
13pub struct Build {
14    /// Output path. If not specified, the output is written to stdout.
15    #[arg(short, long)]
16    output_path: Option<PathBuf>,
17    /// Output encoding. If not specified, the output is written in binary format.
18    #[arg(short = 'E', long, default_value = "binary")]
19    output_encoding: SupportedOutputEncoding,
20    /// Global magic number. If not specified, the default magic number is used.
21    /// The default magic number is rain-meta-document-v1. Don't change this
22    /// unless you know what you are doing.
23    #[arg(short = 'M', long, default_value = "rain-meta-document-v1")]
24    global_magic: KnownMagic,
25    /// Sequence of input paths. The number of input paths must match the number
26    /// of magic numbers, content types, content encodings and content languages.
27    /// Reading from stdin is not supported but proccess substitution can be used.
28    #[arg(short, long, num_args = 1..)]
29    input_path: Vec<PathBuf>,
30    /// Sequence of magic numbers. The number of magic numbers must match the
31    /// number of input paths, content types, content encodings and content languages.
32    /// Magic numbers are arbitrary byte sequences used to build self-describing
33    /// payloads.
34    #[arg(short, long, num_args = 1..)]
35    magic: Vec<KnownMagic>,
36    /// Sequence of content types. The number of content types must match the
37    /// number of input paths, magic numbers, content encodings and content languages.
38    /// Content type is as per http headers.
39    #[arg(short = 't', long, num_args = 1..)]
40    content_type: Vec<ContentType>,
41    /// Sequence of content encodings. The number of content encodings must match the
42    /// number of input paths, magic numbers, content types and content languages.
43    /// Content encoding is as per http headers.
44    #[arg(short = 'e', long, num_args = 1..)]
45    content_encoding: Vec<ContentEncoding>,
46    /// Sequence of content languages. The number of content languages must match the
47    /// number of input paths, magic numbers, content types and content encodings.
48    /// Content language is as per http headers.
49    #[arg(short = 'l', long, num_args = 1..)]
50    content_language: Vec<ContentLanguage>,
51}
52
53/// Temporary housing for raw data before it is converted into a RainMetaDocumentV1Item.
54#[derive(Clone, Debug)]
55pub struct BuildItem {
56    /// Raw data. Ostensibly this is the content of a file.
57    pub data: Vec<u8>,
58    /// Magic number taken from build options.
59    pub magic: KnownMagic,
60    /// Content type taken from build options.
61    pub content_type: ContentType,
62    /// Content encoding taken from build options.
63    pub content_encoding: ContentEncoding,
64    /// Content language taken from build options.
65    pub content_language: ContentLanguage,
66}
67
68/// Moving from a BuildItem to a RainMetaDocumentV1Item requires normalization
69/// according to the magic number and encoding from the build options.
70impl TryFrom<&BuildItem> for RainMetaDocumentV1Item {
71    type Error = anyhow::Error;
72    fn try_from(item: &BuildItem) -> anyhow::Result<Self> {
73        let normalized = TryInto::<KnownMeta>::try_into(item.magic)?.normalize(&item.data)?;
74        let encoded = item.content_encoding.encode(&normalized);
75        Ok(RainMetaDocumentV1Item {
76            payload: serde_bytes::ByteBuf::from(encoded),
77            magic: item.magic,
78            content_type: item.content_type,
79            content_encoding: item.content_encoding,
80            content_language: item.content_language,
81        })
82    }
83}
84
85/// Build a rain meta document from a sequence of BuildItems.
86pub fn build_bytes(magic: KnownMagic, items: Vec<BuildItem>) -> anyhow::Result<Vec<u8>> {
87    let mut metas: Vec<RainMetaDocumentV1Item> = vec![];
88    for item in items {
89        metas.push(RainMetaDocumentV1Item::try_from(&item)?);
90    }
91    Ok(RainMetaDocumentV1Item::cbor_encode_seq(&metas, magic)?)
92}
93
94/// Build a rain meta document from command line options.
95/// Enforces length constraints on the input paths, magic numbers, content types,
96/// content encodings and content languages.
97/// Handles reading input files and writing to files/stdout according to the
98/// build options.
99pub fn build(b: Build) -> anyhow::Result<()> {
100    if b.input_path.len() != b.magic.len() {
101        return Err(anyhow!(
102            "{} inputs does not match {} magic numbers.",
103            b.input_path.len(),
104            b.magic.len()
105        ));
106    }
107
108    if b.input_path.len() != b.content_type.len() {
109        return Err(anyhow!(
110            "{} inputs does not match {} content types.",
111            b.input_path.len(),
112            b.content_type.len()
113        ));
114    }
115
116    if b.input_path.len() != b.content_encoding.len() {
117        return Err(anyhow!(
118            "{} inputs does not match {} content encodings.",
119            b.input_path.len(),
120            b.content_encoding.len()
121        ));
122    }
123
124    if b.input_path.len() != b.content_language.len() {
125        return Err(anyhow!(
126            "{} inputs does not match {} content languages.",
127            b.input_path.len(),
128            b.content_language.len()
129        ));
130    }
131    let mut items: Vec<BuildItem> = vec![];
132    for (input_path, magic, content_type, content_encoding, content_language) in izip!(
133        b.input_path.iter(),
134        b.magic.iter(),
135        b.content_type.iter(),
136        b.content_encoding.iter(),
137        b.content_language.iter()
138    ) {
139        items.push(BuildItem {
140            data: std::fs::read(input_path)?,
141            magic: *magic,
142            content_type: *content_type,
143            content_encoding: *content_encoding,
144            content_language: *content_language,
145        });
146    }
147    crate::cli::output::output(
148        &b.output_path,
149        b.output_encoding,
150        &build_bytes(b.global_magic, items)?,
151    )
152}
153
#[cfg(test)]
mod tests {
    use super::{build_bytes, BuildItem};
    use crate::meta::{
        magic::KnownMagic, ContentEncoding, ContentLanguage, ContentType, RainMetaDocumentV1Item,
    };
    use strum::IntoEnumIterator;

    /// Build item whose data is the two bytes `[]`, tagged as English content.
    /// The other options vary per test.
    fn empty_array_item(
        magic: KnownMagic,
        content_type: ContentType,
        content_encoding: ContentEncoding,
    ) -> BuildItem {
        BuildItem {
            data: "[]".as_bytes().to_vec(),
            magic,
            content_type,
            content_encoding,
            content_language: ContentLanguage::En,
        }
    }

    /// Asserts the exact byte layout of a document built with the
    /// RainMetaDocumentV1 global magic from a single item whose payload is
    /// `[]`, whose content encoding is identity and whose language is en.
    ///
    /// `content_type` is the expected content type string. The length header
    /// checked below is hard coded for a 16 byte string, which holds for both
    /// `application/json` and `application/cbor`.
    fn assert_single_item_layout(bytes: &[u8], item_magic: KnownMagic, content_type: &str) {
        // https://github.com/rainprotocol/specs/blob/main/metadata-v1.md#example
        // 8 byte magic number prefix
        assert_eq!(
            &bytes[0..8],
            KnownMagic::RainMetaDocumentV1.to_prefix_bytes()
        );
        // cbor map with 5 keys
        assert_eq!(bytes[8], 0xa5);
        // key 0
        assert_eq!(bytes[9], 0x00);
        // major type 2 (bytes) length 2
        assert_eq!(bytes[10], 0b010_00010);
        // payload
        assert_eq!(bytes[11..13], "[]".as_bytes()[..]);
        // key 1
        assert_eq!(bytes[13], 0x01);
        // major type 0 (unsigned integer), additional info 27: an 8 byte
        // integer follows
        assert_eq!(bytes[14], 0b000_11011);
        // magic number of the item
        assert_eq!(&bytes[15..23], item_magic.to_prefix_bytes());
        // key 2
        assert_eq!(bytes[23], 0x02);
        // text string of length 16
        assert_eq!(bytes[24], 0b011_10000);
        // the content type string
        assert_eq!(&bytes[25..41], content_type.as_bytes());
        // key 3
        assert_eq!(bytes[41], 0x03);
        // text string identity length 8
        assert_eq!(bytes[42], 0b011_01000);
        // the string identity
        assert_eq!(&bytes[43..51], "identity".as_bytes());
        // key 4
        assert_eq!(bytes[51], 0x04);
        // text string en length 2
        assert_eq!(bytes[52], 0b011_00010);
        // the string en
        assert_eq!(&bytes[53..55], "en".as_bytes());

        // nothing follows the language string
        assert_eq!(bytes.len(), 55);
    }

    /// Test that the magic number prefix is correct for all known magic numbers
    /// in isolation from all build items.
    #[test]
    fn test_build_empty() -> anyhow::Result<()> {
        for global_magic in KnownMagic::iter() {
            let built_bytes = build_bytes(global_magic, vec![])?;
            assert_eq!(built_bytes, global_magic.to_prefix_bytes());
        }
        Ok(())
    }

    /// We can build a single document item from a single build item.
    /// Empty ABI documents are used to avoid testing the normalisation and
    /// encoding process.
    #[test]
    fn test_into_meta_document() -> anyhow::Result<()> {
        let build_item = empty_array_item(
            KnownMagic::SolidityAbiV2,
            ContentType::Json,
            ContentEncoding::None,
        );

        let meta_document = RainMetaDocumentV1Item::try_from(&build_item)?;
        let expected_meta_document = RainMetaDocumentV1Item {
            payload: serde_bytes::ByteBuf::from("[]".as_bytes().to_vec()),
            magic: KnownMagic::SolidityAbiV2,
            content_type: ContentType::Json,
            content_encoding: ContentEncoding::None,
            content_language: ContentLanguage::En,
        };
        assert_eq!(meta_document, expected_meta_document);
        Ok(())
    }

    /// The final CBOR bytes are as expected for a single build item. An empty
    /// ABI is used to avoid testing the normalisation and encoding process.
    #[test]
    fn test_empty_item() -> anyhow::Result<()> {
        let build_item = empty_array_item(
            KnownMagic::SolidityAbiV2,
            ContentType::Json,
            ContentEncoding::Identity,
        );

        let bytes = build_bytes(KnownMagic::RainMetaDocumentV1, vec![build_item])?;
        assert_single_item_layout(&bytes, KnownMagic::SolidityAbiV2, "application/json");

        Ok(())
    }

    /// Same layout check as `test_empty_item`, but for an item with the
    /// DotrainV1 magic number and the cbor content type.
    #[test]
    fn test_cbor_encoding_type() -> anyhow::Result<()> {
        let build_item = empty_array_item(
            KnownMagic::DotrainV1,
            ContentType::Cbor,
            ContentEncoding::Identity,
        );

        let bytes = build_bytes(KnownMagic::RainMetaDocumentV1, vec![build_item])?;
        assert_single_item_layout(&bytes, KnownMagic::DotrainV1, "application/cbor");

        Ok(())
    }
}
313}