pkgsrc_kv_derive/
lib.rs

1/*
2 * Copyright (c) 2025 Jonathan Perkin <jonathan@perkin.org.uk>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17//! Derive macro for parsing `KEY=VALUE` formats.
18//!
19//! This crate provides [`macro@Kv`] for automatically implementing parsers
20//! for structs from `KEY=VALUE` formatted input.
21//!
22//! # Field Types
23//!
24//! | Rust Type | Attribute | Behavior |
25//! |-----------|-----------|----------|
26//! | `T` | | Required single value |
27//! | `Option<T>` | | Optional single value |
28//! | `Vec<T>` | | Whitespace-separated values on single line |
29//! | `Option<Vec<T>>` | | Optional whitespace-separated values |
30//! | `Vec<T>` | `#[kv(multiline)]` | Multiple lines collected into Vec |
31//! | `Option<Vec<T>>` | `#[kv(multiline)]` | Optional multiple lines |
32//! | `HashMap<String, String>` | `#[kv(collect)]` | Collects unhandled keys |
33//!
34//! # Container Attributes
35//!
36//! - `#[kv(allow_unknown)]` - Ignore unknown keys instead of returning an error
37//!
38//! # Field Attributes
39//!
40//! - `#[kv(variable = "KEY")]` - Use custom key name instead of uppercased field name
41//! - `#[kv(multiline)]` - Collect multiple lines with the same key into a `Vec`
42//! - `#[kv(collect)]` - Collect all unhandled keys into this `HashMap<String, String>`
43//!
44//! # Duplicate Key Behavior
45//!
46//! For non-multiline fields, duplicate keys overwrite the previous value.
47//! For multiline fields, each occurrence appends to the `Vec`.
48//!
49//! # Examples
50//!
51//! ```
52//! use indoc::indoc;
53//! use pkgsrc::kv::{Error, Kv};
54//! use pkgsrc::PkgName;
55//!
56//! #[derive(Kv)]
57//! pub struct Package {
58//!     pkgname: PkgName,
59//!     #[kv(variable = "SIZE_PKG")]
60//!     size: u64,
61//!     #[kv(multiline)]
62//!     description: Vec<String>,
63//!     homepage: Option<String>,
64//! }
65//!
66//! let input = indoc! {"
67//!     PKGNAME=foo-1.0
68//!     SIZE_PKG=1234
69//!     DESCRIPTION=A package that does
70//!     DESCRIPTION=many interesting things.
71//! "};
72//! let pkg = Package::parse(input)?;
73//! assert_eq!(pkg.pkgname.pkgbase(), "foo");
74//! assert_eq!(pkg.size, 1234);
75//! assert_eq!(pkg.description, vec!["A package that does", "many interesting things."]);
76//! assert_eq!(pkg.homepage, None);
77//!
78//! // Missing required fields return an error.
79//! assert!(Package::parse("PKGNAME=bar-1.0\n").is_err());
80//! # Ok::<(), Error>(())
81//! ```
82//!
83//! Use `collect` to collect unhandled keys into a `HashMap`, for example
84//! when parsing `+BUILD_INFO` where arbitrary variables will be present:
85//!
86//! ```
87//! use indoc::indoc;
88//! use std::collections::HashMap;
89//! use pkgsrc::kv::{Error, Kv};
90//!
91//! #[derive(Kv)]
92//! pub struct BuildInfo {
93//!     build_host: Option<String>,
94//!     machine_arch: Option<String>,
95//!     #[kv(collect)]
96//!     vars: HashMap<String, String>,
97//! }
98//!
99//! let input = indoc! {"
100//!     BUILD_DATE=2025-01-15 10:30:00 +0000
101//!     BUILD_HOST=builder.example.com
102//!     MACHINE_ARCH=x86_64
103//!     PKGPATH=devel/example
104//! "};
105//! let info = BuildInfo::parse(input)?;
106//! assert_eq!(info.build_host, Some("builder.example.com".to_string()));
107//! assert_eq!(info.machine_arch, Some("x86_64".to_string()));
108//! assert_eq!(info.vars.get("PKGPATH"), Some(&"devel/example".to_string()));
109//! assert_eq!(info.vars.get("VARBASE"), None);
110//! # Ok::<(), Error>(())
111//! ```
112
113#![deny(missing_docs)]
114#![deny(unsafe_code)]
115
116use proc_macro::TokenStream;
117use proc_macro2::TokenStream as TokenStream2;
118use quote::quote;
119use syn::{
120    parse_macro_input, Attribute, Data, DeriveInput, Field, Fields,
121    GenericArgument, Ident, PathArguments, Type,
122};
123
124/// Derive macro for parsing `KEY=VALUE` formatted input.
125///
126/// Generates a `parse` method that parses the struct from a string
127/// containing `KEY=VALUE` pairs separated by newlines.
128///
129/// See the [module documentation](crate) for detailed usage.
130#[proc_macro_derive(Kv, attributes(kv))]
131pub fn derive_kv(input: TokenStream) -> TokenStream {
132    let input = parse_macro_input!(input as DeriveInput);
133
134    match generate_impl(&input) {
135        Ok(tokens) => tokens.into(),
136        Err(err) => err.to_compile_error().into(),
137    }
138}
139
140/// Main implementation generator.
141fn generate_impl(input: &DeriveInput) -> syn::Result<TokenStream2> {
142    let name = &input.ident;
143    let container_attrs = ContainerAttrs::parse(&input.attrs)?;
144
145    let fields = extract_named_fields(input)?;
146
147    let parsed_fields: Vec<ParsedField> = fields
148        .iter()
149        .map(ParsedField::from_field)
150        .collect::<syn::Result<_>>()?;
151
152    let collect_field =
153        parsed_fields.iter().find(|f| f.kind == FieldKind::Collect);
154    let regular_fields: Vec<_> = parsed_fields
155        .iter()
156        .filter(|f| f.kind != FieldKind::Collect)
157        .collect();
158
159    let field_decls = generate_field_declarations(&parsed_fields);
160    let match_arms = generate_match_arms(&regular_fields);
161    let unknown_handling =
162        generate_unknown_handling(&container_attrs, collect_field);
163    let field_extracts: Vec<_> = parsed_fields
164        .iter()
165        .map(ParsedField::extract_expr)
166        .collect();
167    let field_names: Vec<_> = parsed_fields.iter().map(|f| &f.ident).collect();
168
169    let serde_impl = generate_serde_impl(name, &parsed_fields);
170
171    Ok(quote! {
172        impl #name {
173            /// Parses from `KEY=VALUE` formatted input.
174            ///
175            /// # Errors
176            ///
177            /// Returns an error if:
178            /// - A line doesn't contain `=`
179            /// - A required field is missing
180            /// - A value fails to parse into its target type
181            /// - An unknown key is encountered (unless `allow_unknown` is set)
182            pub fn parse(input: &str) -> std::result::Result<Self, ::pkgsrc::kv::Error> {
183                use ::pkgsrc::kv::FromKv;
184
185                #(#field_decls)*
186
187                let input_start = input.as_ptr() as usize;
188
189                for line in input.lines() {
190                    if line.is_empty() {
191                        continue;
192                    }
193
194                    // Use pointer arithmetic to compute the line offset.
195                    // This correctly handles both LF and CRLF line endings.
196                    let line_offset = line.as_ptr() as usize - input_start;
197
198                    let eq_pos = match line.find('=') {
199                        Some(p) => p,
200                        None => {
201                            return Err(::pkgsrc::kv::Error::ParseLine(::pkgsrc::kv::Span {
202                                offset: line_offset,
203                                len: line.len(),
204                            }));
205                        }
206                    };
207
208                    let key = &line[..eq_pos];
209                    let value = &line[eq_pos + 1..];
210                    let value_offset = line_offset + eq_pos + 1;
211                    let value_span = ::pkgsrc::kv::Span {
212                        offset: value_offset,
213                        len: value.len(),
214                    };
215
216                    match key {
217                        #(#match_arms)*
218                        #unknown_handling
219                    }
220                }
221
222                Ok(#name {
223                    #(#field_names: #field_extracts,)*
224                })
225            }
226        }
227
228        #serde_impl
229    })
230}
231
232/// Extracts named fields from a struct, returning an error for other types.
233fn extract_named_fields(
234    input: &DeriveInput,
235) -> syn::Result<&syn::punctuated::Punctuated<Field, syn::token::Comma>> {
236    let Data::Struct(data) = &input.data else {
237        return Err(syn::Error::new_spanned(
238            input,
239            "Kv derive only supports structs",
240        ));
241    };
242    let Fields::Named(fields) = &data.fields else {
243        return Err(syn::Error::new_spanned(
244            input,
245            "Kv derive only supports structs with named fields",
246        ));
247    };
248    Ok(&fields.named)
249}
250
251/// Generates variable declarations for parsing state.
252fn generate_field_declarations(fields: &[ParsedField]) -> Vec<TokenStream2> {
253    fields
254        .iter()
255        .map(|f| {
256            let ident = &f.ident;
257            let state_ty = f.state_type();
258            if f.kind == FieldKind::Collect {
259                quote! { let mut #ident: #state_ty = std::collections::HashMap::new(); }
260            } else {
261                quote! { let mut #ident: #state_ty = None; }
262            }
263        })
264        .collect()
265}
266
267/// Generates match arms for known keys.
268fn generate_match_arms(fields: &[&ParsedField]) -> Vec<TokenStream2> {
269    fields
270        .iter()
271        .map(|f| {
272            let ident = &f.ident;
273            let key_name = &f.key_name;
274            let merge_expr = f.merge_expr();
275            quote! {
276                #key_name => {
277                    #ident = Some(#merge_expr);
278                }
279            }
280        })
281        .collect()
282}
283
284/// Generates the fallback arm for unknown keys.
285fn generate_unknown_handling(
286    container_attrs: &ContainerAttrs,
287    collect_field: Option<&ParsedField>,
288) -> TokenStream2 {
289    match collect_field {
290        Some(field) => {
291            let ident = &field.ident;
292            quote! {
293                _ => {
294                    #ident.insert(key.to_string(), value.to_string());
295                }
296            }
297        }
298        None if container_attrs.allow_unknown => {
299            quote! { _ => {} }
300        }
301        None => {
302            quote! {
303                unknown => {
304                    return Err(::pkgsrc::kv::Error::UnknownVariable {
305                        variable: unknown.to_string(),
306                        span: ::pkgsrc::kv::Span {
307                            offset: line_offset,
308                            len: unknown.len(),
309                        },
310                    });
311                }
312            }
313        }
314    }
315}
316
317/// Generates serde Serialize/Deserialize implementations.
318///
319/// These are feature-gated with `#[cfg(feature = "serde")]`.
320fn generate_serde_impl(name: &Ident, fields: &[ParsedField]) -> TokenStream2 {
321    let field_defs: Vec<_> = fields
322        .iter()
323        .map(|f| {
324            let ident = &f.ident;
325            let ty = &f.original_type;
326            let key_name = &f.key_name;
327
328            let serde_attrs = match f.kind {
329                FieldKind::Required | FieldKind::Vec | FieldKind::MultiLine => {
330                    quote! {
331                        #[serde(rename = #key_name)]
332                    }
333                }
334                FieldKind::Optional | FieldKind::OptionVec | FieldKind::OptionMultiLine => {
335                    quote! {
336                        #[serde(rename = #key_name, default, skip_serializing_if = "Option::is_none")]
337                    }
338                }
339                FieldKind::Collect => {
340                    quote! {
341                        #[serde(flatten)]
342                    }
343                }
344            };
345
346            quote! {
347                #serde_attrs
348                #ident: #ty
349            }
350        })
351        .collect();
352
353    let field_names: Vec<_> = fields.iter().map(|f| &f.ident).collect();
354
355    let to_fields: Vec<_> = fields
356        .iter()
357        .map(|f| {
358            let ident = &f.ident;
359            quote! { #ident: self.#ident.clone() }
360        })
361        .collect();
362
363    quote! {
364        #[cfg(feature = "serde")]
365        impl serde::Serialize for #name {
366            fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
367            where
368                S: serde::Serializer,
369            {
370                #[derive(serde::Serialize)]
371                struct Helper {
372                    #(#field_defs,)*
373                }
374
375                let helper = Helper {
376                    #(#to_fields,)*
377                };
378                helper.serialize(serializer)
379            }
380        }
381
382        #[cfg(feature = "serde")]
383        impl<'de> serde::Deserialize<'de> for #name {
384            fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
385            where
386                D: serde::Deserializer<'de>,
387            {
388                #[derive(serde::Deserialize)]
389                struct Helper {
390                    #(#field_defs,)*
391                }
392
393                let helper = Helper::deserialize(deserializer)?;
394                Ok(Self {
395                    #(#field_names: helper.#field_names,)*
396                })
397            }
398        }
399    }
400}
401
402/// Container-level attributes parsed from `#[kv(...)]`.
403#[derive(Default)]
404struct ContainerAttrs {
405    /// If true, unknown keys are silently ignored.
406    allow_unknown: bool,
407}
408
409impl ContainerAttrs {
410    /// Parses container attributes from a slice of attributes.
411    fn parse(attrs: &[Attribute]) -> syn::Result<Self> {
412        let mut result = Self::default();
413
414        for attr in attrs {
415            if !attr.path().is_ident("kv") {
416                continue;
417            }
418
419            attr.parse_nested_meta(|meta| {
420                if meta.path.is_ident("allow_unknown") {
421                    result.allow_unknown = true;
422                    Ok(())
423                } else {
424                    Err(meta.error(
425                        "unknown container attribute; expected `allow_unknown`",
426                    ))
427                }
428            })?;
429        }
430
431        Ok(result)
432    }
433}
434
435/// Field-level attributes parsed from `#[kv(...)]`.
436#[derive(Default)]
437struct FieldAttrs {
438    /// Custom key name override.
439    variable: Option<String>,
440    /// Whether this field collects multiple lines.
441    multiline: bool,
442    /// Whether this field collects unhandled keys.
443    collect: bool,
444}
445
446impl FieldAttrs {
447    /// Parses field attributes from a slice of attributes.
448    fn parse(attrs: &[Attribute]) -> syn::Result<Self> {
449        let mut result = Self::default();
450
451        for attr in attrs {
452            if !attr.path().is_ident("kv") {
453                continue;
454            }
455
456            attr.parse_nested_meta(|meta| {
457                if meta.path.is_ident("variable") {
458                    let lit: syn::LitStr = meta.value()?.parse()?;
459                    result.variable = Some(lit.value());
460                    Ok(())
461                } else if meta.path.is_ident("multiline") {
462                    result.multiline = true;
463                    Ok(())
464                } else if meta.path.is_ident("collect") {
465                    result.collect = true;
466                    Ok(())
467                } else {
468                    Err(meta.error(
469                        "unknown field attribute; expected `variable`, `multiline`, or `collect`",
470                    ))
471                }
472            })?;
473        }
474
475        Ok(result)
476    }
477}
478
/// The parsing strategy selected for a field from its type and attributes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum FieldKind {
    /// Plain `T`: exactly one value, which must be present.
    Required,
    /// `Option<T>`: one value, which may be absent.
    Optional,
    /// `Vec<T>`: whitespace-separated items from a single line.
    Vec,
    /// `Option<Vec<T>>`: like `Vec`, but the key may be absent.
    OptionVec,
    /// `Vec<T>` marked `multiline`: one item per occurrence of the key.
    MultiLine,
    /// `Option<Vec<T>>` marked `multiline`: like `MultiLine`, but the key
    /// may be absent.
    OptionMultiLine,
    /// `HashMap<String, String>` marked `collect`: receives every key that
    /// no other field handles.
    Collect,
}
497
498/// A parsed and analyzed struct field.
499struct ParsedField {
500    /// The field identifier.
501    ident: Ident,
502    /// The key name used in KEY=VALUE format.
503    key_name: String,
504    /// How this field should be parsed.
505    kind: FieldKind,
506    /// The inner type (e.g., `T` from `Vec<T>`).
507    inner_type: Type,
508    /// The original declared type.
509    original_type: Type,
510}
511
512impl ParsedField {
513    /// Analyzes a field and extracts parsing metadata.
514    fn from_field(field: &Field) -> syn::Result<Self> {
515        let ident = field.ident.clone().ok_or_else(|| {
516            syn::Error::new_spanned(field, "expected named field")
517        })?;
518
519        let attrs = FieldAttrs::parse(&field.attrs)?;
520
521        // Validate collect field type
522        if attrs.collect {
523            validate_collect_type(&field.ty, field)?;
524            return Ok(Self {
525                ident,
526                key_name: String::new(),
527                kind: FieldKind::Collect,
528                inner_type: field.ty.clone(),
529                original_type: field.ty.clone(),
530            });
531        }
532
533        // Validate multiline is only used with Vec types
534        if attrs.multiline
535            && extract_type_param(&field.ty, "Vec").is_none()
536            && extract_option_vec_inner(&field.ty).is_none()
537        {
538            return Err(syn::Error::new_spanned(
539                &field.ty,
540                "`multiline` attribute requires `Vec<T>` or `Option<Vec<T>>` type",
541            ));
542        }
543
544        let key_name = attrs
545            .variable
546            .unwrap_or_else(|| ident.to_string().to_uppercase());
547
548        let (kind, inner_type) = analyze_type(&field.ty, attrs.multiline);
549
550        Ok(Self {
551            ident,
552            key_name,
553            kind,
554            inner_type,
555            original_type: field.ty.clone(),
556        })
557    }
558
559    /// Returns the type used during parsing to accumulate values.
560    fn state_type(&self) -> TokenStream2 {
561        let inner = &self.inner_type;
562        match self.kind {
563            FieldKind::Required | FieldKind::Optional => {
564                quote! { Option<#inner> }
565            }
566            FieldKind::Vec
567            | FieldKind::OptionVec
568            | FieldKind::MultiLine
569            | FieldKind::OptionMultiLine => {
570                quote! { Option<Vec<#inner>> }
571            }
572            FieldKind::Collect => {
573                quote! { std::collections::HashMap<String, String> }
574            }
575        }
576    }
577
578    /// Generates an expression to merge a new value into the accumulator.
579    fn merge_expr(&self) -> TokenStream2 {
580        let inner = &self.inner_type;
581        let ident = &self.ident;
582
583        match self.kind {
584            FieldKind::Required | FieldKind::Optional => {
585                quote! {
586                    <#inner as FromKv>::from_kv(value, value_span)?
587                }
588            }
589            FieldKind::Vec | FieldKind::OptionVec => {
590                quote! {
591                    {
592                        let mut items = Vec::new();
593                        let mut word_start = 0;
594                        let value_bytes = value.as_bytes();
595                        let mut in_word = false;
596
597                        for (i, &b) in value_bytes.iter().enumerate() {
598                            let is_ws = b == b' ' || b == b'\t';
599                            if is_ws && in_word {
600                                let word = &value[word_start..i];
601                                let word_offset = value_offset + word_start;
602                                let word_span = ::pkgsrc::kv::Span { offset: word_offset, len: word.len() };
603                                items.push(<#inner as FromKv>::from_kv(word, word_span)?);
604                                in_word = false;
605                            } else if !is_ws && !in_word {
606                                word_start = i;
607                                in_word = true;
608                            }
609                        }
610                        if in_word {
611                            let word = &value[word_start..];
612                            let word_offset = value_offset + word_start;
613                            let word_span = ::pkgsrc::kv::Span { offset: word_offset, len: word.len() };
614                            items.push(<#inner as FromKv>::from_kv(word, word_span)?);
615                        }
616                        items
617                    }
618                }
619            }
620            FieldKind::MultiLine | FieldKind::OptionMultiLine => {
621                quote! {
622                    {
623                        let mut vec = #ident.unwrap_or_default();
624                        vec.push(<#inner as FromKv>::from_kv(value, value_span)?);
625                        vec
626                    }
627                }
628            }
629            FieldKind::Collect => {
630                // Handled separately in unknown_handling
631                quote! { unreachable!() }
632            }
633        }
634    }
635
636    /// Generates an expression to extract the final value from the accumulator.
637    fn extract_expr(&self) -> TokenStream2 {
638        let ident = &self.ident;
639        let key_name = &self.key_name;
640
641        match self.kind {
642            FieldKind::Required | FieldKind::Vec | FieldKind::MultiLine => {
643                quote! {
644                    #ident.ok_or_else(|| ::pkgsrc::kv::Error::Incomplete(#key_name.to_string()))?
645                }
646            }
647            FieldKind::Optional
648            | FieldKind::OptionVec
649            | FieldKind::OptionMultiLine
650            | FieldKind::Collect => {
651                quote! { #ident }
652            }
653        }
654    }
655}
656
657/// Validates that a collect field has the correct type.
658fn validate_collect_type(ty: &Type, field: &Field) -> syn::Result<()> {
659    let err = || {
660        syn::Error::new_spanned(
661            field,
662            "`collect` attribute requires `HashMap<String, String>` type",
663        )
664    };
665    let Type::Path(type_path) = ty else {
666        return Err(err());
667    };
668    let Some(segment) = type_path.path.segments.last() else {
669        return Err(err());
670    };
671    if segment.ident != "HashMap" {
672        return Err(err());
673    }
674    let PathArguments::AngleBracketed(args) = &segment.arguments else {
675        return Err(err());
676    };
677    let mut arg_iter = args.args.iter();
678    let is_valid = matches!(
679        (arg_iter.next(), arg_iter.next(), arg_iter.next()),
680        (
681            Some(GenericArgument::Type(Type::Path(k))),
682            Some(GenericArgument::Type(Type::Path(v))),
683            None
684        ) if k.path.is_ident("String") && v.path.is_ident("String")
685    );
686    if is_valid {
687        Ok(())
688    } else {
689        Err(err())
690    }
691}
692
693/// Analyzes a type to determine its field kind and inner type.
694fn analyze_type(ty: &Type, multiline: bool) -> (FieldKind, Type) {
695    // Check for Option<Vec<T>>
696    if let Some(vec_inner) = extract_option_vec_inner(ty) {
697        let kind = if multiline {
698            FieldKind::OptionMultiLine
699        } else {
700            FieldKind::OptionVec
701        };
702        return (kind, vec_inner);
703    }
704
705    // Check for Option<T>
706    if let Some(inner) = extract_type_param(ty, "Option") {
707        return (FieldKind::Optional, inner);
708    }
709
710    // Check for Vec<T>
711    if let Some(inner) = extract_type_param(ty, "Vec") {
712        let kind = if multiline {
713            FieldKind::MultiLine
714        } else {
715            FieldKind::Vec
716        };
717        return (kind, inner);
718    }
719
720    // Plain T
721    (FieldKind::Required, ty.clone())
722}
723
724/// Extracts the inner type from `Option<Vec<T>>`.
725fn extract_option_vec_inner(ty: &Type) -> Option<Type> {
726    let option_inner = extract_type_param(ty, "Option")?;
727    extract_type_param(&option_inner, "Vec")
728}
729
730/// Extracts the type parameter from a generic type like `Wrapper<T>`.
731fn extract_type_param(ty: &Type, wrapper: &str) -> Option<Type> {
732    let Type::Path(type_path) = ty else {
733        return None;
734    };
735    let segment = type_path.path.segments.last()?;
736    if segment.ident != wrapper {
737        return None;
738    }
739    let PathArguments::AngleBracketed(args) = &segment.arguments else {
740        return None;
741    };
742    let GenericArgument::Type(inner) = args.args.first()? else {
743        return None;
744    };
745    Some(inner.clone())
746}