pkgsrc/
kv.rs

1/*
2 * Copyright (c) 2025 Jonathan Perkin <jonathan@perkin.org.uk>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17/*!
18 * Type-safe `KEY=VALUE` parsing for various pkgsrc formats.
19 *
20 * This module provides the [`Kv`] derive macro and supporting types for
21 * parsing various pkgsrc formats that use `KEY=VALUE` pairs, including:
22 *
23 * - [`pkg_summary(5)`] via [`Summary`]
24 * - [`pbulk-index`] via [`ScanIndex`]
25 *
26 * Types such as [`PkgName`] only need to implement the [`FromKv`] trait to
27 * be used directly.
28 *
29 * Multi-line variables such as `DESCRIPTION` in [`pkg_summary(5)`] are
30 * supported by adding the `#[kv(multiline)]` attribute which will append each
31 * line to a [`Vec`].
32 *
33 * Single-line variables where it makes sense to split the input such as
34 * `CATEGORIES` can do so easily by declaring themselves as [`Vec`].
35 *
36 * # Example
37 *
38 * ```
39 * use indoc::indoc;
40 * use pkgsrc::{PkgName, kv::Kv};
41 *
42 * #[derive(Kv, Debug, PartialEq)]
43 * #[kv(allow_unknown)]
44 * struct Package {
45 *     pkgname: PkgName,
46 *     size_pkg: u64,
47 *     categories: Vec<String>,
48 *     #[kv(variable = "DESCRIPTION", multiline)]
49 *     desc: Vec<String>,
50 *     // There is no known multi-line variable that also contains multiple
51 *     // values per line, this is purely to show how one might be handled if
52 *     // necessary, though it would be strongly recommended against.
53 *     #[kv(multiline)]
54 *     all_depends: Vec<Vec<String>>,
55 * }
56 *
57 * let input = indoc! {"
58 *     PKGNAME=mktool-1.4.2
59 *     SIZE_PKG=6999600
60 *     CATEGORIES=devel pkgtools
61 *     DESCRIPTION=This is a highly-performant collection of utilities that provide
62 *     DESCRIPTION=alternate implementations for parts of the pkgsrc mk infrastructure.
63 *     UNKNOWN=Without allow_unknown this would trigger parse failure.
64 *     ALL_DEPENDS=cwrappers>=20150314:../../pkgtools/cwrappers
65 *     ALL_DEPENDS=checkperms>=1.1:../../sysutils/checkperms rust>=1.74.0:../../lang/rust
66 * "};
67 *
68 * let pkg = Package::parse(input)?;
69 * assert_eq!(pkg.pkgname, "mktool-1.4.2");
70 * assert_eq!(pkg.size_pkg, 6999600);
71 * assert_eq!(pkg.categories, vec!["devel", "pkgtools"]);
72 * assert!(pkg.desc[1].starts_with("alternate implementations "));
73 * assert_eq!(pkg.all_depends.len(), 2);
74 * assert_eq!(pkg.all_depends[0].len(), 1);
75 * assert_eq!(pkg.all_depends[1].len(), 2);
76 * # Ok::<(), pkgsrc::kv::KvError>(())
77 * ```
78 *
79 * [`PkgName`]: crate::PkgName
80 * [`ScanIndex`]: crate::ScanIndex
81 * [`Summary`]: crate::summary::Summary
82 * [`pkg_summary(5)`]: https://man.netbsd.org/pkg_summary.5
83 * [`pbulk-index`]: https://man.netbsd.org/pbulk-build.1
84 */
85
86use std::num::ParseIntError;
87use std::path::PathBuf;
88use thiserror::Error;
89
90pub use pkgsrc_kv_derive::Kv;
91
/**
 * Location of a region of the input, expressed as a starting byte offset
 * plus a byte length.
 *
 * Errors carry a `Span` so that callers can point at the exact part of the
 * original input that caused a failure.  A `Span` converts into a
 * [`std::ops::Range<usize>`] covering `offset..offset + len`.
 *
 * ```
 * use pkgsrc::kv::Span;
 *
 * let span = Span { offset: 10, len: 5 };
 * let range: std::ops::Range<usize> = span.into();
 * assert_eq!(range, 10..15);
 * ```
 */
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Span {
    /** Byte offset of the first byte covered by this span. */
    pub offset: usize,
    /** Number of bytes covered by this span. */
    pub len: usize,
}
114
115impl From<Span> for std::ops::Range<usize> {
116    fn from(span: Span) -> Self {
117        span.offset..span.offset + span.len
118    }
119}
120
121/** Errors that can occur during parsing. */
122#[derive(Debug, Error)]
123pub enum KvError {
124    /** A line was not in `KEY=VALUE` format. */
125    #[error("line is not in KEY=VALUE format")]
126    ParseLine(Span),
127
128    /** A required field was missing from the input. */
129    #[error("missing required field '{0}'")]
130    Incomplete(String),
131
132    /** An unknown variable was encountered. */
133    #[error("unknown variable '{variable}'")]
134    UnknownVariable {
135        /** The name of the unknown variable. */
136        variable: String,
137        /** Location of the variable name in the input. */
138        span: Span,
139    },
140
141    /** Failed to parse an integer value. */
142    #[error("failed to parse integer")]
143    ParseInt {
144        /** The underlying parse error. */
145        #[source]
146        source: ParseIntError,
147        /** Location of the invalid value in the input. */
148        span: Span,
149    },
150
151    /** Failed to parse a value. */
152    #[error("{message}")]
153    Parse {
154        /** Description of the parse error. */
155        message: String,
156        /** Location of the invalid value in the input. */
157        span: Span,
158    },
159}
160
161impl KvError {
162    /** Returns the [`Span`] for this error, if available. */
163    #[must_use]
164    pub const fn span(&self) -> Option<Span> {
165        match self {
166            Self::ParseLine(span)
167            | Self::UnknownVariable { span, .. }
168            | Self::ParseInt { span, .. }
169            | Self::Parse { span, .. } => Some(*span),
170            Self::Incomplete(_) => None,
171        }
172    }
173}
174
175/** A [`Result`](std::result::Result) type alias using [`KvError`]. */
176pub type Result<T> = std::result::Result<T, KvError>;
177
178/**
179 * Trait for types that can be parsed from a KEY=VALUE string.
180 *
181 * This is the extension point for custom types. Implement this trait to
182 * allow your type to be used in a `#[derive(Kv)]` struct.
183 *
184 * The `span` parameter indicates where in the input the value is located,
185 * for error reporting.
186 *
187 * # Example
188 *
189 * ```
190 * use pkgsrc::kv::{FromKv, KvError, Span};
191 *
192 * struct MyId(u32);
193 *
194 * impl FromKv for MyId {
195 *     fn from_kv(value: &str, span: Span) -> Result<Self, KvError> {
196 *         value.parse::<u32>()
197 *             .map(MyId)
198 *             .map_err(|e| KvError::Parse {
199 *                 message: e.to_string(),
200 *                 span,
201 *             })
202 *     }
203 * }
204 * ```
205 */
206pub trait FromKv: Sized {
207    /**
208     * Parse a value from a string.
209     *
210     * # Errors
211     *
212     * Returns an error if the value cannot be parsed into the target type.
213     */
214    fn from_kv(value: &str, span: Span) -> Result<Self>;
215}
216
217// Implementation for String - always succeeds
218impl FromKv for String {
219    fn from_kv(value: &str, _span: Span) -> Result<Self> {
220        Ok(value.to_string())
221    }
222}
223
224// Implementation for numeric types
225macro_rules! impl_fromkv_for_int {
226    ($($t:ty),*) => {
227        $(
228            impl FromKv for $t {
229                fn from_kv(value: &str, span: Span) -> Result<Self> {
230                    value.parse().map_err(|source: ParseIntError| KvError::ParseInt {
231                        source,
232                        span,
233                    })
234                }
235            }
236        )*
237    };
238}
239
240impl_fromkv_for_int!(u8, u16, u32, u64, usize, i8, i16, i32, i64, isize);
241
242// Implementation for PathBuf
243impl FromKv for PathBuf {
244    fn from_kv(value: &str, _span: Span) -> Result<Self> {
245        Ok(Self::from(value))
246    }
247}
248
249// Implementation for bool (common patterns: yes/no, true/false, 1/0)
250impl FromKv for bool {
251    fn from_kv(value: &str, span: Span) -> Result<Self> {
252        match value.to_lowercase().as_str() {
253            "true" | "yes" | "1" => Ok(true),
254            "false" | "no" | "0" => Ok(false),
255            _ => Err(KvError::Parse {
256                message: format!("invalid boolean: {value}"),
257                span,
258            }),
259        }
260    }
261}
262
263impl<T: FromKv> FromKv for Vec<T> {
264    fn from_kv(value: &str, span: Span) -> Result<Self> {
265        value
266            .split_whitespace()
267            .map(|word| T::from_kv(word, span))
268            .collect()
269    }
270}
271
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Depend, PkgName};
    use indoc::indoc;
    use std::collections::HashMap;

    // Sample mktool record shared by several of the tests below.
    const MKTOOL_INPUT: &str = indoc! {"
        PKGNAME=mktool-1.4.2
        COMMENT=High performance alternatives for pkgsrc/mk
        SIZE_PKG=6999600
        CATEGORIES=pkgtools
        HOMEPAGE=https://github.com/jperkin/mktool/
    "};

    // The COMMENT value used by the sample records.
    const MKTOOL_COMMENT: &str = "High performance alternatives for pkgsrc/mk";

    // ---- Span and the built-in FromKv implementations ----

    #[test]
    fn span_to_range() {
        let range = std::ops::Range::<usize>::from(Span { offset: 10, len: 5 });
        assert_eq!(range, 10..15);
    }

    #[test]
    fn fromkv_string() -> Result<()> {
        let parsed = String::from_kv("hello", Span::default())?;
        assert_eq!(parsed, "hello");
        Ok(())
    }

    #[test]
    fn fromkv_u64() -> Result<()> {
        let nowhere = Span::default();
        let parsed = u64::from_kv("6999600", nowhere)?;
        assert_eq!(parsed, 6_999_600);
        assert!(u64::from_kv("not_a_number", nowhere).is_err());
        Ok(())
    }

    #[test]
    fn fromkv_bool() -> Result<()> {
        let nowhere = Span::default();
        for truthy in ["true", "yes", "1"] {
            assert!(
                bool::from_kv(truthy, nowhere)?,
                "{truthy} should parse as true"
            );
        }
        for falsy in ["false", "no", "0"] {
            assert!(
                !bool::from_kv(falsy, nowhere)?,
                "{falsy} should parse as false"
            );
        }
        assert!(bool::from_kv("maybe", nowhere).is_err());
        Ok(())
    }

    #[test]
    fn fromkv_pathbuf() -> Result<()> {
        let parsed = PathBuf::from_kv("/usr/bin", Span::default())?;
        assert_eq!(parsed.to_str(), Some("/usr/bin"));
        Ok(())
    }

    // ---- Derived parsers: required, renamed and optional fields ----

    #[derive(Kv, Debug, PartialEq)]
    #[kv(allow_unknown)]
    struct SimplePackage {
        pkgname: String,
        #[kv(variable = "SIZE_PKG")]
        size: u64,
        comment: Option<String>,
    }

    #[test]
    fn derive_simple() -> Result<()> {
        let SimplePackage {
            pkgname,
            size,
            comment,
        } = SimplePackage::parse(MKTOOL_INPUT)?;
        assert_eq!(pkgname, "mktool-1.4.2");
        assert_eq!(size, 6_999_600);
        assert_eq!(comment.as_deref(), Some(MKTOOL_COMMENT));
        Ok(())
    }

    #[test]
    fn derive_with_optional() -> Result<()> {
        let record = indoc! {"
            PKGNAME=mktool-1.4.2
            SIZE_PKG=6999600
            COMMENT=High performance alternatives for pkgsrc/mk
        "};
        let SimplePackage {
            pkgname,
            size,
            comment,
        } = SimplePackage::parse(record)?;
        assert_eq!(pkgname, "mktool-1.4.2");
        assert_eq!(size, 6_999_600);
        assert_eq!(comment.as_deref(), Some(MKTOOL_COMMENT));
        Ok(())
    }

    #[test]
    fn derive_optional_missing() -> Result<()> {
        let record = indoc! {"
            PKGNAME=mktool-1.4.2
            SIZE_PKG=6999600
        "};
        let parsed = SimplePackage::parse(record)?;
        assert_eq!(parsed.pkgname, "mktool-1.4.2");
        assert_eq!(parsed.size, 6_999_600);
        assert!(parsed.comment.is_none());
        Ok(())
    }

    #[test]
    fn derive_unknown_ignored() -> Result<()> {
        // The sample record has variables SimplePackage does not declare;
        // with allow_unknown they must not cause a failure.
        let parsed = SimplePackage::parse(MKTOOL_INPUT)?;
        assert_eq!(parsed.pkgname, "mktool-1.4.2");
        Ok(())
    }

    #[test]
    fn derive_missing_required() {
        let outcome = SimplePackage::parse("PKGNAME=mktool-1.4.2\n");
        assert!(matches!(outcome, Err(KvError::Incomplete(_))));
    }

    // ---- Derived parsers: whitespace-separated and multi-line fields ----

    #[derive(Kv, Debug, PartialEq)]
    struct VecPackage {
        pkgname: String,
        categories: Vec<String>,
    }

    #[test]
    fn derive_vec_whitespace_separated() -> Result<()> {
        let record = indoc! {"
            PKGNAME=mktool-1.4.2
            CATEGORIES=pkgtools devel
        "};
        let parsed = VecPackage::parse(record)?;
        assert_eq!(parsed.pkgname, "mktool-1.4.2");
        assert_eq!(parsed.categories, ["pkgtools", "devel"]);
        Ok(())
    }

    #[derive(Kv, Debug, PartialEq)]
    struct MultiLinePackage {
        pkgname: String,
        #[kv(multiline)]
        description: Vec<String>,
    }

    #[test]
    fn derive_multiline() -> Result<()> {
        let record = indoc! {"
            PKGNAME=mktool-1.4.2
            DESCRIPTION=This is a highly-performant collection of utilities.
            DESCRIPTION=Many targets under pkgsrc/mk are implemented using shell.
        "};
        let parsed = MultiLinePackage::parse(record)?;
        assert_eq!(parsed.pkgname, "mktool-1.4.2");
        assert_eq!(
            parsed.description,
            [
                "This is a highly-performant collection of utilities.",
                "Many targets under pkgsrc/mk are implemented using shell.",
            ]
        );
        Ok(())
    }

    // ---- Derived parsers: error reporting ----

    #[test]
    fn derive_parse_error() {
        let record = indoc! {"
            PKGNAME=mktool-1.4.2
            SIZE_PKG=not_a_number
        "};
        let outcome = SimplePackage::parse(record);
        assert!(matches!(outcome, Err(KvError::ParseInt { .. })));
    }

    #[test]
    fn derive_bad_line() {
        let record = indoc! {"
            PKGNAME=mktool-1.4.2
            bad-line
            SIZE_PKG=6999600
        "};
        let outcome = SimplePackage::parse(record);
        assert!(matches!(outcome, Err(KvError::ParseLine(_))));
    }

    // ---- Derived parsers: crate types as fields ----

    #[derive(Kv, Debug, PartialEq)]
    #[kv(allow_unknown)]
    struct ScanIndexTest {
        pkgname: PkgName,
        all_depends: Option<Vec<Depend>>,
    }

    #[test]
    fn derive_pkgname() -> Result<()> {
        let parsed = ScanIndexTest::parse("PKGNAME=mktool-1.4.2\n")?;
        assert_eq!(parsed.pkgname.pkgbase(), "mktool");
        assert_eq!(parsed.pkgname.pkgversion(), "1.4.2");
        assert!(parsed.all_depends.is_none());
        Ok(())
    }

    #[test]
    fn derive_depend_vec() -> Result<()> {
        let record = indoc! {"
            PKGNAME=mktool-1.4.2
            ALL_DEPENDS=rust-[0-9]*:../../lang/rust curl>=7.0:../../www/curl
        "};
        let parsed = ScanIndexTest::parse(record)?;
        let depends = parsed
            .all_depends
            .as_deref()
            .ok_or_else(|| KvError::Incomplete("all_depends".to_string()))?;
        assert_eq!(depends.len(), 2);
        Ok(())
    }

    #[test]
    fn derive_depend_invalid() {
        let record = indoc! {"
            PKGNAME=mktool-1.4.2
            ALL_DEPENDS=invalid
        "};
        let outcome = ScanIndexTest::parse(record);
        assert!(matches!(outcome, Err(KvError::Parse { .. })));
    }

    // ---- Derived parsers: collecting undeclared variables ----

    #[derive(Kv, Debug, PartialEq)]
    struct WithExtras {
        pkgname: String,
        #[kv(collect)]
        extra: HashMap<String, String>,
    }

    #[test]
    fn derive_extras() -> Result<()> {
        let parsed = WithExtras::parse(MKTOOL_INPUT)?;
        assert_eq!(parsed.pkgname, "mktool-1.4.2");

        // Everything in the sample record other than PKGNAME should have
        // been collected, and nothing else.
        let expected = [
            ("COMMENT", MKTOOL_COMMENT),
            ("SIZE_PKG", "6999600"),
            ("CATEGORIES", "pkgtools"),
            ("HOMEPAGE", "https://github.com/jperkin/mktool/"),
        ];
        for (key, value) in expected {
            assert_eq!(
                parsed.extra.get(key).map(String::as_str),
                Some(value),
                "unexpected value for {key}"
            );
        }
        assert_eq!(parsed.extra.len(), expected.len());
        Ok(())
    }

    #[test]
    fn derive_extras_empty() -> Result<()> {
        let parsed = WithExtras::parse("PKGNAME=mktool-1.4.2\n")?;
        assert_eq!(parsed.pkgname, "mktool-1.4.2");
        assert!(parsed.extra.is_empty());
        Ok(())
    }
}