Skip to main content

atomr_config/
hocon.rs

//! HOCON-subset parser for migrating Akka.NET / Pekko `reference.conf`
//! files (Phase 2 of `docs/full-port-plan.md`).
//!
//! Supports the slice of HOCON that the upstream `reference.conf`
//! files actually use:
//!
//! * `key = value` and `key : value` assignments;
//! * dotted keys (`akka.actor.provider = "local"`);
//! * nested objects (`akka { actor { provider = "local" } }`);
//! * arrays (`["a", "b"]`);
//! * single-line `#` and `//` comments, multi-line `/* … */` comments;
//! * triple-quoted strings (`"""…"""`);
//! * `include "file"` of a file relative to the current path;
//! * `${path.to.value}` strict substitutions (errors if missing);
//! * `${?ENV_VAR}` optional env-var substitutions
//!   (resolve to `null` if the variable is unset).
//!
//! What it does **not** support yet (folded into a follow-on if a
//! real `reference.conf` needs them): unquoted multi-token strings,
//! self-referential substitutions, list concatenation across lines,
//! `${path}` references that mix scalar + object types, durations
//! parsed at parse time (we keep those as strings; `Config::
//! get_duration` already parses on read).
//!
//! The parser is intentionally hand-written (no `nom` / `pest`
//! dependency) — it's <500 LOC and the syntax surface is small.
27
28use std::collections::BTreeMap;
29use std::env;
30use std::path::{Path, PathBuf};
31
32use thiserror::Error;
33
34use crate::value::ConfigValue;
35
/// Errors produced while parsing a HOCON document or resolving its
/// substitutions.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum HoconError {
    /// A character appeared where the grammar does not allow it, e.g.
    /// trailing input after the root object. `ch` is `'\0'` when the
    /// error stems from a UTF-8 decoding failure rather than a concrete
    /// character.
    #[error("unexpected character `{ch}` at line {line}, col {col}")]
    Unexpected { ch: char, line: usize, col: usize },
    /// Input ended (or the line ended, for quoted strings) before the
    /// construct named by `kind` was closed.
    #[error("unterminated {kind} at line {line}")]
    Unterminated { kind: &'static str, line: usize },
    /// A strict `${key}` substitution named a path that does not exist
    /// in the parsed document.
    #[error("unknown substitution `${{{key}}}` (no such config key)")]
    MissingSubstitution { key: String },
    /// An `include` directive could not be honoured; the payload is a
    /// human-readable explanation.
    #[error("include error: {0}")]
    Include(String),
    /// Reading the file at `path` failed with the wrapped I/O error.
    #[error("io error reading `{path}`: {source}")]
    Io {
        path: String,
        #[source]
        source: std::io::Error,
    },
    /// The parser needed `expected` at this point but saw `found`
    /// (empty when the input ended).
    #[error("expected {expected}, found `{found}` at line {line}")]
    Expected { expected: &'static str, found: String, line: usize },
}
56
57/// Parse a HOCON document and return the merged root object.
58///
59/// `base_dir` controls how `include "rel/path"` resolves. Pass
60/// `Path::new(".")` when parsing in-memory documents without a
61/// physical anchor.
62pub fn parse(text: &str, base_dir: &Path) -> Result<ConfigValue, HoconError> {
63    let mut p = Parser::new(text, base_dir.to_path_buf());
64    let root = p.parse_root()?;
65    let resolved = resolve_substitutions(root)?;
66    Ok(resolved)
67}
68
69/// Parse a HOCON file from disk, resolving `include` relative to its
70/// parent directory.
71pub fn parse_file(path: &Path) -> Result<ConfigValue, HoconError> {
72    let text = std::fs::read_to_string(path)
73        .map_err(|e| HoconError::Io { path: path.display().to_string(), source: e })?;
74    let base = path.parent().unwrap_or(Path::new(".")).to_path_buf();
75    parse(&text, &base)
76}
77
78// -- Parser -----------------------------------------------------------
79
/// Cursor-based, single-pass parser over the raw bytes of one document.
struct Parser<'a> {
    /// The document being parsed, as bytes.
    src: &'a [u8],
    /// Byte offset of the next unread byte in `src`.
    pos: usize,
    /// 1-based line number of the cursor, used in error reports.
    line: usize,
    /// 1-based column of the cursor; reset to 1 after every newline.
    col: usize,
    /// Directory that `include "…"` paths are joined onto.
    base_dir: PathBuf,
}
87
88impl<'a> Parser<'a> {
89    fn new(src: &'a str, base_dir: PathBuf) -> Self {
90        Self { src: src.as_bytes(), pos: 0, line: 1, col: 1, base_dir }
91    }
92
93    fn parse_root(&mut self) -> Result<ConfigValue, HoconError> {
94        self.skip_ws_and_comments();
95        // A HOCON root may omit the surrounding braces.
96        let mut obj = if self.peek() == Some(b'{') {
97            self.advance(1);
98            let o = self.parse_object_body(b'}')?;
99            self.skip_ws_and_comments();
100            o
101        } else {
102            self.parse_object_body(0)?
103        };
104        // Allow trailing whitespace.
105        self.skip_ws_and_comments();
106        if self.pos < self.src.len() {
107            return Err(HoconError::Unexpected {
108                ch: self.src[self.pos] as char,
109                line: self.line,
110                col: self.col,
111            });
112        }
113        merge_in_place(&mut obj, BTreeMap::new());
114        Ok(ConfigValue::Object(obj))
115    }
116
117    /// Parse an object body — either until `terminator` (e.g. `b'}'`)
118    /// or end-of-input if `terminator == 0`.
119    fn parse_object_body(&mut self, terminator: u8) -> Result<BTreeMap<String, ConfigValue>, HoconError> {
120        let mut obj: BTreeMap<String, ConfigValue> = BTreeMap::new();
121        loop {
122            self.skip_ws_and_comments();
123            match self.peek() {
124                None => {
125                    if terminator != 0 {
126                        return Err(HoconError::Unterminated { kind: "object", line: self.line });
127                    }
128                    return Ok(obj);
129                }
130                Some(c) if c == terminator => {
131                    self.advance(1);
132                    return Ok(obj);
133                }
134                Some(b',') | Some(b'\n') | Some(b';') => {
135                    self.advance(1);
136                    continue;
137                }
138                _ => {}
139            }
140
141            // include "..."
142            if self.starts_with_keyword("include") {
143                self.advance(7);
144                self.skip_ws_inline();
145                let path = self.parse_string()?;
146                let included = self.do_include(&path)?;
147                if let ConfigValue::Object(child) = included {
148                    deep_merge(&mut obj, child);
149                } else {
150                    return Err(HoconError::Include(format!(
151                        "included file `{path}` did not resolve to an object"
152                    )));
153                }
154                continue;
155            }
156
157            // key (.key)*  ('=' | ':' | '{')  value
158            let key = self.parse_key()?;
159            self.skip_ws_inline();
160            let next = self.peek();
161            let value = match next {
162                Some(b'{') => {
163                    self.advance(1);
164                    let inner = self.parse_object_body(b'}')?;
165                    ConfigValue::Object(inner)
166                }
167                Some(b'=') | Some(b':') => {
168                    self.advance(1);
169                    self.skip_ws_inline();
170                    self.parse_value()?
171                }
172                Some(c) => {
173                    return Err(HoconError::Expected {
174                        expected: "= or : or {",
175                        found: format!("{}", c as char),
176                        line: self.line,
177                    })
178                }
179                None => return Err(HoconError::Unterminated { kind: "assignment", line: self.line }),
180            };
181            insert_dotted(&mut obj, &key, value);
182        }
183    }
184
185    fn parse_key(&mut self) -> Result<Vec<String>, HoconError> {
186        let mut parts = Vec::new();
187        loop {
188            self.skip_ws_inline();
189            let part = if self.peek() == Some(b'"') {
190                self.parse_string()?
191            } else {
192                let start = self.pos;
193                while let Some(c) = self.peek() {
194                    if c.is_ascii_alphanumeric() || matches!(c, b'_' | b'-') {
195                        self.advance(1);
196                    } else {
197                        break;
198                    }
199                }
200                if start == self.pos {
201                    return Err(HoconError::Expected {
202                        expected: "key",
203                        found: self.peek().map(|c| (c as char).to_string()).unwrap_or_default(),
204                        line: self.line,
205                    });
206                }
207                std::str::from_utf8(&self.src[start..self.pos])
208                    .map_err(|_| HoconError::Unexpected { ch: '\0', line: self.line, col: self.col })?
209                    .to_string()
210            };
211            parts.push(part);
212            if self.peek() == Some(b'.') {
213                self.advance(1);
214                continue;
215            }
216            return Ok(parts);
217        }
218    }
219
220    fn parse_value(&mut self) -> Result<ConfigValue, HoconError> {
221        self.skip_ws_inline();
222        match self.peek() {
223            Some(b'"') => Ok(ConfigValue::String(self.parse_string()?)),
224            Some(b'[') => self.parse_array(),
225            Some(b'{') => {
226                self.advance(1);
227                let inner = self.parse_object_body(b'}')?;
228                Ok(ConfigValue::Object(inner))
229            }
230            Some(b'$') if self.peek_at(1) == Some(b'{') => {
231                let sub = self.parse_substitution()?;
232                Ok(sub)
233            }
234            Some(_) => self.parse_unquoted_scalar(),
235            None => Err(HoconError::Unterminated { kind: "value", line: self.line }),
236        }
237    }
238
239    fn parse_string(&mut self) -> Result<String, HoconError> {
240        // Triple-quoted?
241        if self.starts_with(b"\"\"\"") {
242            self.advance(3);
243            let start = self.pos;
244            while self.pos + 2 < self.src.len() && &self.src[self.pos..self.pos + 3] != b"\"\"\"" {
245                if self.src[self.pos] == b'\n' {
246                    self.line += 1;
247                    self.col = 1;
248                } else {
249                    self.col += 1;
250                }
251                self.pos += 1;
252            }
253            if self.pos + 2 >= self.src.len() {
254                return Err(HoconError::Unterminated { kind: "string", line: self.line });
255            }
256            let s = std::str::from_utf8(&self.src[start..self.pos])
257                .map_err(|_| HoconError::Unterminated { kind: "string", line: self.line })?
258                .to_string();
259            self.advance(3);
260            return Ok(s);
261        }
262        if self.peek() != Some(b'"') {
263            return Err(HoconError::Expected {
264                expected: "\"",
265                found: self.peek().map(|c| (c as char).to_string()).unwrap_or_default(),
266                line: self.line,
267            });
268        }
269        self.advance(1);
270        let mut out = String::new();
271        loop {
272            match self.peek() {
273                None | Some(b'\n') => {
274                    return Err(HoconError::Unterminated { kind: "string", line: self.line })
275                }
276                Some(b'"') => {
277                    self.advance(1);
278                    return Ok(out);
279                }
280                Some(b'\\') => {
281                    self.advance(1);
282                    match self.peek() {
283                        Some(b'n') => {
284                            out.push('\n');
285                            self.advance(1);
286                        }
287                        Some(b't') => {
288                            out.push('\t');
289                            self.advance(1);
290                        }
291                        Some(b'r') => {
292                            out.push('\r');
293                            self.advance(1);
294                        }
295                        Some(b'"') => {
296                            out.push('"');
297                            self.advance(1);
298                        }
299                        Some(b'\\') => {
300                            out.push('\\');
301                            self.advance(1);
302                        }
303                        Some(b'/') => {
304                            out.push('/');
305                            self.advance(1);
306                        }
307                        Some(c) => {
308                            out.push(c as char);
309                            self.advance(1);
310                        }
311                        None => {
312                            return Err(HoconError::Unterminated { kind: "string-escape", line: self.line })
313                        }
314                    }
315                }
316                Some(c) => {
317                    out.push(c as char);
318                    self.advance(1);
319                }
320            }
321        }
322    }
323
324    fn parse_array(&mut self) -> Result<ConfigValue, HoconError> {
325        debug_assert_eq!(self.peek(), Some(b'['));
326        self.advance(1);
327        let mut items = Vec::new();
328        loop {
329            self.skip_ws_and_comments();
330            match self.peek() {
331                Some(b']') => {
332                    self.advance(1);
333                    return Ok(ConfigValue::Array(items));
334                }
335                Some(b',') | Some(b'\n') => {
336                    self.advance(1);
337                }
338                Some(_) => {
339                    let v = self.parse_value()?;
340                    items.push(v);
341                }
342                None => return Err(HoconError::Unterminated { kind: "array", line: self.line }),
343            }
344        }
345    }
346
347    fn parse_substitution(&mut self) -> Result<ConfigValue, HoconError> {
348        // `${path}` or `${?path}`
349        debug_assert_eq!(self.peek(), Some(b'$'));
350        self.advance(1);
351        debug_assert_eq!(self.peek(), Some(b'{'));
352        self.advance(1);
353        let optional = self.peek() == Some(b'?');
354        if optional {
355            self.advance(1);
356        }
357        let start = self.pos;
358        while let Some(c) = self.peek() {
359            if c == b'}' {
360                break;
361            }
362            self.advance(1);
363        }
364        if self.peek() != Some(b'}') {
365            return Err(HoconError::Unterminated { kind: "substitution", line: self.line });
366        }
367        let key = std::str::from_utf8(&self.src[start..self.pos])
368            .map_err(|_| HoconError::Unterminated { kind: "substitution", line: self.line })?
369            .trim()
370            .to_string();
371        self.advance(1);
372        // Stash as a placeholder; resolver replaces in pass 2.
373        let marker = if optional { format!("__atomr_sub_opt::{key}") } else { format!("__atomr_sub::{key}") };
374        Ok(ConfigValue::String(marker))
375    }
376
377    fn parse_unquoted_scalar(&mut self) -> Result<ConfigValue, HoconError> {
378        let start = self.pos;
379        while let Some(c) = self.peek() {
380            if matches!(c, b',' | b'\n' | b'}' | b']' | b';' | b'#') {
381                break;
382            }
383            if c == b'/' && self.peek_at(1) == Some(b'/') {
384                break;
385            }
386            self.advance(1);
387        }
388        let raw = std::str::from_utf8(&self.src[start..self.pos])
389            .map_err(|_| HoconError::Unexpected { ch: '\0', line: self.line, col: self.col })?
390            .trim();
391        if raw.is_empty() {
392            return Err(HoconError::Expected { expected: "value", found: String::new(), line: self.line });
393        }
394        Ok(scalar_from_str(raw))
395    }
396
397    fn do_include(&self, rel: &str) -> Result<ConfigValue, HoconError> {
398        let p = self.base_dir.join(rel);
399        parse_file(&p)
400    }
401
402    // -- low-level cursor helpers --
403
404    fn peek(&self) -> Option<u8> {
405        self.src.get(self.pos).copied()
406    }
407    fn peek_at(&self, off: usize) -> Option<u8> {
408        self.src.get(self.pos + off).copied()
409    }
410    fn starts_with(&self, needle: &[u8]) -> bool {
411        self.src.len() >= self.pos + needle.len() && &self.src[self.pos..self.pos + needle.len()] == needle
412    }
413    fn starts_with_keyword(&self, kw: &str) -> bool {
414        if !self.starts_with(kw.as_bytes()) {
415            return false;
416        }
417        match self.src.get(self.pos + kw.len()) {
418            None => true,
419            Some(c) => !c.is_ascii_alphanumeric() && *c != b'_',
420        }
421    }
422    fn advance(&mut self, n: usize) {
423        for _ in 0..n {
424            if self.pos >= self.src.len() {
425                break;
426            }
427            if self.src[self.pos] == b'\n' {
428                self.line += 1;
429                self.col = 1;
430            } else {
431                self.col += 1;
432            }
433            self.pos += 1;
434        }
435    }
436    fn skip_ws_inline(&mut self) {
437        while let Some(c) = self.peek() {
438            if c == b' ' || c == b'\t' {
439                self.advance(1);
440            } else {
441                break;
442            }
443        }
444    }
445    fn skip_ws_and_comments(&mut self) {
446        loop {
447            match self.peek() {
448                Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') => {
449                    self.advance(1);
450                }
451                Some(b'#') => {
452                    while let Some(c) = self.peek() {
453                        if c == b'\n' {
454                            break;
455                        }
456                        self.advance(1);
457                    }
458                }
459                Some(b'/') if self.peek_at(1) == Some(b'/') => {
460                    while let Some(c) = self.peek() {
461                        if c == b'\n' {
462                            break;
463                        }
464                        self.advance(1);
465                    }
466                }
467                Some(b'/') if self.peek_at(1) == Some(b'*') => {
468                    self.advance(2);
469                    while !(self.peek() == Some(b'*') && self.peek_at(1) == Some(b'/')) {
470                        if self.peek().is_none() {
471                            return;
472                        }
473                        self.advance(1);
474                    }
475                    self.advance(2);
476                }
477                _ => return,
478            }
479        }
480    }
481}
482
483fn scalar_from_str(s: &str) -> ConfigValue {
484    if s == "null" {
485        return ConfigValue::Null;
486    }
487    if s == "true" {
488        return ConfigValue::Bool(true);
489    }
490    if s == "false" {
491        return ConfigValue::Bool(false);
492    }
493    if let Ok(i) = s.parse::<i64>() {
494        return ConfigValue::Int(i);
495    }
496    if let Ok(f) = s.parse::<f64>() {
497        return ConfigValue::Float(f);
498    }
499    ConfigValue::String(s.to_string())
500}
501
502fn insert_dotted(obj: &mut BTreeMap<String, ConfigValue>, key: &[String], value: ConfigValue) {
503    if key.is_empty() {
504        return;
505    }
506    if key.len() == 1 {
507        if let Some(ConfigValue::Object(existing)) = obj.get_mut(&key[0]) {
508            if let ConfigValue::Object(new_obj) = value {
509                deep_merge(existing, new_obj);
510                return;
511            }
512        }
513        obj.insert(key[0].clone(), value);
514        return;
515    }
516    let entry = obj.entry(key[0].clone()).or_insert_with(|| ConfigValue::Object(BTreeMap::new()));
517    if let ConfigValue::Object(child) = entry {
518        insert_dotted(child, &key[1..], value);
519    } else {
520        let mut new_child: BTreeMap<String, ConfigValue> = BTreeMap::new();
521        insert_dotted(&mut new_child, &key[1..], value);
522        *entry = ConfigValue::Object(new_child);
523    }
524}
525
526fn deep_merge(into: &mut BTreeMap<String, ConfigValue>, from: BTreeMap<String, ConfigValue>) {
527    for (k, v) in from {
528        match (into.get_mut(&k), v) {
529            (Some(ConfigValue::Object(a)), ConfigValue::Object(b)) => {
530                deep_merge(a, b);
531            }
532            (_, v) => {
533                into.insert(k, v);
534            }
535        }
536    }
537}
538
/// Placeholder hook called at the end of `Parser::parse_root`.
///
/// The body is empty: both arguments are ignored and `_into` is left
/// untouched.
fn merge_in_place(_into: &mut BTreeMap<String, ConfigValue>, _from: BTreeMap<String, ConfigValue>) {}
540
541// -- Substitution resolution -----------------------------------------
542
543fn resolve_substitutions(v: ConfigValue) -> Result<ConfigValue, HoconError> {
544    let snapshot = v.clone();
545    resolve_in(v, &snapshot)
546}
547
548fn resolve_in(v: ConfigValue, root: &ConfigValue) -> Result<ConfigValue, HoconError> {
549    match v {
550        ConfigValue::String(s) => {
551            if let Some(rest) = s.strip_prefix("__atomr_sub::") {
552                let lookup = lookup_path(root, rest);
553                lookup.ok_or_else(|| HoconError::MissingSubstitution { key: rest.to_string() })
554            } else if let Some(rest) = s.strip_prefix("__atomr_sub_opt::") {
555                Ok(env::var(rest).map(ConfigValue::String).unwrap_or(ConfigValue::Null))
556            } else {
557                Ok(ConfigValue::String(s))
558            }
559        }
560        ConfigValue::Array(items) => {
561            let mut out = Vec::with_capacity(items.len());
562            for it in items {
563                out.push(resolve_in(it, root)?);
564            }
565            Ok(ConfigValue::Array(out))
566        }
567        ConfigValue::Object(o) => {
568            let mut out = BTreeMap::new();
569            for (k, v) in o {
570                out.insert(k, resolve_in(v, root)?);
571            }
572            Ok(ConfigValue::Object(out))
573        }
574        other => Ok(other),
575    }
576}
577
578fn lookup_path(root: &ConfigValue, path: &str) -> Option<ConfigValue> {
579    let mut cur = root;
580    for seg in path.split('.') {
581        cur = match cur {
582            ConfigValue::Object(o) => o.get(seg)?,
583            _ => return None,
584        };
585    }
586    Some(cur.clone())
587}
588
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Parse an in-memory document, panicking on any parse error.
    fn parse_str(s: &str) -> ConfigValue {
        parse(s, Path::new(".")).unwrap()
    }

    /// Fetch the value at `path`, panicking when it is absent so that no
    /// assertion can be skipped silently.
    fn get(v: &ConfigValue, path: &str) -> ConfigValue {
        lookup_path(v, path).unwrap_or_else(|| panic!("missing key `{}`", path))
    }

    #[test]
    fn flat_assignments() {
        let v = parse_str("a = 1\nb = \"hi\"\nc = true");
        assert!(matches!(v, ConfigValue::Object(_)), "expected object");
        assert_eq!(get(&v, "a"), ConfigValue::Int(1));
        assert_eq!(get(&v, "b"), ConfigValue::String("hi".into()));
        assert_eq!(get(&v, "c"), ConfigValue::Bool(true));
    }

    #[test]
    fn dotted_keys_create_nested_objects() {
        let v = parse_str("akka.actor.provider = \"local\"");
        assert!(matches!(get(&v, "akka"), ConfigValue::Object(_)), "`akka` should be an object");
        assert!(matches!(get(&v, "akka.actor"), ConfigValue::Object(_)), "`akka.actor` should be an object");
        assert_eq!(get(&v, "akka.actor.provider"), ConfigValue::String("local".into()));
    }

    #[test]
    fn nested_object_syntax() {
        let v = parse_str("akka { actor { provider = \"local\" } }");
        assert!(matches!(v, ConfigValue::Object(_)), "expected object");
        assert_eq!(get(&v, "akka.actor.provider"), ConfigValue::String("local".into()));
    }

    #[test]
    fn comments_ignored() {
        let v = parse_str("# comment\na = 1 // inline\n/* block */\nb = 2");
        assert!(matches!(v, ConfigValue::Object(_)), "expected object");
        assert_eq!(get(&v, "a"), ConfigValue::Int(1));
        assert_eq!(get(&v, "b"), ConfigValue::Int(2));
    }

    #[test]
    fn substitution_resolves() {
        let v = parse_str("host = \"example.com\"\nurl = ${host}");
        assert_eq!(get(&v, "url"), ConfigValue::String("example.com".into()));
    }

    #[test]
    fn missing_substitution_errors() {
        let r = parse("missing = ${nope}", Path::new("."));
        assert!(matches!(r, Err(HoconError::MissingSubstitution { .. })));
    }

    #[test]
    fn optional_env_substitution_returns_null_when_unset() {
        // Choose an env var name unlikely to exist.
        std::env::remove_var("ATOMR_TEST_HOCON_UNSET_X9Z");
        let v = parse_str("x = ${?ATOMR_TEST_HOCON_UNSET_X9Z}");
        assert_eq!(get(&v, "x"), ConfigValue::Null);
    }

    #[test]
    fn optional_env_substitution_returns_value_when_set() {
        std::env::set_var("ATOMR_TEST_HOCON_SET_K1", "from-env");
        let v = parse_str("x = ${?ATOMR_TEST_HOCON_SET_K1}");
        // Clean up before asserting so a failure does not leak the variable.
        std::env::remove_var("ATOMR_TEST_HOCON_SET_K1");
        assert_eq!(get(&v, "x"), ConfigValue::String("from-env".into()));
    }

    #[test]
    fn arrays_parse() {
        let v = parse_str("xs = [1, 2, 3]");
        match get(&v, "xs") {
            ConfigValue::Array(items) => {
                assert_eq!(items.len(), 3);
                assert_eq!(items[0], ConfigValue::Int(1));
            }
            other => panic!("expected array, got {:?}", other),
        }
    }

    #[test]
    fn dotted_assignment_does_not_clobber_sibling() {
        let v = parse_str("akka.actor.provider = \"local\"\nakka.actor.dispatcher = \"default\"");
        assert_eq!(get(&v, "akka.actor.provider"), ConfigValue::String("local".into()));
        assert_eq!(get(&v, "akka.actor.dispatcher"), ConfigValue::String("default".into()));
    }

    #[test]
    fn triple_quoted_string() {
        let v = parse_str("x = \"\"\"line1\nline2\"\"\"");
        assert_eq!(get(&v, "x"), ConfigValue::String("line1\nline2".into()));
    }
}