1use std::collections::BTreeMap;
29use std::env;
30use std::path::{Path, PathBuf};
31
32use thiserror::Error;
33
34use crate::value::ConfigValue;
35
36#[derive(Debug, Error)]
37#[non_exhaustive]
38pub enum HoconError {
39 #[error("unexpected character `{ch}` at line {line}, col {col}")]
40 Unexpected { ch: char, line: usize, col: usize },
41 #[error("unterminated {kind} at line {line}")]
42 Unterminated { kind: &'static str, line: usize },
43 #[error("unknown substitution `${{{key}}}` (no such config key)")]
44 MissingSubstitution { key: String },
45 #[error("include error: {0}")]
46 Include(String),
47 #[error("io error reading `{path}`: {source}")]
48 Io {
49 path: String,
50 #[source]
51 source: std::io::Error,
52 },
53 #[error("expected {expected}, found `{found}` at line {line}")]
54 Expected { expected: &'static str, found: String, line: usize },
55}
56
57pub fn parse(text: &str, base_dir: &Path) -> Result<ConfigValue, HoconError> {
63 let mut p = Parser::new(text, base_dir.to_path_buf());
64 let root = p.parse_root()?;
65 let resolved = resolve_substitutions(root)?;
66 Ok(resolved)
67}
68
69pub fn parse_file(path: &Path) -> Result<ConfigValue, HoconError> {
72 let text = std::fs::read_to_string(path)
73 .map_err(|e| HoconError::Io { path: path.display().to_string(), source: e })?;
74 let base = path.parent().unwrap_or(Path::new(".")).to_path_buf();
75 parse(&text, &base)
76}
77
78struct Parser<'a> {
81 src: &'a [u8],
82 pos: usize,
83 line: usize,
84 col: usize,
85 base_dir: PathBuf,
86}
87
88impl<'a> Parser<'a> {
89 fn new(src: &'a str, base_dir: PathBuf) -> Self {
90 Self { src: src.as_bytes(), pos: 0, line: 1, col: 1, base_dir }
91 }
92
93 fn parse_root(&mut self) -> Result<ConfigValue, HoconError> {
94 self.skip_ws_and_comments();
95 let mut obj = if self.peek() == Some(b'{') {
97 self.advance(1);
98 let o = self.parse_object_body(b'}')?;
99 self.skip_ws_and_comments();
100 o
101 } else {
102 self.parse_object_body(0)?
103 };
104 self.skip_ws_and_comments();
106 if self.pos < self.src.len() {
107 return Err(HoconError::Unexpected {
108 ch: self.src[self.pos] as char,
109 line: self.line,
110 col: self.col,
111 });
112 }
113 merge_in_place(&mut obj, BTreeMap::new());
114 Ok(ConfigValue::Object(obj))
115 }
116
117 fn parse_object_body(&mut self, terminator: u8) -> Result<BTreeMap<String, ConfigValue>, HoconError> {
120 let mut obj: BTreeMap<String, ConfigValue> = BTreeMap::new();
121 loop {
122 self.skip_ws_and_comments();
123 match self.peek() {
124 None => {
125 if terminator != 0 {
126 return Err(HoconError::Unterminated { kind: "object", line: self.line });
127 }
128 return Ok(obj);
129 }
130 Some(c) if c == terminator => {
131 self.advance(1);
132 return Ok(obj);
133 }
134 Some(b',') | Some(b'\n') | Some(b';') => {
135 self.advance(1);
136 continue;
137 }
138 _ => {}
139 }
140
141 if self.starts_with_keyword("include") {
143 self.advance(7);
144 self.skip_ws_inline();
145 let path = self.parse_string()?;
146 let included = self.do_include(&path)?;
147 if let ConfigValue::Object(child) = included {
148 deep_merge(&mut obj, child);
149 } else {
150 return Err(HoconError::Include(format!(
151 "included file `{path}` did not resolve to an object"
152 )));
153 }
154 continue;
155 }
156
157 let key = self.parse_key()?;
159 self.skip_ws_inline();
160 let next = self.peek();
161 let (value, append) = match next {
162 Some(b'{') => {
163 self.advance(1);
164 let inner = self.parse_object_body(b'}')?;
165 (ConfigValue::Object(inner), false)
166 }
167 Some(b'+') if self.peek_at(1) == Some(b'=') => {
168 self.advance(2);
170 self.skip_ws_inline();
171 (self.parse_value()?, true)
172 }
173 Some(b'=') | Some(b':') => {
174 self.advance(1);
175 self.skip_ws_inline();
176 (self.parse_value()?, false)
177 }
178 Some(c) => {
179 return Err(HoconError::Expected {
180 expected: "= or : or { or +=",
181 found: format!("{}", c as char),
182 line: self.line,
183 })
184 }
185 None => return Err(HoconError::Unterminated { kind: "assignment", line: self.line }),
186 };
187 if append {
188 append_dotted(&mut obj, &key, value);
189 } else {
190 insert_dotted(&mut obj, &key, value);
191 }
192 }
193 }
194
195 fn parse_key(&mut self) -> Result<Vec<String>, HoconError> {
196 let mut parts = Vec::new();
197 loop {
198 self.skip_ws_inline();
199 let part = if self.peek() == Some(b'"') {
200 self.parse_string()?
201 } else {
202 let start = self.pos;
203 while let Some(c) = self.peek() {
204 if c.is_ascii_alphanumeric() || matches!(c, b'_' | b'-') {
205 self.advance(1);
206 } else {
207 break;
208 }
209 }
210 if start == self.pos {
211 return Err(HoconError::Expected {
212 expected: "key",
213 found: self.peek().map(|c| (c as char).to_string()).unwrap_or_default(),
214 line: self.line,
215 });
216 }
217 std::str::from_utf8(&self.src[start..self.pos])
218 .map_err(|_| HoconError::Unexpected { ch: '\0', line: self.line, col: self.col })?
219 .to_string()
220 };
221 parts.push(part);
222 if self.peek() == Some(b'.') {
223 self.advance(1);
224 continue;
225 }
226 return Ok(parts);
227 }
228 }
229
230 fn parse_value(&mut self) -> Result<ConfigValue, HoconError> {
231 self.skip_ws_inline();
232 match self.peek() {
233 Some(b'"') => Ok(ConfigValue::String(self.parse_string()?)),
234 Some(b'[') => self.parse_array(),
235 Some(b'{') => {
236 self.advance(1);
237 let inner = self.parse_object_body(b'}')?;
238 Ok(ConfigValue::Object(inner))
239 }
240 Some(b'$') if self.peek_at(1) == Some(b'{') => {
241 let sub = self.parse_substitution()?;
242 Ok(sub)
243 }
244 Some(_) => self.parse_unquoted_scalar(),
245 None => Err(HoconError::Unterminated { kind: "value", line: self.line }),
246 }
247 }
248
249 fn parse_string(&mut self) -> Result<String, HoconError> {
250 if self.starts_with(b"\"\"\"") {
252 self.advance(3);
253 let start = self.pos;
254 while self.pos + 2 < self.src.len() && &self.src[self.pos..self.pos + 3] != b"\"\"\"" {
255 if self.src[self.pos] == b'\n' {
256 self.line += 1;
257 self.col = 1;
258 } else {
259 self.col += 1;
260 }
261 self.pos += 1;
262 }
263 if self.pos + 2 >= self.src.len() {
264 return Err(HoconError::Unterminated { kind: "string", line: self.line });
265 }
266 let s = std::str::from_utf8(&self.src[start..self.pos])
267 .map_err(|_| HoconError::Unterminated { kind: "string", line: self.line })?
268 .to_string();
269 self.advance(3);
270 return Ok(s);
271 }
272 if self.peek() != Some(b'"') {
273 return Err(HoconError::Expected {
274 expected: "\"",
275 found: self.peek().map(|c| (c as char).to_string()).unwrap_or_default(),
276 line: self.line,
277 });
278 }
279 self.advance(1);
280 let mut out = String::new();
281 loop {
282 match self.peek() {
283 None | Some(b'\n') => {
284 return Err(HoconError::Unterminated { kind: "string", line: self.line })
285 }
286 Some(b'"') => {
287 self.advance(1);
288 return Ok(out);
289 }
290 Some(b'\\') => {
291 self.advance(1);
292 match self.peek() {
293 Some(b'n') => {
294 out.push('\n');
295 self.advance(1);
296 }
297 Some(b't') => {
298 out.push('\t');
299 self.advance(1);
300 }
301 Some(b'r') => {
302 out.push('\r');
303 self.advance(1);
304 }
305 Some(b'"') => {
306 out.push('"');
307 self.advance(1);
308 }
309 Some(b'\\') => {
310 out.push('\\');
311 self.advance(1);
312 }
313 Some(b'/') => {
314 out.push('/');
315 self.advance(1);
316 }
317 Some(c) => {
318 out.push(c as char);
319 self.advance(1);
320 }
321 None => {
322 return Err(HoconError::Unterminated { kind: "string-escape", line: self.line })
323 }
324 }
325 }
326 Some(c) => {
327 out.push(c as char);
328 self.advance(1);
329 }
330 }
331 }
332 }
333
334 fn parse_array(&mut self) -> Result<ConfigValue, HoconError> {
335 debug_assert_eq!(self.peek(), Some(b'['));
336 self.advance(1);
337 let mut items = Vec::new();
338 loop {
339 self.skip_ws_and_comments();
340 match self.peek() {
341 Some(b']') => {
342 self.advance(1);
343 return Ok(ConfigValue::Array(items));
344 }
345 Some(b',') | Some(b'\n') => {
346 self.advance(1);
347 }
348 Some(_) => {
349 let v = self.parse_value()?;
350 items.push(v);
351 }
352 None => return Err(HoconError::Unterminated { kind: "array", line: self.line }),
353 }
354 }
355 }
356
357 fn parse_substitution(&mut self) -> Result<ConfigValue, HoconError> {
358 debug_assert_eq!(self.peek(), Some(b'$'));
360 self.advance(1);
361 debug_assert_eq!(self.peek(), Some(b'{'));
362 self.advance(1);
363 let optional = self.peek() == Some(b'?');
364 if optional {
365 self.advance(1);
366 }
367 let start = self.pos;
368 while let Some(c) = self.peek() {
369 if c == b'}' {
370 break;
371 }
372 self.advance(1);
373 }
374 if self.peek() != Some(b'}') {
375 return Err(HoconError::Unterminated { kind: "substitution", line: self.line });
376 }
377 let key = std::str::from_utf8(&self.src[start..self.pos])
378 .map_err(|_| HoconError::Unterminated { kind: "substitution", line: self.line })?
379 .trim()
380 .to_string();
381 self.advance(1);
382 let marker = if optional { format!("__atomr_sub_opt::{key}") } else { format!("__atomr_sub::{key}") };
384 Ok(ConfigValue::String(marker))
385 }
386
387 fn parse_unquoted_scalar(&mut self) -> Result<ConfigValue, HoconError> {
388 let start = self.pos;
389 while let Some(c) = self.peek() {
390 if matches!(c, b',' | b'\n' | b'}' | b']' | b';' | b'#') {
391 break;
392 }
393 if c == b'/' && self.peek_at(1) == Some(b'/') {
394 break;
395 }
396 self.advance(1);
397 }
398 let raw = std::str::from_utf8(&self.src[start..self.pos])
399 .map_err(|_| HoconError::Unexpected { ch: '\0', line: self.line, col: self.col })?
400 .trim();
401 if raw.is_empty() {
402 return Err(HoconError::Expected { expected: "value", found: String::new(), line: self.line });
403 }
404 Ok(scalar_from_str(raw))
405 }
406
407 fn do_include(&self, rel: &str) -> Result<ConfigValue, HoconError> {
408 let p = self.base_dir.join(rel);
409 parse_file(&p)
410 }
411
412 fn peek(&self) -> Option<u8> {
415 self.src.get(self.pos).copied()
416 }
417 fn peek_at(&self, off: usize) -> Option<u8> {
418 self.src.get(self.pos + off).copied()
419 }
420 fn starts_with(&self, needle: &[u8]) -> bool {
421 self.src.len() >= self.pos + needle.len() && &self.src[self.pos..self.pos + needle.len()] == needle
422 }
423 fn starts_with_keyword(&self, kw: &str) -> bool {
424 if !self.starts_with(kw.as_bytes()) {
425 return false;
426 }
427 match self.src.get(self.pos + kw.len()) {
428 None => true,
429 Some(c) => !c.is_ascii_alphanumeric() && *c != b'_',
430 }
431 }
432 fn advance(&mut self, n: usize) {
433 for _ in 0..n {
434 if self.pos >= self.src.len() {
435 break;
436 }
437 if self.src[self.pos] == b'\n' {
438 self.line += 1;
439 self.col = 1;
440 } else {
441 self.col += 1;
442 }
443 self.pos += 1;
444 }
445 }
446 fn skip_ws_inline(&mut self) {
447 while let Some(c) = self.peek() {
448 if c == b' ' || c == b'\t' {
449 self.advance(1);
450 } else {
451 break;
452 }
453 }
454 }
455 fn skip_ws_and_comments(&mut self) {
456 loop {
457 match self.peek() {
458 Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') => {
459 self.advance(1);
460 }
461 Some(b'#') => {
462 while let Some(c) = self.peek() {
463 if c == b'\n' {
464 break;
465 }
466 self.advance(1);
467 }
468 }
469 Some(b'/') if self.peek_at(1) == Some(b'/') => {
470 while let Some(c) = self.peek() {
471 if c == b'\n' {
472 break;
473 }
474 self.advance(1);
475 }
476 }
477 Some(b'/') if self.peek_at(1) == Some(b'*') => {
478 self.advance(2);
479 while !(self.peek() == Some(b'*') && self.peek_at(1) == Some(b'/')) {
480 if self.peek().is_none() {
481 return;
482 }
483 self.advance(1);
484 }
485 self.advance(2);
486 }
487 _ => return,
488 }
489 }
490 }
491}
492
493fn scalar_from_str(s: &str) -> ConfigValue {
494 if s == "null" {
495 return ConfigValue::Null;
496 }
497 if s == "true" {
498 return ConfigValue::Bool(true);
499 }
500 if s == "false" {
501 return ConfigValue::Bool(false);
502 }
503 if let Ok(i) = s.parse::<i64>() {
504 return ConfigValue::Int(i);
505 }
506 if let Ok(f) = s.parse::<f64>() {
507 return ConfigValue::Float(f);
508 }
509 ConfigValue::String(s.to_string())
510}
511
512fn append_dotted(obj: &mut BTreeMap<String, ConfigValue>, key: &[String], value: ConfigValue) {
517 if key.is_empty() {
518 return;
519 }
520 if key.len() == 1 {
521 let leaf = obj.entry(key[0].clone()).or_insert_with(|| ConfigValue::Array(Vec::new()));
522 match leaf {
523 ConfigValue::Array(items) => items.push(value),
524 other => {
525 *other = ConfigValue::Array(vec![value]);
526 }
527 }
528 return;
529 }
530 let entry = obj.entry(key[0].clone()).or_insert_with(|| ConfigValue::Object(BTreeMap::new()));
531 if let ConfigValue::Object(child) = entry {
532 append_dotted(child, &key[1..], value);
533 } else {
534 let mut new_child: BTreeMap<String, ConfigValue> = BTreeMap::new();
535 append_dotted(&mut new_child, &key[1..], value);
536 *entry = ConfigValue::Object(new_child);
537 }
538}
539
540fn insert_dotted(obj: &mut BTreeMap<String, ConfigValue>, key: &[String], value: ConfigValue) {
541 if key.is_empty() {
542 return;
543 }
544 if key.len() == 1 {
545 if let Some(ConfigValue::Object(existing)) = obj.get_mut(&key[0]) {
546 if let ConfigValue::Object(new_obj) = value {
547 deep_merge(existing, new_obj);
548 return;
549 }
550 }
551 obj.insert(key[0].clone(), value);
552 return;
553 }
554 let entry = obj.entry(key[0].clone()).or_insert_with(|| ConfigValue::Object(BTreeMap::new()));
555 if let ConfigValue::Object(child) = entry {
556 insert_dotted(child, &key[1..], value);
557 } else {
558 let mut new_child: BTreeMap<String, ConfigValue> = BTreeMap::new();
559 insert_dotted(&mut new_child, &key[1..], value);
560 *entry = ConfigValue::Object(new_child);
561 }
562}
563
564fn deep_merge(into: &mut BTreeMap<String, ConfigValue>, from: BTreeMap<String, ConfigValue>) {
565 for (k, v) in from {
566 match (into.get_mut(&k), v) {
567 (Some(ConfigValue::Object(a)), ConfigValue::Object(b)) => {
568 deep_merge(a, b);
569 }
570 (_, v) => {
571 into.insert(k, v);
572 }
573 }
574 }
575}
576
/// Merge hook with an empty body: both arguments are ignored and `_into` is left untouched.
// NOTE(review): `parse_root` calls this with an empty map; presumably a stub for a future
// root-level merge step — confirm the intent, or remove it together with that call.
fn merge_in_place(_into: &mut BTreeMap<String, ConfigValue>, _from: BTreeMap<String, ConfigValue>) {}
578
579fn resolve_substitutions(v: ConfigValue) -> Result<ConfigValue, HoconError> {
582 let snapshot = v.clone();
583 resolve_in(v, &snapshot)
584}
585
586fn resolve_in(v: ConfigValue, root: &ConfigValue) -> Result<ConfigValue, HoconError> {
587 match v {
588 ConfigValue::String(s) => {
589 if let Some(rest) = s.strip_prefix("__atomr_sub::") {
590 let lookup = lookup_path(root, rest);
591 lookup.ok_or_else(|| HoconError::MissingSubstitution { key: rest.to_string() })
592 } else if let Some(rest) = s.strip_prefix("__atomr_sub_opt::") {
593 Ok(env::var(rest).map(ConfigValue::String).unwrap_or(ConfigValue::Null))
594 } else {
595 Ok(ConfigValue::String(s))
596 }
597 }
598 ConfigValue::Array(items) => {
599 let mut out = Vec::with_capacity(items.len());
600 for it in items {
601 out.push(resolve_in(it, root)?);
602 }
603 Ok(ConfigValue::Array(out))
604 }
605 ConfigValue::Object(o) => {
606 let mut out = BTreeMap::new();
607 for (k, v) in o {
608 out.insert(k, resolve_in(v, root)?);
609 }
610 Ok(ConfigValue::Object(out))
611 }
612 other => Ok(other),
613 }
614}
615
616fn lookup_path(root: &ConfigValue, path: &str) -> Option<ConfigValue> {
617 let mut cur = root;
618 for seg in path.split('.') {
619 cur = match cur {
620 ConfigValue::Object(o) => o.get(seg)?,
621 _ => return None,
622 };
623 }
624 Some(cur.clone())
625}
626
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Parses `s` relative to the current directory, panicking on any parse error.
    fn parse_str(s: &str) -> ConfigValue {
        parse(s, Path::new(".")).unwrap()
    }

    /// Asserts that `path` resolves to `expected` inside `v`.
    ///
    /// Going through `lookup_path` makes every check unconditional: a root that is not an
    /// object fails the assertion instead of silently skipping it.
    fn assert_at(v: &ConfigValue, path: &str, expected: ConfigValue) {
        assert_eq!(lookup_path(v, path), Some(expected), "value at `{path}`");
    }

    #[test]
    fn flat_assignments() {
        let v = parse_str("a = 1\nb = \"hi\"\nc = true");
        assert!(matches!(v, ConfigValue::Object(_)), "expected object");
        assert_at(&v, "a", ConfigValue::Int(1));
        assert_at(&v, "b", ConfigValue::String("hi".into()));
        assert_at(&v, "c", ConfigValue::Bool(true));
    }

    #[test]
    fn dotted_keys_create_nested_objects() {
        let v = parse_str("akka.actor.provider = \"local\"");
        assert!(matches!(lookup_path(&v, "akka"), Some(ConfigValue::Object(_))));
        assert!(matches!(lookup_path(&v, "akka.actor"), Some(ConfigValue::Object(_))));
        assert_at(&v, "akka.actor.provider", ConfigValue::String("local".into()));
    }

    #[test]
    fn nested_object_syntax() {
        let v = parse_str("akka { actor { provider = \"local\" } }");
        assert_at(&v, "akka.actor.provider", ConfigValue::String("local".into()));
    }

    #[test]
    fn comments_ignored() {
        let v = parse_str("# comment\na = 1 // inline\n/* block */\nb = 2");
        assert_at(&v, "a", ConfigValue::Int(1));
        assert_at(&v, "b", ConfigValue::Int(2));
    }

    #[test]
    fn substitution_resolves() {
        let v = parse_str("host = \"example.com\"\nurl = ${host}");
        assert_at(&v, "url", ConfigValue::String("example.com".into()));
    }

    #[test]
    fn missing_substitution_errors() {
        let r = parse("missing = ${nope}", Path::new("."));
        assert!(matches!(r, Err(HoconError::MissingSubstitution { .. })));
    }

    #[test]
    fn optional_env_substitution_returns_null_when_unset() {
        std::env::remove_var("ATOMR_TEST_HOCON_UNSET_X9Z");
        let v = parse_str("x = ${?ATOMR_TEST_HOCON_UNSET_X9Z}");
        assert_at(&v, "x", ConfigValue::Null);
    }

    #[test]
    fn optional_env_substitution_returns_value_when_set() {
        // The variable name is unique to this test, so parallel tests do not observe it.
        std::env::set_var("ATOMR_TEST_HOCON_SET_K1", "from-env");
        let v = parse_str("x = ${?ATOMR_TEST_HOCON_SET_K1}");
        std::env::remove_var("ATOMR_TEST_HOCON_SET_K1");
        assert_at(&v, "x", ConfigValue::String("from-env".into()));
    }

    #[test]
    fn arrays_parse() {
        let v = parse_str("xs = [1, 2, 3]");
        assert_at(
            &v,
            "xs",
            ConfigValue::Array(vec![ConfigValue::Int(1), ConfigValue::Int(2), ConfigValue::Int(3)]),
        );
    }

    #[test]
    fn dotted_assignment_does_not_clobber_sibling() {
        let v = parse_str("akka.actor.provider = \"local\"\nakka.actor.dispatcher = \"default\"");
        assert_at(&v, "akka.actor.provider", ConfigValue::String("local".into()));
        assert_at(&v, "akka.actor.dispatcher", ConfigValue::String("default".into()));
    }

    #[test]
    fn triple_quoted_string() {
        let v = parse_str("x = \"\"\"line1\nline2\"\"\"");
        assert_at(&v, "x", ConfigValue::String("line1\nline2".into()));
    }

    #[test]
    fn append_creates_array_when_absent() {
        let v = parse_str("xs += 1\nxs += 2");
        assert_at(&v, "xs", ConfigValue::Array(vec![ConfigValue::Int(1), ConfigValue::Int(2)]));
    }

    #[test]
    fn append_extends_existing_array() {
        let v = parse_str("xs = [1, 2]\nxs += 3");
        assert_at(
            &v,
            "xs",
            ConfigValue::Array(vec![ConfigValue::Int(1), ConfigValue::Int(2), ConfigValue::Int(3)]),
        );
    }

    #[test]
    fn append_with_dotted_key() {
        let v = parse_str("akka.actor.deployers += \"local\"\nakka.actor.deployers += \"remote\"");
        assert_at(
            &v,
            "akka.actor.deployers",
            ConfigValue::Array(vec![
                ConfigValue::String("local".into()),
                ConfigValue::String("remote".into()),
            ]),
        );
    }

    #[test]
    fn substitution_inside_array_resolves() {
        let v = parse_str("base = \"x\"\nxs = [${base}, ${base}]");
        assert_at(
            &v,
            "xs",
            ConfigValue::Array(vec![ConfigValue::String("x".into()), ConfigValue::String("x".into())]),
        );
    }
}