qubit_config/source/properties_config_source.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2025 - 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! # Properties File Configuration Source
11//!
12//! Loads configuration from Java `.properties` format files.
13//!
14//! # Format
15//!
16//! The `.properties` format supports:
17//! - `key=value` assignments
18//! - `key: value` assignments (colon separator)
19//! - `key value` assignments (whitespace separator)
20//! - `# comment` and `! comment` lines
21//! - Blank lines (ignored)
22//! - Line continuation with an odd number of `\` characters at end of line
23//! - Java properties escape sequences (`\uXXXX`, `\=`, `\:`, `\ `, etc.)
24//!
25
26use std::iter::Peekable;
27use std::path::{Path, PathBuf};
28use std::str::Chars;
29
30use crate::{Config, ConfigError, ConfigResult};
31
32use super::ConfigSource;
33
/// Configuration source that loads from Java `.properties` format files
///
/// The source only remembers the file path given to `from_file`; the file
/// itself is read and parsed when `load` is called.
///
/// # Examples
///
/// ```rust
/// use qubit_config::source::{PropertiesConfigSource, ConfigSource};
/// use qubit_config::Config;
///
/// let temp_dir = tempfile::tempdir().unwrap();
/// let path = temp_dir.path().join("config.properties");
/// std::fs::write(&path, "server.port=8080\n").unwrap();
/// let source = PropertiesConfigSource::from_file(path);
/// let mut config = Config::new();
/// source.load(&mut config).unwrap();
/// let value = config.get::<String>("server.port").unwrap();
/// assert_eq!(value, "8080");
/// ```
///
#[derive(Debug, Clone)]
pub struct PropertiesConfigSource {
    /// Location of the `.properties` file that `load` reads.
    path: PathBuf,
}
56
57impl PropertiesConfigSource {
58 /// Creates a new `PropertiesConfigSource` from a file path
59 ///
60 /// # Parameters
61 ///
62 /// * `path` - Path to the `.properties` file
63 #[inline]
64 pub fn from_file<P: AsRef<Path>>(path: P) -> Self {
65 Self {
66 path: path.as_ref().to_path_buf(),
67 }
68 }
69
70 /// Parses a `.properties` format string into key-value pairs
71 ///
72 /// # Parameters
73 ///
74 /// * `content` - The content of the `.properties` file
75 ///
76 /// # Returns
77 ///
78 /// Returns a vector of `(key, value)` pairs
79 pub fn parse_content(content: &str) -> Vec<(String, String)> {
80 let mut result = Vec::new();
81 let mut lines = content.lines().peekable();
82
83 while let Some(line) = lines.next() {
84 let trimmed = line.trim_start();
85
86 // Skip blank lines and comments
87 if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
88 continue;
89 }
90
91 // Handle line continuation
92 let mut full_line = trimmed.to_string();
93 while has_line_continuation(&full_line) {
94 full_line.pop(); // remove trailing backslash
95 if let Some(next) = lines.next() {
96 full_line.push_str(next.trim_start());
97 } else {
98 break;
99 }
100 }
101
102 // Parse key/value pairs using Java properties separators.
103 if let Some((key, value)) = parse_key_value(&full_line) {
104 let key = unescape_properties(key);
105 let value = unescape_properties(value);
106 result.push((key, value));
107 }
108 }
109
110 result
111 }
112}
113
114/// Parses a single `key=value`, `key: value`, or `key value` line.
115fn parse_key_value(line: &str) -> Option<(&str, &str)> {
116 let line = line.trim_start();
117
118 for (i, ch) in line.char_indices() {
119 if ch == '=' || ch == ':' {
120 // Separator is escaped only if there is an odd number of trailing backslashes.
121 if !is_escaped_separator(line, i) {
122 let value_start = skip_properties_whitespace(line, i + ch.len_utf8());
123 return Some((&line[..i], &line[value_start..]));
124 }
125 }
126 if ch.is_whitespace() && !is_escaped_separator(line, i) {
127 let mut value_start = skip_properties_whitespace(line, i);
128 if let Some((sep, sep_len)) = char_at(line, value_start)
129 && (sep == '=' || sep == ':')
130 && !is_escaped_separator(line, value_start)
131 {
132 value_start = skip_properties_whitespace(line, value_start + sep_len);
133 }
134 return Some((&line[..i], &line[value_start..]));
135 }
136 }
137 // No separator found - treat the whole line as a key with empty value.
138 (!line.is_empty()).then_some((line, ""))
139}
140
/// Returns the character and byte width at `index`.
///
/// Never panics: an index that is past the end of `line`, equal to its
/// length, or in the middle of a multi-byte character yields `None`.
///
/// # Parameters
///
/// * `line` - Source properties line.
/// * `index` - Byte index to inspect.
///
/// # Returns
///
/// `Some((ch, len))` if `index` points to a character boundary inside `line`,
/// otherwise `None`.
#[inline]
fn char_at(line: &str, index: usize) -> Option<(char, usize)> {
    // `str::get` rejects out-of-range and non-boundary indices instead of
    // panicking like `line[index..]` would.
    let ch = line.get(index..)?.chars().next()?;
    Some((ch, ch.len_utf8()))
}
163
/// Advances past any run of whitespace beginning at byte index `start`.
///
/// Whitespace is decided by `char::is_whitespace`.
///
/// # Parameters
///
/// * `line` - Source properties line.
/// * `start` - Byte index to start scanning from; must be a character
///   boundary no greater than `line.len()`.
///
/// # Returns
///
/// The byte index of the first non-whitespace character at or after `start`,
/// or `line.len()` when only whitespace remains.
fn skip_properties_whitespace(line: &str, start: usize) -> usize {
    let tail = &line[start..];
    match tail.find(|c: char| !c.is_whitespace()) {
        Some(offset) => start + offset,
        None => line.len(),
    }
}
183
/// Reports whether the character at `sep_pos` is escaped, i.e. immediately
/// preceded by an odd-length run of backslashes.
///
/// # Parameters
///
/// * `line` - Full properties line being parsed.
/// * `sep_pos` - Byte index of the candidate separator in `line`; must not
///   exceed `line.len()`.
///
/// # Returns
///
/// `true` when the separator is escaped and must not split the key/value.
#[inline]
fn is_escaped_separator(line: &str, sep_pos: usize) -> bool {
    let prefix = &line.as_bytes()[..sep_pos];
    // Walk backwards over the backslash run, flipping parity for each one.
    let mut escaped = false;
    for &byte in prefix.iter().rev() {
        if byte != b'\\' {
            break;
        }
        escaped = !escaped;
    }
    escaped
}
204
205/// Returns true if a physical line continues on the next line.
206///
207/// Java-style properties only treat an odd number of trailing backslashes as a
208/// continuation marker; an even number represents escaped literal backslashes.
209///
210/// # Parameters
211///
212/// * `line` - Physical properties line after outer whitespace trimming.
213///
214/// # Returns
215///
216/// `true` when the line should be joined with the next physical line.
217#[inline]
218fn has_line_continuation(line: &str) -> bool {
219 count_trailing_backslashes(line) % 2 == 1
220}
221
/// Measures the run of backslashes at the end of a string.
///
/// # Parameters
///
/// * `line` - Source line or key/value segment.
///
/// # Returns
///
/// Number of trailing `\` bytes.
#[inline]
fn count_trailing_backslashes(line: &str) -> usize {
    // A backslash is one byte, so the length removed by trimming is the count.
    let without_run = line.trim_end_matches('\\');
    line.len() - without_run.len()
}
239
/// Processes Java properties escape sequences in a string.
///
/// Recognised escapes are `\n`, `\t`, `\r`, `\f` and `\uXXXX` (exactly four
/// hexadecimal digits, with UTF-16 surrogate pairs combined into one
/// character). Any other escaped character, such as `\=`, `\:`, `\ ` or
/// `\\`, stands for itself. A malformed or unpaired `\u` escape is kept
/// verbatim, as is a lone backslash at the end of the input.
///
/// # Parameters
///
/// * `s` - Raw key or value text taken from a properties line.
///
/// # Returns
///
/// The text with all escape sequences resolved.
fn unescape_properties(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        // A backslash with nothing after it is kept as a literal backslash.
        let escaped = chars.next().unwrap_or('\\');
        match escaped {
            'u' => {
                let hex: String = chars.by_ref().take(4).collect();
                let decoded =
                    parse_hex4(&hex).and_then(|code| decode_unicode_escape(code, &mut chars));
                match decoded {
                    Some(unicode_char) => result.push(unicode_char),
                    None => {
                        // Malformed or unpaired escape: keep the original text.
                        result.push_str("\\u");
                        result.push_str(&hex);
                    }
                }
            }
            'n' => result.push('\n'),
            't' => result.push('\t'),
            'r' => result.push('\r'),
            'f' => result.push('\u{000C}'),
            // `\\`, `\=`, `\:`, `\ `, `\#`, `\!` and anything else: the
            // escaped character represents itself.
            other => result.push(other),
        }
    }

    result
}

/// Parses the digits of a `\uXXXX` escape.
///
/// `u32::from_str_radix` alone is too lenient here because it accepts a
/// leading `+` sign, so every character is checked to be a hex digit first.
///
/// # Parameters
///
/// * `hex` - Text following `\u`.
///
/// # Returns
///
/// The code unit when `hex` is exactly four ASCII hexadecimal digits,
/// otherwise `None`.
fn parse_hex4(hex: &str) -> Option<u32> {
    if hex.len() == 4 && hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        u32::from_str_radix(hex, 16).ok()
    } else {
        None
    }
}

/// Decodes a Java properties `\uXXXX` escape, including UTF-16 surrogate pairs.
///
/// # Parameters
///
/// * `code` - Code unit parsed from the escape that was just consumed.
/// * `chars` - Remaining input; advanced past a following `\uXXXX` only when
///   it supplies the low surrogate completing a pair, otherwise untouched.
///
/// # Returns
///
/// The decoded character, or `None` for a lone low surrogate or a high
/// surrogate that is not immediately followed by a low-surrogate escape.
fn decode_unicode_escape(code: u32, chars: &mut Peekable<Chars<'_>>) -> Option<char> {
    if is_low_surrogate(code) {
        return None;
    }
    if !is_high_surrogate(code) {
        return char::from_u32(code);
    }

    // Probe on a copy so that nothing is consumed unless the pair is valid.
    let mut lookahead = chars.clone();
    if lookahead.next() != Some('\\') || lookahead.next() != Some('u') {
        return None;
    }
    let low_hex: String = lookahead.by_ref().take(4).collect();
    let low = parse_hex4(&low_hex).filter(|&unit| is_low_surrogate(unit))?;

    *chars = lookahead;
    decode_surrogate_pair(code, low)
}

/// Returns whether `code` is a UTF-16 high surrogate.
#[inline]
fn is_high_surrogate(code: u32) -> bool {
    (0xD800..=0xDBFF).contains(&code)
}

/// Returns whether `code` is a UTF-16 low surrogate.
#[inline]
fn is_low_surrogate(code: u32) -> bool {
    (0xDC00..=0xDFFF).contains(&code)
}

/// Decodes a UTF-16 surrogate pair to a Unicode scalar value.
///
/// # Returns
///
/// The combined character, or `None` when `high` or `low` is outside its
/// surrogate range.
fn decode_surrogate_pair(high: u32, low: u32) -> Option<char> {
    // Guard the subtractions below against underflow on bad input.
    if !is_high_surrogate(high) || !is_low_surrogate(low) {
        return None;
    }
    let scalar = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
    char::from_u32(scalar)
}
332
333impl ConfigSource for PropertiesConfigSource {
334 fn load(&self, config: &mut Config) -> ConfigResult<()> {
335 let content = std::fs::read_to_string(&self.path).map_err(|e| {
336 ConfigError::IoError(std::io::Error::new(
337 e.kind(),
338 format!(
339 "Failed to read properties file '{}': {}",
340 self.path.display(),
341 e
342 ),
343 ))
344 })?;
345
346 let mut staged = config.clone();
347 for (key, value) in Self::parse_content(&content) {
348 staged.set(&key, value)?;
349 }
350
351 *config = staged;
352 Ok(())
353 }
354}