Skip to main content

qubit_config/source/
properties_config_source.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2025 - 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! # Properties File Configuration Source
11//!
12//! Loads configuration from Java `.properties` format files.
13//!
14//! # Format
15//!
16//! The `.properties` format supports:
17//! - `key=value` assignments
18//! - `key: value` assignments (colon separator)
19//! - `key value` assignments (whitespace separator)
20//! - `# comment` and `! comment` lines
21//! - Blank lines (ignored)
22//! - Line continuation with an odd number of `\` characters at end of line
23//! - Java properties escape sequences (`\uXXXX`, `\=`, `\:`, `\ `, etc.)
24//!
25
26use std::iter::Peekable;
27use std::path::{Path, PathBuf};
28use std::str::Chars;
29
30use crate::{Config, ConfigError, ConfigResult};
31
32use super::ConfigSource;
33
/// Configuration source that loads from Java `.properties` format files
///
/// The source stores only the path of the file. Creating a value with
/// [`PropertiesConfigSource::from_file`] performs no I/O; the file is read and
/// parsed when [`ConfigSource::load`] is called on the source.
///
/// # Examples
///
/// ```rust
/// use qubit_config::source::{PropertiesConfigSource, ConfigSource};
/// use qubit_config::Config;
///
/// let temp_dir = tempfile::tempdir().unwrap();
/// let path = temp_dir.path().join("config.properties");
/// std::fs::write(&path, "server.port=8080\n").unwrap();
/// let source = PropertiesConfigSource::from_file(path);
/// let mut config = Config::new();
/// source.load(&mut config).unwrap();
/// let value = config.get::<String>("server.port").unwrap();
/// assert_eq!(value, "8080");
/// ```
#[derive(Debug, Clone)]
pub struct PropertiesConfigSource {
    /// Path of the `.properties` file that `load` reads.
    path: PathBuf,
}
56
57impl PropertiesConfigSource {
58    /// Creates a new `PropertiesConfigSource` from a file path
59    ///
60    /// # Parameters
61    ///
62    /// * `path` - Path to the `.properties` file
63    #[inline]
64    pub fn from_file<P: AsRef<Path>>(path: P) -> Self {
65        Self {
66            path: path.as_ref().to_path_buf(),
67        }
68    }
69
70    /// Parses a `.properties` format string into key-value pairs
71    ///
72    /// # Parameters
73    ///
74    /// * `content` - The content of the `.properties` file
75    ///
76    /// # Returns
77    ///
78    /// Returns a vector of `(key, value)` pairs
79    pub fn parse_content(content: &str) -> Vec<(String, String)> {
80        let mut result = Vec::new();
81        let mut lines = content.lines().peekable();
82
83        while let Some(line) = lines.next() {
84            let trimmed = line.trim_start();
85
86            // Skip blank lines and comments
87            if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('!') {
88                continue;
89            }
90
91            // Handle line continuation
92            let mut full_line = trimmed.to_string();
93            while has_line_continuation(&full_line) {
94                full_line.pop(); // remove trailing backslash
95                if let Some(next) = lines.next() {
96                    full_line.push_str(next.trim_start());
97                } else {
98                    break;
99                }
100            }
101
102            // Parse key/value pairs using Java properties separators.
103            if let Some((key, value)) = parse_key_value(&full_line) {
104                let key = unescape_properties(key);
105                let value = unescape_properties(value);
106                result.push((key, value));
107            }
108        }
109
110        result
111    }
112}
113
114/// Parses a single `key=value`, `key: value`, or `key value` line.
115fn parse_key_value(line: &str) -> Option<(&str, &str)> {
116    let line = line.trim_start();
117
118    for (i, ch) in line.char_indices() {
119        if ch == '=' || ch == ':' {
120            // Separator is escaped only if there is an odd number of trailing backslashes.
121            if !is_escaped_separator(line, i) {
122                let value_start = skip_properties_whitespace(line, i + ch.len_utf8());
123                return Some((&line[..i], &line[value_start..]));
124            }
125        }
126        if ch.is_whitespace() && !is_escaped_separator(line, i) {
127            let mut value_start = skip_properties_whitespace(line, i);
128            if let Some((sep, sep_len)) = char_at(line, value_start)
129                && (sep == '=' || sep == ':')
130                && !is_escaped_separator(line, value_start)
131            {
132                value_start = skip_properties_whitespace(line, value_start + sep_len);
133            }
134            return Some((&line[..i], &line[value_start..]));
135        }
136    }
137    // No separator found - treat the whole line as a key with empty value.
138    (!line.is_empty()).then_some((line, ""))
139}
140
/// Returns the character and byte width at `index`.
///
/// The lookup never panics: an index past the end of `line`, or one that
/// falls inside a multi-byte character, yields `None`.
///
/// # Parameters
///
/// * `line` - Source properties line.
/// * `index` - Byte index to inspect.
///
/// # Returns
///
/// `Some((ch, len))` if `index` points to a character boundary inside `line`,
/// otherwise `None`.
#[inline]
fn char_at(line: &str, index: usize) -> Option<(char, usize)> {
    // `str::get` rejects out-of-range and non-boundary indices instead of
    // panicking; an index equal to `line.len()` gives an empty tail.
    let ch = line.get(index..)?.chars().next()?;
    Some((ch, ch.len_utf8()))
}
163
/// Advances past any run of whitespace that begins at `start`.
///
/// Whitespace is whatever `char::is_whitespace` accepts.
///
/// # Parameters
///
/// * `line` - Source properties line.
/// * `start` - Byte index to start scanning from; it must be a character
///   boundary no greater than `line.len()`, otherwise slicing panics.
///
/// # Returns
///
/// The byte index of the first non-whitespace character at or after `start`,
/// or `line.len()` when only whitespace remains.
fn skip_properties_whitespace(line: &str, start: usize) -> usize {
    // Whatever survives trimming the tail is where the next token begins, so
    // its length measured from the end of `line` gives the absolute index.
    let remaining = line[start..].trim_start();
    line.len() - remaining.len()
}
183
/// Tells whether the character at `sep_pos` is escaped, i.e. directly
/// preceded by an odd number of backslashes.
///
/// # Parameters
///
/// * `line` - Full properties line being parsed.
/// * `sep_pos` - Byte index of the character to test (a `=`, `:`, or
///   whitespace character); must not exceed `line.len()`.
///
/// # Returns
///
/// `true` when the character is escaped and must not split the key/value.
#[inline]
fn is_escaped_separator(line: &str, sep_pos: usize) -> bool {
    let preceding = &line.as_bytes()[..sep_pos];

    // Walk backwards over the backslash run, flipping parity once per
    // backslash; an odd run leaves the flag set.
    let mut escaped = false;
    for &byte in preceding.iter().rev() {
        if byte != b'\\' {
            break;
        }
        escaped = !escaped;
    }
    escaped
}
204
205/// Returns true if a physical line continues on the next line.
206///
207/// Java-style properties only treat an odd number of trailing backslashes as a
208/// continuation marker; an even number represents escaped literal backslashes.
209///
210/// # Parameters
211///
212/// * `line` - Physical properties line after outer whitespace trimming.
213///
214/// # Returns
215///
216/// `true` when the line should be joined with the next physical line.
217#[inline]
218fn has_line_continuation(line: &str) -> bool {
219    count_trailing_backslashes(line) % 2 == 1
220}
221
/// Measures the run of backslashes at the end of a string.
///
/// # Parameters
///
/// * `line` - Source line or key/value segment.
///
/// # Returns
///
/// Number of trailing `\` bytes.
#[inline]
fn count_trailing_backslashes(line: &str) -> usize {
    // A backslash is a single byte, so the length removed by trimming equals
    // the number of trailing backslashes.
    let without_run = line.trim_end_matches('\\');
    line.len() - without_run.len()
}
239
240/// Processes Java properties escape sequences in a string.
241fn unescape_properties(s: &str) -> String {
242    let mut result = String::with_capacity(s.len());
243    let mut chars = s.chars().peekable();
244
245    while let Some(ch) = chars.next() {
246        if ch == '\\' {
247            let escaped = chars.next().unwrap_or('\\');
248            match escaped {
249                'u' => {
250                    let hex: String = chars.by_ref().take(4).collect();
251                    if hex.len() == 4
252                        && let Ok(code) = u32::from_str_radix(&hex, 16)
253                        && let Some(unicode_char) = decode_unicode_escape(code, &mut chars)
254                    {
255                        result.push(unicode_char);
256                        continue;
257                    }
258                    // If parsing fails, keep original
259                    result.push('\\');
260                    result.push('u');
261                    result.push_str(&hex);
262                }
263                'n' => {
264                    result.push('\n');
265                }
266                't' => {
267                    result.push('\t');
268                }
269                'r' => {
270                    result.push('\r');
271                }
272                'f' => {
273                    result.push('\u{000C}');
274                }
275                '\\' => {
276                    result.push('\\');
277                }
278                '=' | ':' | ' ' | '#' | '!' => {
279                    result.push(escaped);
280                }
281                _ => {
282                    result.push(escaped);
283                }
284            }
285        } else {
286            result.push(ch);
287        }
288    }
289
290    result
291}
292
293/// Decodes a Java properties `\uXXXX` escape, including UTF-16 surrogate pairs.
294fn decode_unicode_escape(code: u32, chars: &mut Peekable<Chars<'_>>) -> Option<char> {
295    if is_high_surrogate(code) {
296        let mut lookahead = chars.clone();
297        if lookahead.next() == Some('\\') && lookahead.next() == Some('u') {
298            let low_hex: String = lookahead.by_ref().take(4).collect();
299            if low_hex.len() == 4
300                && let Ok(low) = u32::from_str_radix(&low_hex, 16)
301                && is_low_surrogate(low)
302            {
303                *chars = lookahead;
304                return decode_surrogate_pair(code, low);
305            }
306        }
307        None
308    } else if is_low_surrogate(code) {
309        None
310    } else {
311        char::from_u32(code)
312    }
313}
314
/// Tells whether `code` lies in the UTF-16 high (leading) surrogate range
/// `0xD800..=0xDBFF`.
#[inline]
fn is_high_surrogate(code: u32) -> bool {
    matches!(code, 0xD800..=0xDBFF)
}
320
/// Tells whether `code` lies in the UTF-16 low (trailing) surrogate range
/// `0xDC00..=0xDFFF`.
#[inline]
fn is_low_surrogate(code: u32) -> bool {
    matches!(code, 0xDC00..=0xDFFF)
}
326
/// Decodes a UTF-16 surrogate pair to a Unicode scalar value.
///
/// # Parameters
///
/// * `high` - Leading code unit, expected in `0xD800..=0xDBFF`.
/// * `low` - Trailing code unit, expected in `0xDC00..=0xDFFF`.
///
/// # Returns
///
/// The supplementary-plane character encoded by the pair, or `None` when
/// either argument lies outside its surrogate range.
fn decode_surrogate_pair(high: u32, low: u32) -> Option<char> {
    // Validate first: the subtractions below would underflow for values that
    // are not surrogates (a panic in debug builds, a bogus scalar in release).
    if !(0xD800..=0xDBFF).contains(&high) || !(0xDC00..=0xDFFF).contains(&low) {
        return None;
    }
    let scalar = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
    char::from_u32(scalar)
}
332
333impl ConfigSource for PropertiesConfigSource {
334    fn load(&self, config: &mut Config) -> ConfigResult<()> {
335        let content = std::fs::read_to_string(&self.path).map_err(|e| {
336            ConfigError::IoError(std::io::Error::new(
337                e.kind(),
338                format!(
339                    "Failed to read properties file '{}': {}",
340                    self.path.display(),
341                    e
342                ),
343            ))
344        })?;
345
346        let mut staged = config.clone();
347        for (key, value) in Self::parse_content(&content) {
348            staged.set(&key, value)?;
349        }
350
351        *config = staged;
352        Ok(())
353    }
354}