use std::io::Cursor;
use plist::{Value, XmlWriteOptions};
use crate::{DodotError, Result};
pub fn clean(input: &[u8]) -> Result<Vec<u8>> {
let mut value = Value::from_reader(Cursor::new(input)).map_err(|e| filter_err("clean", e))?;
sort_keys_recursive(&mut value);
let mut raw = Vec::with_capacity(input.len());
value
.to_writer_xml_with_options(&mut raw, &XmlWriteOptions::default())
.map_err(|e| filter_err("clean", e))?;
let mut out = Vec::with_capacity(raw.len() + 1);
let mut i = 0;
while i < raw.len() {
let b = raw[i];
if b == b'\r' {
out.push(b'\n');
if raw.get(i + 1) == Some(&b'\n') {
i += 1;
}
} else {
out.push(b);
}
i += 1;
}
if !out.ends_with(b"\n") {
out.push(b'\n');
}
Ok(out)
}
pub fn smudge(input: &[u8]) -> Result<Vec<u8>> {
let value = Value::from_reader_xml(Cursor::new(input)).map_err(|e| filter_err("smudge", e))?;
let mut out = Vec::new();
value
.to_writer_binary(&mut out)
.map_err(|e| filter_err("smudge", e))?;
Ok(out)
}
/// Sort the keys of every dictionary reachable from `value`, in place.
///
/// Dictionaries are sorted and then each of their values is visited; arrays
/// keep their element order but each element is visited. Any other kind of
/// value is left untouched.
fn sort_keys_recursive(value: &mut Value) {
    match value {
        // Array order is preserved; only the elements themselves are descended into.
        Value::Array(items) => items.iter_mut().for_each(sort_keys_recursive),
        Value::Dictionary(entries) => {
            entries.sort_keys();
            entries
                .iter_mut()
                .for_each(|(_, child)| sort_keys_recursive(child));
        }
        // Remaining variants contain no nested dictionaries to sort.
        _ => {}
    }
}
/// Wrap a `plist::Error` in a `DodotError::Other` with troubleshooting hints.
///
/// `direction` is interpolated into the first line of the message (callers in
/// this file pass `"clean"` or `"smudge"`), followed by the underlying error
/// `e` and a fixed block of advice about likely causes.
fn filter_err(direction: &str, e: plist::Error) -> DodotError {
    // Fixed advice appended after the underlying error. Each `\` continuation
    // discards the line break and the indentation that follows it, so every
    // hint line begins with exactly the single space written before the `\`.
    const HINTS: &str = "The input does not look like a valid plist. Common causes:\n \
        - .gitattributes binds *.plist (or a broader pattern) to the\n \
        dodot-plist filter, but the file matched is not actually a plist.\n \
        - A corrupt or truncated plist on disk; try `plutil -lint <file>`\n \
        to verify, or `plutil -convert xml1 <file>` to inspect.\n \
        Run `dodot git-show-filters` to review the current filter binding.";

    let message = format!("plist {direction} failed: {e}\n {HINTS}");
    DodotError::Other(message)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal XML plist whose two top-level keys are deliberately out of
    /// lexical order (`zebra` before `apple`).
    const UNSORTED_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>zebra</key>
<string>last</string>
<key>apple</key>
<string>first</string>
</dict>
</plist>"#;

    /// Top-level dictionary keys come out in sorted order.
    #[test]
    fn clean_sorts_top_level_keys() {
        let xml = clean(UNSORTED_XML.as_bytes()).expect("clean");
        let xml_str = std::str::from_utf8(&xml).expect("utf8");
        let apple_pos = xml_str.find("apple").expect("apple key present");
        let zebra_pos = xml_str.find("zebra").expect("zebra key present");
        assert!(
            apple_pos < zebra_pos,
            "expected `apple` to appear before `zebra` after canonicalisation, got:\n{xml_str}"
        );
    }

    /// Sorting descends into dictionaries nested inside dictionaries.
    #[test]
    fn clean_sorts_nested_dict_keys() {
        let nested = r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>outer</key>
<dict>
<key>z_inner</key>
<string>last</string>
<key>a_inner</key>
<string>first</string>
</dict>
</dict>
</plist>"#;
        let xml = clean(nested.as_bytes()).expect("clean");
        let xml_str = std::str::from_utf8(&xml).expect("utf8");
        let a_pos = xml_str.find("a_inner").expect("a_inner present");
        let z_pos = xml_str.find("z_inner").expect("z_inner present");
        assert!(
            a_pos < z_pos,
            "nested dict keys should be sorted, got:\n{xml_str}"
        );
    }

    /// Arrays are ordered data: canonicalisation must not reorder elements.
    #[test]
    fn clean_preserves_array_order() {
        let arr_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<array>
<string>third</string>
<string>first</string>
<string>second</string>
</array>
</plist>"#;
        let xml = clean(arr_xml.as_bytes()).expect("clean");
        let xml_str = std::str::from_utf8(&xml).expect("utf8");
        let third = xml_str.find("third").expect("third");
        let first = xml_str.find("first").expect("first");
        let second = xml_str.find("second").expect("second");
        assert!(
            third < first && first < second,
            "array order must be preserved, got:\n{xml_str}"
        );
    }

    /// clean -> smudge -> clean yields the same canonical XML bytes.
    #[test]
    fn smudge_then_clean_roundtrips_to_canonical_xml() {
        let canonical = clean(UNSORTED_XML.as_bytes()).expect("first clean");
        let binary = smudge(&canonical).expect("smudge");
        let back = clean(&binary).expect("second clean");
        assert_eq!(
            canonical, back,
            "binary→clean must reproduce the canonical XML"
        );
    }

    /// Repeated smudge/clean cycles never drift away from the canonical form.
    #[test]
    fn determinism_property_test() {
        let canonical = clean(UNSORTED_XML.as_bytes()).expect("clean 1");
        let mut current = canonical.clone();
        for i in 0..5 {
            let binary = smudge(&current).unwrap_or_else(|e| panic!("smudge iter {i}: {e}"));
            let xml = clean(&binary).unwrap_or_else(|e| panic!("clean iter {i}: {e}"));
            assert_eq!(
                canonical, xml,
                "round-trip {i} diverged from canonical form"
            );
            current = xml;
        }
    }

    /// `clean` accepts a binary plist and produces the same canonical XML as
    /// it does for the equivalent XML input.
    #[test]
    fn clean_accepts_binary_input() {
        let canonical = clean(UNSORTED_XML.as_bytes()).expect("clean");
        let binary = smudge(&canonical).expect("smudge");
        // Guard against the test silently degrading into an XML→XML check:
        // binary property lists begin with the `bplist` magic.
        assert!(
            binary.starts_with(b"bplist"),
            "smudge output should be a binary plist"
        );
        let from_binary = clean(&binary).expect("clean from binary");
        assert_eq!(canonical, from_binary);
    }

    /// Scalars of several types survive, top-level keys are sorted, and a
    /// dictionary nested inside an array is sorted too.
    #[test]
    fn clean_handles_mixed_value_types() {
        let mixed = r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>string_key</key>
<string>hello</string>
<key>int_key</key>
<integer>42</integer>
<key>bool_key</key>
<true/>
<key>real_key</key>
<real>3.14</real>
<key>arr_key</key>
<array>
<string>a</string>
<dict>
<key>z</key>
<string>z</string>
<key>a</key>
<string>a</string>
</dict>
</array>
</dict>
</plist>"#;
        let xml = clean(mixed.as_bytes()).expect("clean");
        let xml_str = std::str::from_utf8(&xml).expect("utf8");
        let positions = ["arr_key", "bool_key", "int_key", "real_key", "string_key"]
            .iter()
            .map(|k| xml_str.find(k).unwrap_or_else(|| panic!("missing {k}")))
            .collect::<Vec<_>>();
        assert!(
            positions.windows(2).all(|w| w[0] < w[1]),
            "top-level keys not sorted, got:\n{xml_str}"
        );
        // `rfind` skips the earlier `<string>a</string>` array element and
        // lands on the entries of the dictionary nested inside the array.
        let a_inner = xml_str.rfind(">a<").expect("inner a");
        let z_inner = xml_str.rfind(">z<").expect("inner z");
        assert!(
            a_inner < z_inner,
            "nested dict keys not sorted in array element"
        );
    }

    /// Canonical output always ends with a line feed.
    #[test]
    fn clean_emits_trailing_newline() {
        let xml = clean(UNSORTED_XML.as_bytes()).expect("clean");
        assert_eq!(xml.last().copied(), Some(b'\n'), "must end with LF");
    }

    /// CRLF input yields CR-free output identical to that of the LF input.
    #[test]
    fn clean_output_contains_no_carriage_returns() {
        let crlf_xml = UNSORTED_XML.replace('\n', "\r\n");
        let xml = clean(crlf_xml.as_bytes()).expect("clean");
        assert!(
            !xml.contains(&b'\r'),
            "clean output must contain no CR bytes"
        );
        let lf_xml = clean(UNSORTED_XML.as_bytes()).expect("clean lf");
        assert_eq!(xml, lf_xml, "CRLF and LF inputs must produce same output");
    }

    /// `smudge` reports an error for input that is not XML.
    #[test]
    fn smudge_rejects_garbage_input() {
        let bad = b"not actually XML at all";
        assert!(smudge(bad).is_err(), "smudge should reject non-XML input");
    }

    /// `clean` reports an error for input that is neither XML nor binary plist.
    #[test]
    fn clean_rejects_garbage_input() {
        let bad = b"\x00\x01\x02neither binary nor XML";
        assert!(clean(bad).is_err(), "clean should reject non-plist garbage");
    }
}