use std::collections::HashSet;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use flate2::read::MultiGzDecoder;
use quick_xml::events::Event;
use quick_xml::Reader;
use rustc_hash::FxHashMap;
use super::compact::{arena_append_node, arena_append_relation, arena_append_way};
use super::interner::StringInterner;
use super::xml_parse::{
handle_empty_event_compact, handle_end_event_compact, handle_start_event_compact, ParserState,
};
use super::ParseResult;
pub use super::compact::{
CompactMemberIter, CompactNodeRef, CompactRefIter, CompactRelationRef,
CompactTagIter, CompactWayRef,
};
/// In-memory overlay of the elements created, modified, or deleted by one or
/// more OSC (osmChange) diff files.
///
/// Element records are appended to per-type byte arenas; each index maps an
/// element ID to the `u32` offset at which its most recently pushed record
/// starts. Deleted IDs are tracked in separate sets.
pub struct CompactDiffOverlay {
/// Byte arena that node records are appended to.
node_arena: Vec<u8>,
/// Byte arena that way records are appended to.
way_arena: Vec<u8>,
/// Byte arena that relation records are appended to.
relation_arena: Vec<u8>,
/// Node ID -> start offset of that node's current record in `node_arena`.
node_index: FxHashMap<i64, u32>,
/// Way ID -> start offset of that way's current record in `way_arena`.
way_index: FxHashMap<i64, u32>,
/// Relation ID -> start offset of that relation's current record in `relation_arena`.
relation_index: FxHashMap<i64, u32>,
/// IDs of nodes marked as deleted.
pub deleted_nodes: HashSet<i64>,
/// IDs of ways marked as deleted.
pub deleted_ways: HashSet<i64>,
/// IDs of relations marked as deleted.
pub deleted_relations: HashSet<i64>,
/// Interner shared by all records; the element views returned by the
/// `get_*` accessors borrow it to resolve interned string IDs.
interner: StringInterner,
}
impl CompactDiffOverlay {
    /// Creates an overlay with no elements, no delete markers, and a fresh interner.
    pub fn new() -> Self {
        Self {
            node_arena: Vec::new(),
            way_arena: Vec::new(),
            relation_arena: Vec::new(),
            node_index: FxHashMap::default(),
            way_index: FxHashMap::default(),
            relation_index: FxHashMap::default(),
            deleted_nodes: HashSet::new(),
            deleted_ways: HashSet::new(),
            deleted_relations: HashSet::new(),
            interner: StringInterner::new(),
        }
    }

    /// Returns `true` when the overlay has no indexed elements and no delete markers.
    pub fn is_empty(&self) -> bool {
        self.node_index.is_empty()
            && self.way_index.is_empty()
            && self.relation_index.is_empty()
            && self.deleted_nodes.is_empty()
            && self.deleted_ways.is_empty()
            && self.deleted_relations.is_empty()
    }

    /// Looks up the current record for node `id`.
    ///
    /// Returns `None` when the node is not indexed (never pushed, or deleted
    /// afterwards). The returned view borrows the arena from the record's
    /// start offset to the end of the arena, plus the shared interner.
    pub fn get_node(&self, id: i64) -> Option<CompactNodeRef<'_>> {
        let &offset = self.node_index.get(&id)?;
        Some(CompactNodeRef {
            data: &self.node_arena[offset as usize..],
            interner: &self.interner,
        })
    }

    /// Looks up the current record for way `id`; see [`Self::get_node`].
    pub fn get_way(&self, id: i64) -> Option<CompactWayRef<'_>> {
        let &offset = self.way_index.get(&id)?;
        Some(CompactWayRef {
            data: &self.way_arena[offset as usize..],
            interner: &self.interner,
        })
    }

    /// Looks up the current record for relation `id`; see [`Self::get_node`].
    pub fn get_relation(&self, id: i64) -> Option<CompactRelationRef<'_>> {
        let &offset = self.relation_index.get(&id)?;
        Some(CompactRelationRef {
            data: &self.relation_arena[offset as usize..],
            interner: &self.interner,
        })
    }

    /// Returns `true` if node `id` is currently indexed.
    pub fn has_node(&self, id: i64) -> bool {
        self.node_index.contains_key(&id)
    }

    /// Returns `true` if way `id` is currently indexed.
    pub fn has_way(&self, id: i64) -> bool {
        self.way_index.contains_key(&id)
    }

    /// Returns `true` if relation `id` is currently indexed.
    pub fn has_relation(&self, id: i64) -> bool {
        self.relation_index.contains_key(&id)
    }

    /// Iterates over the IDs of all indexed nodes, in hash-map (arbitrary) order.
    pub fn node_ids(&self) -> impl Iterator<Item = &i64> {
        self.node_index.keys()
    }

    /// Iterates over the IDs of all indexed ways, in hash-map (arbitrary) order.
    pub fn way_ids(&self) -> impl Iterator<Item = &i64> {
        self.way_index.keys()
    }

    /// Iterates over the IDs of all indexed relations, in hash-map (arbitrary) order.
    pub fn relation_ids(&self) -> impl Iterator<Item = &i64> {
        self.relation_index.keys()
    }

    /// Number of indexed nodes.
    pub fn node_count(&self) -> usize {
        self.node_index.len()
    }

    /// Number of indexed ways.
    pub fn way_count(&self) -> usize {
        self.way_index.len()
    }

    /// Number of indexed relations.
    pub fn relation_count(&self) -> usize {
        self.relation_index.len()
    }

    /// Rough estimate, in bytes, of the heap memory held by this overlay.
    ///
    /// Sums the arena capacities, an approximate per-slot cost for each hash
    /// table (based on capacity, not length), and the interner's own estimate.
    pub fn heap_size_estimate(&self) -> usize {
        let mut total: usize = 0;
        total += self.node_arena.capacity();
        total += self.way_arena.capacity();
        total += self.relation_arena.capacity();

        // The extra byte per slot presumably models the hash table's per-slot
        // control byte; this is an approximation, not an exact accounting.
        let index_entry_size = std::mem::size_of::<(i64, u32)>() + 1;
        total += self.node_index.capacity() * index_entry_size;
        total += self.way_index.capacity() * index_entry_size;
        total += self.relation_index.capacity() * index_entry_size;

        let delete_entry_size = std::mem::size_of::<i64>() + 1;
        total += self.deleted_nodes.capacity() * delete_entry_size;
        total += self.deleted_ways.capacity() * delete_entry_size;
        total += self.deleted_relations.capacity() * delete_entry_size;

        total += self.interner.heap_size_estimate();
        total
    }

    /// Appends a node record and makes it the current version of `id`.
    ///
    /// Any earlier record for the same ID stays in the arena but is no longer
    /// reachable through the index. An earlier delete marker for `id` is
    /// cleared so the node is never both present and deleted.
    #[inline]
    pub(super) fn push_node(&mut self, id: i64, lat: i32, lon: i32, tags: &[(u32, &str)]) {
        let offset = arena_append_node(&mut self.node_arena, id, lat, lon, tags);
        self.node_index.insert(id, offset);
        // Mirror of `delete_node`: a (re-)created element supersedes a delete.
        self.deleted_nodes.remove(&id);
    }

    /// Appends a way record and makes it the current version of `id`,
    /// clearing any earlier delete marker for it.
    #[inline]
    pub(super) fn push_way(&mut self, id: i64, refs: &[i64], tags: &[(u32, &str)]) {
        let offset = arena_append_way(&mut self.way_arena, id, refs, tags);
        self.way_index.insert(id, offset);
        // Mirror of `delete_way`: a (re-)created element supersedes a delete.
        self.deleted_ways.remove(&id);
    }

    /// Appends a relation record and makes it the current version of `id`,
    /// clearing any earlier delete marker for it.
    #[inline]
    pub(super) fn push_relation(
        &mut self,
        id: i64,
        members: &[(i64, u8, u32)],
        tags: &[(u32, &str)],
    ) {
        let offset = arena_append_relation(&mut self.relation_arena, id, members, tags);
        self.relation_index.insert(id, offset);
        // Mirror of `delete_relation`: a (re-)created element supersedes a delete.
        self.deleted_relations.remove(&id);
    }

    /// Marks node `id` as deleted and drops it from the index.
    ///
    /// The record bytes, if any, remain in the arena.
    #[inline]
    pub(super) fn delete_node(&mut self, id: i64) {
        self.deleted_nodes.insert(id);
        self.node_index.remove(&id);
    }

    /// Marks way `id` as deleted and drops it from the index.
    #[inline]
    pub(super) fn delete_way(&mut self, id: i64) {
        self.deleted_ways.insert(id);
        self.way_index.remove(&id);
    }

    /// Marks relation `id` as deleted and drops it from the index.
    #[inline]
    pub(super) fn delete_relation(&mut self, id: i64) {
        self.deleted_relations.insert(id);
        self.relation_index.remove(&id);
    }

    /// Interns `s` in the overlay's shared interner and returns its ID.
    #[inline]
    pub(super) fn intern(&mut self, s: &str) -> u32 {
        self.interner.intern(s)
    }
}
impl Default for CompactDiffOverlay {
fn default() -> Self {
Self::new()
}
}
/// Parses the gzip-compressed OSC file at `path` and applies its contents to `overlay`.
///
/// The file is streamed through a multi-member gzip decoder and an XML pull
/// parser; start, empty, and end element events are forwarded to the compact
/// event handlers, which update `overlay`. All other events are ignored.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if the XML reader reports
/// an error, or if a start/empty-element handler fails. Changes applied to
/// `overlay` before the failure are not rolled back.
pub fn parse_osc_file_into(path: &Path, overlay: &mut CompactDiffOverlay) -> ParseResult<()> {
    let gz_stream = MultiGzDecoder::new(File::open(path)?);
    let mut xml = Reader::from_reader(BufReader::new(gz_stream));
    xml.config_mut().trim_text(true);

    let mut parser_state = ParserState::new();
    // Scratch buffer reused for every event to avoid per-event allocation.
    let mut scratch = Vec::new();

    loop {
        match xml.read_event_into(&mut scratch) {
            Err(xml_err) => return Err(Box::new(xml_err)),
            Ok(Event::Eof) => break,
            Ok(Event::Start(ref element)) => {
                handle_start_event_compact(element, &mut parser_state, overlay)?;
            }
            Ok(Event::Empty(ref element)) => {
                handle_empty_event_compact(element, &mut parser_state, overlay)?;
            }
            Ok(Event::End(ref element)) => {
                handle_end_event_compact(element, &mut parser_state, overlay);
            }
            Ok(_) => {}
        }
        scratch.clear();
    }

    Ok(())
}
/// Parses a single gzip-compressed OSC file into a brand-new overlay.
///
/// # Errors
///
/// Propagates any error from [`parse_osc_file_into`].
pub fn parse_osc_file(path: &Path) -> ParseResult<CompactDiffOverlay> {
    let mut fresh = CompactDiffOverlay::new();
    parse_osc_file_into(path, &mut fresh).map(|()| fresh)
}
/// Extracts the numeric sequence from a file name of the form `<digits>.osc.gz`.
///
/// Returns `None` when the name does not end in `.osc.gz` or when the part
/// before that suffix does not parse as a `u64`.
fn parse_sequence_number(filename: &str) -> Option<u64> {
    filename
        .strip_suffix(".osc.gz")
        .and_then(|digits| digits.parse::<u64>().ok())
}
pub fn load_all_diffs(diffs_dir: &Path) -> ParseResult<CompactDiffOverlay> {
let mut entries: Vec<(u64, std::path::PathBuf)> = Vec::new();
for entry in std::fs::read_dir(diffs_dir)? {
let entry = entry?;
let path = entry.path();
let filename = match path.file_name().and_then(|f| f.to_str()) {
Some(f) => f.to_string(),
None => continue,
};
if !filename.ends_with(".gz") {
continue;
}
if let Some(seq) = parse_sequence_number(&filename) {
entries.push((seq, path));
}
}
entries.sort_by_key(|(seq, _)| *seq);
let mut overlay = CompactDiffOverlay::new();
let total = entries.len();
for (i, (seq, path)) in entries.iter().enumerate() {
eprintln!(
"[{}/{}] Parsing diff {} (sequence {seq})...",
i + 1,
total,
path.display()
);
parse_osc_file_into(path, &mut overlay)?;
}
eprintln!(
"Loaded {total} diffs: {} nodes, {} ways, {} relations \
({} deleted nodes, {} deleted ways, {} deleted relations)",
overlay.node_count(),
overlay.way_count(),
overlay.relation_count(),
overlay.deleted_nodes.len(),
overlay.deleted_ways.len(),
overlay.deleted_relations.len(),
);
Ok(overlay)
}
// Tests for OSC parsing, overlay merge semantics across several diffs, and
// round-trips through the compact arena encodings and the string interner.
#[cfg(test)]
mod tests {
use super::*;
use super::super::compact::{
arena_append_node, arena_append_relation, arena_append_way, member_type_to_byte,
};
use crate::read::elements::MemberType;
use flate2::write::GzEncoder;
use flate2::Compression;
use std::io::Write;
// Creates a fresh, empty directory named `pbfhogg_osc_test_{suffix}` under the
// system temp dir, discarding anything left over from a previous run.
fn make_test_dir(suffix: &str) -> std::path::PathBuf {
let dir = std::env::temp_dir().join(format!("pbfhogg_osc_test_{suffix}"));
// The result is ignored: removal fails when the directory does not exist yet.
drop(std::fs::remove_dir_all(&dir));
std::fs::create_dir_all(&dir).expect("create test dir");
dir
}
// Gzip-compresses `xml` and writes it to `dir/filename`.
fn write_osc_gz(dir: &Path, filename: &str, xml: &str) {
let path = dir.join(filename);
let file = File::create(&path).expect("create osc.gz");
let mut enc = GzEncoder::new(file, Compression::fast());
enc.write_all(xml.as_bytes()).expect("write xml");
enc.finish().expect("finish gz");
}
// Within one file, a create followed by a modify of the same node must leave
// the modified version; a deleted way must be recorded and not indexed.
#[test]
fn test_parse_osc_create_modify_delete() -> ParseResult<()> {
let dir = make_test_dir("create_modify_delete");
let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<create>
<node id="100" lat="55.6761" lon="12.5683" version="1">
<tag k="name" v="Copenhagen"/>
</node>
</create>
<modify>
<node id="100" lat="55.6800" lon="12.5700" version="2">
<tag k="name" v="CPH"/>
</node>
</modify>
<delete>
<way id="200" version="3"/>
</delete>
</osmChange>"#;
write_osc_gz(&dir, "test.osc.gz", xml);
let overlay = parse_osc_file(&dir.join("test.osc.gz"))?;
let node = overlay
.get_node(100)
.ok_or("node 100 should exist in overlay")?;
// A tolerance of one unit is allowed, presumably to absorb rounding when
// the decimal coordinates are converted to fixed-point.
assert!((node.decimicro_lat() - 556_800_000).abs() <= 1);
assert!((node.decimicro_lon() - 125_700_000).abs() <= 1);
let tags: Vec<(&str, &str)> = node.tags().collect();
assert_eq!(tags.len(), 1);
assert_eq!(tags[0].0, "name");
assert_eq!(tags[0].1, "CPH");
assert!(overlay.deleted_ways.contains(&200));
assert!(!overlay.has_way(200));
std::fs::remove_dir_all(&dir)?;
Ok(())
}
// When two files touch the same node, the file applied last determines the
// node's coordinates.
#[test]
fn test_merge_later_wins() -> ParseResult<()> {
let dir = make_test_dir("merge_later_wins");
let xml_create = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<create>
<node id="100" lat="1.0" lon="2.0" version="1"/>
</create>
</osmChange>"#;
let xml_modify = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<modify>
<node id="100" lat="3.0" lon="4.0" version="2"/>
</modify>
</osmChange>"#;
write_osc_gz(&dir, "001.osc.gz", xml_create);
write_osc_gz(&dir, "002.osc.gz", xml_modify);
let mut overlay = CompactDiffOverlay::new();
parse_osc_file_into(&dir.join("001.osc.gz"), &mut overlay)?;
parse_osc_file_into(&dir.join("002.osc.gz"), &mut overlay)?;
let node = overlay.get_node(100).ok_or("node 100 should exist after merge")?;
assert_eq!(node.decimicro_lat(), 30_000_000);
assert_eq!(node.decimicro_lon(), 40_000_000);
std::fs::remove_dir_all(&dir)?;
Ok(())
}
// A delete applied after a create must drop the node from the index and
// record it in `deleted_nodes`.
#[test]
fn test_merge_delete_removes_create() -> ParseResult<()> {
let dir = make_test_dir("delete_removes_create");
let xml_create = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<create>
<node id="100" lat="1.0" lon="2.0" version="1"/>
</create>
</osmChange>"#;
let xml_delete = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<delete>
<node id="100" version="2"/>
</delete>
</osmChange>"#;
write_osc_gz(&dir, "001.osc.gz", xml_create);
write_osc_gz(&dir, "002.osc.gz", xml_delete);
let mut overlay = CompactDiffOverlay::new();
parse_osc_file_into(&dir.join("001.osc.gz"), &mut overlay)?;
parse_osc_file_into(&dir.join("002.osc.gz"), &mut overlay)?;
assert!(!overlay.has_node(100));
assert!(overlay.deleted_nodes.contains(&100));
std::fs::remove_dir_all(&dir)?;
Ok(())
}
// A create applied after a delete must re-index the node and clear it from
// `deleted_nodes`.
#[test]
fn test_merge_create_removes_delete() -> ParseResult<()> {
let dir = make_test_dir("create_removes_delete");
let xml_delete = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<delete>
<node id="100" version="1"/>
</delete>
</osmChange>"#;
let xml_create = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<create>
<node id="100" lat="1.0" lon="2.0" version="2"/>
</create>
</osmChange>"#;
write_osc_gz(&dir, "001.osc.gz", xml_delete);
write_osc_gz(&dir, "002.osc.gz", xml_create);
let mut overlay = CompactDiffOverlay::new();
parse_osc_file_into(&dir.join("001.osc.gz"), &mut overlay)?;
parse_osc_file_into(&dir.join("002.osc.gz"), &mut overlay)?;
assert!(overlay.has_node(100));
assert!(!overlay.deleted_nodes.contains(&100));
std::fs::remove_dir_all(&dir)?;
Ok(())
}
// `load_all_diffs` must order files by numeric sequence, not by file name.
// The names are chosen so that lexicographic order (10000, 4705, 999) is the
// reverse of numeric order; node 1 only ends up at 10.0/10.0 when 999 is
// applied before 10000.
#[test]
fn test_numeric_sort() -> ParseResult<()> {
let dir = make_test_dir("numeric_sort");
let xml_999 = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<create><node id="1" lat="1.0" lon="1.0" version="1"/></create>
</osmChange>"#;
let xml_4705 = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<create><node id="2" lat="2.0" lon="2.0" version="1"/></create>
</osmChange>"#;
let xml_10000 = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<modify><node id="1" lat="10.0" lon="10.0" version="2"/></modify>
</osmChange>"#;
write_osc_gz(&dir, "10000.osc.gz", xml_10000);
write_osc_gz(&dir, "4705.osc.gz", xml_4705);
write_osc_gz(&dir, "999.osc.gz", xml_999);
let overlay = load_all_diffs(&dir)?;
let node1 = overlay
.get_node(1)
.ok_or("node 1 should exist after loading diffs")?;
assert_eq!(node1.decimicro_lat(), 100_000_000);
assert_eq!(node1.decimicro_lon(), 100_000_000);
assert!(overlay.has_node(2));
std::fs::remove_dir_all(&dir)?;
Ok(())
}
// A freshly constructed overlay reports itself as empty.
#[test]
fn test_empty_overlay() {
let overlay = CompactDiffOverlay::new();
assert!(overlay.is_empty());
}
// A delete entry written as a self-closing element (and without a `version`
// attribute) must still be recorded as a deletion.
#[test]
fn test_self_closing_delete() -> ParseResult<()> {
let dir = make_test_dir("self_closing_delete");
let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<delete>
<node id="123"/>
</delete>
</osmChange>"#;
write_osc_gz(&dir, "test.osc.gz", xml);
let overlay = parse_osc_file(&dir.join("test.osc.gz"))?;
assert!(overlay.deleted_nodes.contains(&123));
assert!(!overlay.has_node(123));
std::fs::remove_dir_all(&dir)?;
Ok(())
}
// Interned strings resolve back to their original text.
#[test]
fn test_interner_roundtrip() {
let mut interner = StringInterner::new();
let id_hello = interner.intern("hello");
let id_world = interner.intern("world");
let id_empty = interner.intern("");
assert_eq!(interner.resolve(id_hello), "hello");
assert_eq!(interner.resolve(id_world), "world");
assert_eq!(interner.resolve(id_empty), "");
// The empty string gets ID 0 even though it is interned last here;
// presumably the interner reserves ID 0 for it up front.
assert_eq!(id_empty, 0); }
// Interning the same string twice yields the same ID; different strings get
// different IDs.
#[test]
fn test_interner_dedup() {
let mut interner = StringInterner::new();
let id1 = interner.intern("highway");
let id2 = interner.intern("highway");
let id3 = interner.intern("name");
assert_eq!(id1, id2); assert_ne!(id1, id3); }
// A node appended to an arena reads back with the same ID, coordinates, and
// tags (in insertion order).
#[test]
fn test_node_roundtrip() {
let mut interner = StringInterner::new();
let key_name = interner.intern("name");
let key_place = interner.intern("place");
let mut arena = Vec::new();
let tags: Vec<(u32, &str)> = vec![(key_name, "Test City"), (key_place, "city")];
let offset = arena_append_node(&mut arena, 42, 556_800_000, 125_700_000, &tags);
let node = CompactNodeRef {
data: &arena[offset as usize..],
interner: &interner,
};
assert_eq!(node.id(), 42);
assert_eq!(node.decimicro_lat(), 556_800_000);
assert_eq!(node.decimicro_lon(), 125_700_000);
assert_eq!(node.tag_count(), 2);
let tag_vec: Vec<(&str, &str)> = node.tags().collect();
assert_eq!(tag_vec[0], ("name", "Test City"));
assert_eq!(tag_vec[1], ("place", "city"));
}
// A way appended to an arena reads back with the same ID, node refs, and tags.
#[test]
fn test_way_roundtrip() {
let mut interner = StringInterner::new();
let key_highway = interner.intern("highway");
let mut arena = Vec::new();
let refs = vec![1, 2, 3, 4, 5];
let tags: Vec<(u32, &str)> = vec![(key_highway, "residential")];
let offset = arena_append_way(&mut arena, 99, &refs, &tags);
let way = CompactWayRef {
data: &arena[offset as usize..],
interner: &interner,
};
assert_eq!(way.id(), 99);
assert_eq!(way.ref_count(), 5);
assert_eq!(way.tag_count(), 1);
let ref_vec: Vec<i64> = way.refs().collect();
assert_eq!(ref_vec, vec![1, 2, 3, 4, 5]);
let tag_vec: Vec<(&str, &str)> = way.tags().collect();
assert_eq!(tag_vec[0], ("highway", "residential"));
}
// A relation appended to an arena reads back with the same ID, members
// (type, ref, role), and tags. Members are passed in as
// (ref, member-type byte, interned role ID) tuples.
#[test]
fn test_relation_roundtrip() {
let mut interner = StringInterner::new();
let key_type = interner.intern("type");
let role_outer = interner.intern("outer");
let role_inner = interner.intern("inner");
let mut arena = Vec::new();
let members = vec![
(10, member_type_to_byte(MemberType::Way), role_outer),
(20, member_type_to_byte(MemberType::Way), role_inner),
(30, member_type_to_byte(MemberType::Node), interner.intern("")),
];
let tags: Vec<(u32, &str)> = vec![(key_type, "multipolygon")];
let offset = arena_append_relation(&mut arena, 500, &members, &tags);
let rel = CompactRelationRef {
data: &arena[offset as usize..],
interner: &interner,
};
assert_eq!(rel.id(), 500);
assert_eq!(rel.member_count(), 3);
assert_eq!(rel.tag_count(), 1);
let member_vec: Vec<(MemberType, i64, &str)> = rel.members().collect();
assert_eq!(member_vec[0], (MemberType::Way, 10, "outer"));
assert_eq!(member_vec[1], (MemberType::Way, 20, "inner"));
assert_eq!(member_vec[2], (MemberType::Node, 30, ""));
let tag_vec: Vec<(&str, &str)> = rel.tags().collect();
assert_eq!(tag_vec[0], ("type", "multipolygon"));
}
// Child elements (`nd`, `member`, `tag`) of ways and relations parsed from an
// OSC file end up on the right parent, in document order, for both `create`
// and `modify` sections.
#[test]
fn test_parse_osc_way_and_relation_children() -> ParseResult<()> {
let dir = make_test_dir("way_relation_children");
let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<osmChange version="0.6">
<create>
<way id="100" version="1">
<nd ref="1"/>
<nd ref="2"/>
<nd ref="3"/>
<tag k="highway" v="residential"/>
<tag k="name" v="Main Street"/>
</way>
<relation id="200" version="1">
<member type="way" ref="100" role="outer"/>
<member type="way" ref="101" role="inner"/>
<member type="node" ref="1" role="label"/>
<tag k="type" v="multipolygon"/>
</relation>
</create>
<modify>
<way id="300" version="5">
<nd ref="10"/>
<nd ref="11"/>
<tag k="highway" v="primary"/>
</way>
</modify>
</osmChange>"#;
write_osc_gz(&dir, "children.osc.gz", xml);
let overlay = parse_osc_file(&dir.join("children.osc.gz"))?;
let way100 = overlay.get_way(100).ok_or("way 100 should exist")?;
let refs: Vec<i64> = way100.refs().collect();
assert_eq!(refs, vec![1, 2, 3]);
let tags: Vec<(&str, &str)> = way100.tags().collect();
assert_eq!(tags, vec![("highway", "residential"), ("name", "Main Street")]);
let way300 = overlay.get_way(300).ok_or("way 300 should exist")?;
let refs: Vec<i64> = way300.refs().collect();
assert_eq!(refs, vec![10, 11]);
let tags: Vec<(&str, &str)> = way300.tags().collect();
assert_eq!(tags, vec![("highway", "primary")]);
let rel = overlay.get_relation(200).ok_or("relation 200 should exist")?;
let members: Vec<(MemberType, i64, &str)> = rel.members().collect();
assert_eq!(members.len(), 3);
assert_eq!(members[0], (MemberType::Way, 100, "outer"));
assert_eq!(members[1], (MemberType::Way, 101, "inner"));
assert_eq!(members[2], (MemberType::Node, 1, "label"));
let tags: Vec<(&str, &str)> = rel.tags().collect();
assert_eq!(tags, vec![("type", "multipolygon")]);
// Best-effort cleanup: unlike the other tests, a failure to remove the
// directory is ignored here rather than propagated.
drop(std::fs::remove_dir_all(&dir));
Ok(())
}
}