use std::path::Path;
use std::time::Instant;
use crate::{Element, MemberId};
use crate::idset::IdSet;
use crate::scan::classify::{build_classify_schedules_split, parallel_classify_phase};
use crate::BoxResult as Result;
/// One dangling reference: an element that is referenced somewhere in the
/// file but was not found, together with the element that references it.
///
/// `check_refs` only produces these records when it is called with
/// `show_ids == true`.
pub struct MissingRef {
    /// Kind of the element that could not be found. `check_refs` uses
    /// `'n'` for nodes, `'w'` for ways and `'r'` for relations.
    pub missing_type: char,
    /// ID of the element that could not be found.
    pub missing_id: i64,
    /// Kind of the element holding the reference. `check_refs` uses `'w'`
    /// for ways and `'r'` for relations.
    pub referencing_type: char,
    /// ID of the element holding the reference.
    pub referencing_id: i64,
}
/// Summary produced by `check_refs`.
///
/// The four `missing_*` counters that take part in `is_valid` /
/// `total_missing` count *distinct* IDs: `check_refs` sorts and de-duplicates
/// the collected IDs before storing their length here.
pub struct RefCheckResult {
    /// Number of node elements (plain or dense) seen during the node phase.
    pub node_count: u64,
    /// Number of way elements seen during the way phase.
    pub way_count: u64,
    /// Number of relation elements seen; stays `0` when relations are not
    /// checked, because the relation phase is skipped.
    pub relation_count: u64,
    /// Distinct node IDs referenced by ways but not found among the nodes.
    pub missing_node_refs: u64,
    /// Distinct way IDs referenced by relation members but not found.
    pub missing_way_refs: u64,
    /// Distinct node IDs referenced by relation members but not found.
    pub missing_node_members: u64,
    /// Distinct relation IDs referenced by relation members but not found.
    pub missing_relation_members: u64,
    /// Number of relation-to-relation member references that were not
    /// found, counting repeats of the same ID.
    pub missing_relation_member_occurrences: u64,
    /// Per-occurrence details; only filled when `check_refs` is called with
    /// `show_ids == true`, otherwise empty.
    pub missing_refs: Vec<MissingRef>,
}
impl RefCheckResult {
    /// Returns `true` when none of the four distinct-ID counters
    /// (`missing_node_refs`, `missing_way_refs`, `missing_node_members`,
    /// `missing_relation_members`) is non-zero.
    ///
    /// `missing_relation_member_occurrences` is not consulted.
    pub fn is_valid(&self) -> bool {
        let distinct_missing = [
            self.missing_node_refs,
            self.missing_way_refs,
            self.missing_node_members,
            self.missing_relation_members,
        ];
        distinct_missing.iter().all(|&count| count == 0)
    }

    /// Returns the sum of the four distinct-ID counters
    /// (`missing_node_refs`, `missing_way_refs`, `missing_node_members`,
    /// `missing_relation_members`).
    ///
    /// `missing_relation_member_occurrences` is not included.
    pub fn total_missing(&self) -> u64 {
        let distinct_missing = [
            self.missing_node_refs,
            self.missing_way_refs,
            self.missing_node_members,
            self.missing_relation_members,
        ];
        distinct_missing.iter().sum()
    }
}
/// Per-block output of the way phase in `check_refs`; each value is folded
/// into the function-level accumulators by the phase's merge closure.
#[derive(Default)]
struct WayBlobResult {
    /// Ways seen in this block.
    way_count: u64,
    /// Node references looked up for the ways in this block.
    way_refs_checked: u64,
    /// Node IDs referenced by a way but not found; one entry per occurrence,
    /// so the same ID may appear more than once.
    missing_node_refs: Vec<i64>,
    /// Detailed records for the misses above; only filled when `show_ids`
    /// is set.
    missing_refs: Vec<MissingRef>,
}
/// Per-block output of the relation phase in `check_refs`; each value is
/// folded into the function-level accumulators by the phase's merge closure.
#[derive(Default)]
struct RelBlobResult {
    /// Relations seen in this block.
    relation_count: u64,
    /// Node members looked up in this block.
    rel_node_members_checked: u64,
    /// Way members looked up in this block.
    rel_way_members_checked: u64,
    /// Relation members queued for later resolution instead of being looked
    /// up immediately.
    rel_rel_members_deferred: u64,
    /// Node member IDs that were not found; one entry per occurrence.
    missing_node_members: Vec<i64>,
    /// Way member IDs that were not found; one entry per occurrence.
    missing_way_refs: Vec<i64>,
    /// Relation member IDs queued for resolution after the relation phase.
    deferred_relation_refs: Vec<i64>,
    /// ID of the referencing relation for each entry of
    /// `deferred_relation_refs`, index-aligned with it. Only filled when
    /// `show_ids` is set; otherwise it stays empty.
    deferred_relation_ref_sources: Vec<i64>,
    /// Detailed records for missing node/way members; only filled when
    /// `show_ids` is set.
    missing_refs: Vec<MissingRef>,
}
/// Scans the file at `path` and checks that the elements referenced by ways
/// and relations were found in the same file.
///
/// The work is split into phases that run in this order, each over one of the
/// schedules returned by `build_classify_schedules_split`:
///
/// 1. **Nodes** - every node ID (plain or dense) is recorded in `node_ids`.
/// 2. **Ways** - each node ref of each way is looked up in `node_ids`; way IDs
///    are recorded in `way_ids` only when `check_relations` is true.
/// 3. **Relations** (only when `check_relations` is true) - node and way
///    members are looked up immediately; relation members are queued and
///    resolved after the phase, once every relation ID has been recorded.
///
/// Missing IDs are collected per occurrence and then sorted and de-duplicated,
/// so the `missing_*` counters in the result count distinct IDs. The
/// per-occurrence totals are only emitted as debug counters, except for
/// `missing_relation_member_occurrences`, which is stored in the result.
///
/// # Parameters
/// - `path`: file to scan.
/// - `check_relations`: also run the relation phase and the deferred
///   relation-to-relation resolution.
/// - `show_ids`: also collect one `MissingRef` per missing occurrence into
///   `RefCheckResult::missing_refs`.
/// - `direct_io`: currently ignored (explicitly discarded below).
///
/// # Errors
/// Propagates any error returned by `build_classify_schedules_split` or
/// `parallel_classify_phase`.
// The lints below are silenced because this function deliberately keeps all
// phases inline in one body.
#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]
#[hotpath::measure]
pub fn check_refs(path: &Path, check_relations: bool, show_ids: bool, direct_io: bool) -> Result<RefCheckResult> {
// `direct_io` is accepted for interface compatibility but has no effect here.
let _ = direct_io;
crate::debug::emit_marker("CHECKREFS_SCAN_START");
#[cfg(target_os = "linux")]
// SAFETY: `mallopt` is a plain FFI call taking two integers and no pointers.
// NOTE(review): per glibc docs M_ARENA_MAX caps the number of malloc arenas;
// the return value is ignored, so a failure to apply the setting is silent.
unsafe {
libc::mallopt(libc::M_ARENA_MAX, 2);
}
// ID sets, one per element kind.
// NOTE(review): the `pre_allocate` arguments look like upper bounds on the
// expected ID range for each kind - confirm against `IdSet::pre_allocate`.
let mut node_ids = IdSet::new();
node_ids.pre_allocate(14_000_000_000);
let mut way_ids = IdSet::new();
let mut relation_ids = IdSet::new();
// Way and relation IDs are only ever recorded/queried when relations are
// checked, so their sets are only pre-allocated in that case.
if check_relations {
way_ids.pre_allocate(1_500_000_000);
relation_ids.pre_allocate(25_000_000);
}
let mut result = RefCheckResult {
node_count: 0,
way_count: 0,
relation_count: 0,
missing_node_refs: 0,
missing_way_refs: 0,
missing_node_members: 0,
missing_relation_members: 0,
missing_relation_member_occurrences: 0,
missing_refs: Vec::new(),
};
// Function-level accumulators that the per-phase merge closures append to.
// The `*_vec` lists hold one entry per missing occurrence (duplicates
// included) until they are de-duplicated after the scans.
let mut missing_node_refs_vec: Vec<i64> = Vec::new();
let mut missing_way_refs_vec: Vec<i64> = Vec::new();
let mut missing_node_members_vec: Vec<i64> = Vec::new();
// Relation-to-relation references queued for resolution after the relation
// phase; `deferred_relation_ref_sources` is index-aligned with
// `deferred_relation_refs` but is only filled when `show_ids` is set.
let mut deferred_relation_refs: Vec<i64> = Vec::new();
let mut deferred_relation_ref_sources: Vec<i64> = Vec::new();
let mut missing_refs: Vec<MissingRef> = Vec::new();
// Counters used only for the debug output at the end.
let mut way_refs_checked: u64 = 0;
let mut rel_node_members_checked: u64 = 0;
let mut rel_way_members_checked: u64 = 0;
let mut rel_rel_members_deferred: u64 = 0;
// NOTE(review): presumably one schedule of blobs per element kind plus a
// shared handle to the input file - confirm against `scan::classify`.
let (node_schedule, way_schedule, rel_schedule, shared_file) =
build_classify_schedules_split(path)?;
let node_blobs = node_schedule.len() as u64;
let way_blobs = way_schedule.len() as u64;
let relation_blobs_total = rel_schedule.len() as u64;
// ---- Phase 1: record every node ID -------------------------------------
crate::debug::emit_marker("CHECKREFS_NODES_START");
{
// Shared borrow captured by the per-block closure; IDs are inserted through
// `set_atomic`, which takes `&self`.
let node_ids_ref = &node_ids;
// NOTE(review): the last two closures appear to be "process one block" and
// "merge one block's result"; the meaning of the `None` argument and of
// `_seq` is not visible here - confirm against `parallel_classify_phase`.
parallel_classify_phase(
&shared_file,
&node_schedule,
None,
|| (),
|block, &mut ()| -> u64 {
let mut count: u64 = 0;
for el in block.elements_skip_metadata() {
match el {
Element::DenseNode(dn) => {
node_ids_ref.set_atomic(dn.id());
count += 1;
}
Element::Node(n) => {
node_ids_ref.set_atomic(n.id());
count += 1;
}
// Non-node elements are ignored in this phase.
_ => {}
}
}
count
},
|_seq, count| {
result.node_count += count;
},
)?;
}
crate::debug::emit_marker("CHECKREFS_NODES_END");
crate::debug::emit_mallinfo2("checkrefs_after_nodes");
// ---- Phase 2: check way -> node references -----------------------------
// Runs after the node phase has returned, so `node_ids` is complete here.
crate::debug::emit_marker("CHECKREFS_WAYS_START");
{
let node_ids_ref = &node_ids;
let way_ids_ref = &way_ids;
parallel_classify_phase(
&shared_file,
&way_schedule,
None,
|| (),
|block, &mut ()| -> WayBlobResult {
let mut r = WayBlobResult::default();
for el in block.elements_skip_metadata() {
if let Element::Way(w) = el {
let wid = w.id();
// Way IDs are only needed later for relation member lookups.
if check_relations {
way_ids_ref.set_atomic(wid);
}
r.way_count += 1;
for nref in w.refs() {
r.way_refs_checked += 1;
if !node_ids_ref.get(nref) {
// Recorded per occurrence; de-duplicated after all phases.
r.missing_node_refs.push(nref);
if show_ids {
r.missing_refs.push(MissingRef {
missing_type: 'n',
missing_id: nref,
referencing_type: 'w',
referencing_id: wid,
});
}
}
}
}
}
r
},
|_seq, r| {
result.way_count += r.way_count;
way_refs_checked += r.way_refs_checked;
missing_node_refs_vec.extend(r.missing_node_refs);
if show_ids {
missing_refs.extend(r.missing_refs);
}
},
)?;
}
crate::debug::emit_marker("CHECKREFS_WAYS_END");
crate::debug::emit_mallinfo2("checkrefs_after_ways");
// The relation blob counter is reported as 0 when the relation phase is
// skipped, even if the file contains relation blobs.
let relation_blobs: u64 = if check_relations { relation_blobs_total } else { 0 };
// ---- Phase 3 (optional): check relation members ------------------------
if check_relations {
crate::debug::emit_marker("CHECKREFS_RELATIONS_START");
{
let node_ids_ref = &node_ids;
let way_ids_ref = &way_ids;
let relation_ids_ref = &relation_ids;
parallel_classify_phase(
&shared_file,
&rel_schedule,
None,
|| (),
|block, &mut ()| -> RelBlobResult {
let mut r = RelBlobResult::default();
for el in block.elements_skip_metadata() {
if let Element::Relation(rel) = el {
let rid = rel.id();
relation_ids_ref.set_atomic(rid);
r.relation_count += 1;
for mem in rel.members() {
match mem.id {
MemberId::Node(id) => {
r.rel_node_members_checked += 1;
if !node_ids_ref.get(id) {
r.missing_node_members.push(id);
if show_ids {
r.missing_refs.push(MissingRef {
missing_type: 'n',
missing_id: id,
referencing_type: 'r',
referencing_id: rid,
});
}
}
}
MemberId::Way(id) => {
r.rel_way_members_checked += 1;
if !way_ids_ref.get(id) {
r.missing_way_refs.push(id);
if show_ids {
r.missing_refs.push(MissingRef {
missing_type: 'w',
missing_id: id,
referencing_type: 'r',
referencing_id: rid,
});
}
}
}
MemberId::Relation(id) => {
// `relation_ids` is still being filled during this phase, so the
// lookup is postponed until the phase has finished.
r.rel_rel_members_deferred += 1;
r.deferred_relation_refs.push(id);
// The source relation is only remembered when details are
// requested; it is read back by index during resolution.
if show_ids {
r.deferred_relation_ref_sources.push(rid);
}
}
// Members of an unrecognised kind are not checked.
MemberId::Unknown(_, _) => {}
}
}
}
}
r
},
|_seq, r| {
result.relation_count += r.relation_count;
rel_node_members_checked += r.rel_node_members_checked;
rel_way_members_checked += r.rel_way_members_checked;
rel_rel_members_deferred += r.rel_rel_members_deferred;
missing_node_members_vec.extend(r.missing_node_members);
missing_way_refs_vec.extend(r.missing_way_refs);
// Both deferred lists are extended in the same merge call, which keeps
// them index-aligned when `show_ids` is set.
deferred_relation_refs.extend(r.deferred_relation_refs);
if show_ids {
deferred_relation_ref_sources.extend(r.deferred_relation_ref_sources);
missing_refs.extend(r.missing_refs);
}
},
)?;
}
crate::debug::emit_marker("CHECKREFS_RELATIONS_END");
crate::debug::emit_mallinfo2("checkrefs_after_relations");
}
// Capture per-occurrence totals before de-duplication shrinks the lists.
let missing_node_refs_occurrences = missing_node_refs_vec.len() as u64;
let missing_way_refs_occurrences = missing_way_refs_vec.len() as u64;
let missing_node_members_occurrences = missing_node_members_vec.len() as u64;
let t_dedup = Instant::now();
// Sorts and de-duplicates `v` in place and returns the number of distinct IDs.
let unique_len = |v: &mut Vec<i64>| -> u64 {
v.sort_unstable();
v.dedup();
v.len() as u64
};
result.missing_node_refs = unique_len(&mut missing_node_refs_vec);
result.missing_way_refs = unique_len(&mut missing_way_refs_vec);
result.missing_node_members = unique_len(&mut missing_node_members_vec);
let missing_dedup_ns = t_dedup.elapsed().as_nanos();
// ---- Resolve deferred relation -> relation references ------------------
crate::debug::emit_marker("CHECKREFS_DEFERRED_RESOLVE_START");
let t_deferred = Instant::now();
if check_relations {
let mut missing_relation_members_vec: Vec<i64> = Vec::new();
let mut occurrences: u64 = 0;
for (i, &id) in deferred_relation_refs.iter().enumerate() {
if !relation_ids.get(id) {
missing_relation_members_vec.push(id);
occurrences += 1;
if show_ids {
// Indexing is in bounds here: with `show_ids` set, one source ID was
// pushed for every deferred reference.
missing_refs.push(MissingRef {
missing_type: 'r',
missing_id: id,
referencing_type: 'r',
referencing_id: deferred_relation_ref_sources[i],
});
}
}
}
result.missing_relation_members = unique_len(&mut missing_relation_members_vec);
result.missing_relation_member_occurrences = occurrences;
}
let deferred_resolve_ns = t_deferred.elapsed().as_nanos();
crate::debug::emit_marker("CHECKREFS_DEFERRED_RESOLVE_END");
result.missing_refs = missing_refs;
crate::debug::emit_marker("CHECKREFS_SCAN_END");
crate::debug::emit_mallinfo2("checkrefs_final");
// `emit_counter` is called with `i64` values, so the `u64`/`u128` figures
// are cast; the cast lints are silenced for this reporting block only.
#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
{
crate::debug::emit_counter("checkrefs_node_count", result.node_count as i64);
crate::debug::emit_counter("checkrefs_way_count", result.way_count as i64);
crate::debug::emit_counter("checkrefs_relation_count", result.relation_count as i64);
crate::debug::emit_counter("checkrefs_missing_node_refs", result.missing_node_refs as i64);
crate::debug::emit_counter("checkrefs_missing_way_refs", result.missing_way_refs as i64);
crate::debug::emit_counter("checkrefs_missing_node_members", result.missing_node_members as i64);
crate::debug::emit_counter("checkrefs_missing_relation_members", result.missing_relation_members as i64);
// Integer division: durations are reported in whole milliseconds.
let ns_to_ms = |ns: u128| (ns / 1_000_000) as i64;
crate::debug::emit_counter("checkrefs_missing_dedup_ms", ns_to_ms(missing_dedup_ns));
crate::debug::emit_counter("checkrefs_deferred_resolve_ms", ns_to_ms(deferred_resolve_ns));
crate::debug::emit_counter("checkrefs_node_blobs", node_blobs as i64);
crate::debug::emit_counter("checkrefs_way_blobs", way_blobs as i64);
crate::debug::emit_counter("checkrefs_relation_blobs", relation_blobs as i64);
crate::debug::emit_counter("checkrefs_way_refs_checked", way_refs_checked as i64);
crate::debug::emit_counter("checkrefs_rel_node_members_checked", rel_node_members_checked as i64);
crate::debug::emit_counter("checkrefs_rel_way_members_checked", rel_way_members_checked as i64);
crate::debug::emit_counter("checkrefs_rel_rel_members_deferred", rel_rel_members_deferred as i64);
crate::debug::emit_counter("checkrefs_missing_node_refs_occurrences", missing_node_refs_occurrences as i64);
crate::debug::emit_counter("checkrefs_missing_way_refs_occurrences", missing_way_refs_occurrences as i64);
crate::debug::emit_counter("checkrefs_missing_node_members_occurrences", missing_node_members_occurrences as i64);
}
Ok(result)
}