mod consensus;
mod medoid;
mod optimized;
mod overlap;
mod portions;
mod postprocess;
mod rtree;
mod traces;
use crate::matching::calculate_route_distance;
use crate::{GpsPoint, RouteGroup};
use log::info;
#[cfg(feature = "parallel")]
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
pub(crate) use consensus::compute_consensus_polyline;
pub(crate) use medoid::select_medoid;
pub(crate) use overlap::{
FullTrackOverlap, OverlapCluster, cluster_overlaps, find_full_track_overlap,
};
pub(crate) use portions::compute_activity_portions;
pub(crate) use postprocess::{
consolidate_fragments, filter_low_quality_sections, make_sections_exclusive,
merge_nearby_sections, remove_overlapping_sections, split_at_gradient_changes,
split_at_heading_changes, split_folding_sections, split_high_variance_sections,
};
pub(crate) use rtree::{IndexedPoint, bounds_overlap_tracks, build_rtree};
pub(crate) use traces::extract_all_activity_traces;
pub use optimized::{
SectionMatch, SplitResult, detect_sections_optimized, find_sections_in_route,
recalculate_section_polyline, split_section_at_index, split_section_at_point,
};
/// Estimates an initial stability score in `[0, 1]` for a freshly detected
/// section.
///
/// The score blends two signals: how many observations back the consensus
/// (saturating at 10) and how tightly the traces hug the consensus polyline
/// relative to `proximity_threshold`. Observation count is weighted 60%,
/// tightness 40%.
pub(crate) fn compute_initial_stability(
    observation_count: u32,
    average_spread: f64,
    proximity_threshold: f64,
) -> f64 {
    // Saturates once we have 10 or more observations.
    let observation_weight = f64::min(f64::from(observation_count) / 10.0, 1.0);
    // 0 when the spread reaches the proximity threshold, 1 when traces align perfectly.
    let normalized_spread = (average_spread / proximity_threshold).clamp(0.0, 1.0);
    let tightness = 1.0 - normalized_spread;
    (observation_weight * 0.6 + tightness * 0.4).clamp(0.0, 1.0)
}
/// Length and participation bounds for one detection scale (e.g. "short").
///
/// Serialized as camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ScalePreset {
    /// Preset label: "short", "medium", or "long" for the built-ins.
    pub name: String,
    /// Minimum section length in meters accepted at this scale.
    pub min_length: f64,
    /// Maximum section length in meters accepted at this scale.
    pub max_length: f64,
    /// Minimum number of distinct activities required to confirm a section.
    pub min_activities: u32,
}
impl ScalePreset {
    /// Internal constructor shared by the built-in presets.
    fn preset(name: &str, min_length: f64, max_length: f64, min_activities: u32) -> Self {
        Self {
            name: name.to_string(),
            min_length,
            max_length,
            min_activities,
        }
    }

    /// Short sections: 100–500 m, confirmed by 2+ activities.
    pub fn short() -> Self {
        Self::preset("short", 100.0, 500.0, 2)
    }

    /// Medium sections: 500–2000 m, confirmed by 2+ activities.
    pub fn medium() -> Self {
        Self::preset("medium", 500.0, 2000.0, 2)
    }

    /// Long sections: 2000–5000 m, confirmed by 3+ activities.
    pub fn long() -> Self {
        Self::preset("long", 2000.0, 5000.0, 3)
    }

    /// The standard short/medium/long trio used by discovery mode.
    pub fn default_presets() -> Vec<Self> {
        vec![Self::short(), Self::medium(), Self::long()]
    }
}
/// Tunable parameters for frequent-section detection.
///
/// Serialized as camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SectionConfig {
    /// Maximum distance in meters between points considered to lie on the same path.
    pub proximity_threshold: f64,
    /// Sections with a consensus polyline shorter than this (meters) are discarded.
    pub min_section_length: f64,
    /// Clusters whose representative polyline is longer than this (meters) are discarded.
    pub max_section_length: f64,
    /// Minimum number of distinct activities required to confirm a section.
    pub min_activities: u32,
    /// Tolerance used when clustering pairwise overlaps — presumably meters;
    /// TODO confirm semantics in the `overlap` module.
    pub cluster_tolerance: f64,
    /// Sample-point count consumed by downstream modules; semantics not visible here.
    pub sample_points: u32,
    /// Detection strategy label: "discovery", "conservative", or "legacy".
    pub detection_mode: String,
    /// When true, clusters below `min_activities` are still reported as potentials.
    pub include_potentials: bool,
    /// Scale presets for multi-scale detection; empty triggers the legacy single-scale path.
    pub scale_presets: Vec<ScalePreset>,
    /// When true, multi-scale dedup uses the hierarchical (scale-aware) variant.
    pub preserve_hierarchy: bool,
}
impl Default for SectionConfig {
    /// Discovery-mode defaults: 50 m proximity, 200–5000 m sections,
    /// 3+ activities, potentials included, standard scale presets.
    fn default() -> Self {
        Self {
            proximity_threshold: 50.0,
            min_section_length: 200.0,
            max_section_length: 5000.0,
            min_activities: 3,
            cluster_tolerance: 80.0,
            sample_points: 50,
            detection_mode: "discovery".to_string(),
            include_potentials: true,
            scale_presets: ScalePreset::default_presets(),
            preserve_hierarchy: true,
        }
    }
}
impl SectionConfig {
    /// Exploratory preset: multi-scale detection with low-support potentials
    /// included and scale hierarchy preserved during dedup.
    pub fn discovery() -> Self {
        Self {
            detection_mode: "discovery".to_string(),
            include_potentials: true,
            scale_presets: ScalePreset::default_presets(),
            preserve_hierarchy: true,
            ..Default::default()
        }
    }

    /// Stricter preset: 4+ activities required, medium/long scales only,
    /// no potentials, flat (non-hierarchical) dedup.
    pub fn conservative() -> Self {
        Self {
            detection_mode: "conservative".to_string(),
            include_potentials: false,
            min_activities: 4,
            scale_presets: vec![ScalePreset::medium(), ScalePreset::long()],
            preserve_hierarchy: false,
            ..Default::default()
        }
    }

    /// Pre-multi-scale behavior: an empty `scale_presets` list routes
    /// `detect_sections_multiscale` through the legacy single-scale path.
    pub fn legacy() -> Self {
        Self {
            detection_mode: "legacy".to_string(),
            include_potentials: false,
            scale_presets: vec![],
            preserve_hierarchy: false,
            min_activities: 3,
            ..Default::default()
        }
    }
}
/// One activity's traversal of a section: the index range within that
/// activity's own GPS track that matched the section polyline.
///
/// Serialized as camelCase; snake_case aliases accept older payloads.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SectionPortion {
    /// Id of the activity this portion belongs to.
    #[serde(alias = "activity_id")]
    pub activity_id: String,
    /// Start index into the activity's GPS track.
    #[serde(alias = "start_index")]
    pub start_index: u32,
    /// End index into the activity's GPS track.
    #[serde(alias = "end_index")]
    pub end_index: u32,
    /// Length of the traversed portion in meters.
    #[serde(alias = "distance_meters")]
    pub distance_meters: f64,
    /// Traversal direction label — values defined in the `portions` module; TODO confirm.
    pub direction: String,
}
/// A path segment traversed by multiple activities, with a consensus
/// polyline and per-activity match metadata.
///
/// Serialized as camelCase; snake_case aliases accept older payloads.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FrequentSection {
    /// Unique id in the form `sec_{sport}_{n}`.
    pub id: String,
    /// Optional display name; `None` when freshly detected.
    pub name: Option<String>,
    /// Sport type shared by the matching activities.
    #[serde(alias = "sport_type")]
    pub sport_type: String,
    /// Consensus polyline representing the section.
    pub polyline: Vec<GpsPoint>,
    /// Activity chosen as the cluster medoid.
    #[serde(alias = "representative_activity_id")]
    pub representative_activity_id: String,
    /// All activities that traverse this section.
    #[serde(alias = "activity_ids")]
    pub activity_ids: Vec<String>,
    /// Per-activity index ranges matching the section.
    #[serde(alias = "activity_portions")]
    pub activity_portions: Vec<SectionPortion>,
    /// Deduplicated route-group ids covering the matching activities.
    #[serde(alias = "route_ids")]
    pub route_ids: Vec<String>,
    /// Number of matching activities.
    #[serde(alias = "visit_count")]
    pub visit_count: u32,
    /// Length of the consensus polyline in meters.
    #[serde(alias = "distance_meters")]
    pub distance_meters: f64,
    /// Per-activity GPS traces extracted along the section.
    #[serde(alias = "activity_traces")]
    pub activity_traces: HashMap<String, Vec<GpsPoint>>,
    /// Consensus confidence score (from the `consensus` module).
    pub confidence: f64,
    /// Number of observations backing the consensus.
    #[serde(alias = "observation_count")]
    pub observation_count: u32,
    /// Mean spread of traces around the consensus — presumably meters; see `consensus` module.
    #[serde(alias = "average_spread")]
    pub average_spread: f64,
    /// Observation counts along the consensus polyline — presumably one entry per point.
    #[serde(alias = "point_density")]
    pub point_density: Vec<u32>,
    /// Scale preset name that produced this section; `None` on the legacy path.
    pub scale: Option<String>,
    /// Record version; initialized to 1 at detection time.
    pub version: u32,
    /// True when the section was defined manually rather than detected
    /// (always false for sections produced by this module).
    #[serde(alias = "is_user_defined")]
    pub is_user_defined: bool,
    /// Creation timestamp; `None` when freshly detected (set by callers).
    #[serde(alias = "created_at")]
    pub created_at: Option<String>,
    /// Last-update timestamp; `None` when freshly detected (set by callers).
    #[serde(alias = "updated_at")]
    pub updated_at: Option<String>,
    /// Initial stability estimate in `[0, 1]`; see `compute_initial_stability`.
    pub stability: f64,
}
/// A candidate section that did not reach a scale's `min_activities`
/// threshold; reported only when `SectionConfig::include_potentials` is set.
///
/// Serialized as camelCase; snake_case aliases accept older payloads.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PotentialSection {
    /// Unique id in the form `pot_{scale}_{sport}_{n}`.
    pub id: String,
    /// Sport type of the contributing activities.
    #[serde(alias = "sport_type")]
    pub sport_type: String,
    /// Medoid polyline of the (small) cluster.
    pub polyline: Vec<GpsPoint>,
    /// Activities contributing to the candidate.
    #[serde(alias = "activity_ids")]
    pub activity_ids: Vec<String>,
    /// Number of contributing activities.
    #[serde(alias = "visit_count")]
    pub visit_count: u32,
    /// Polyline length in meters.
    #[serde(alias = "distance_meters")]
    pub distance_meters: f64,
    /// Heuristic confidence: 0.3 + 0.2 per contributing activity.
    pub confidence: f64,
    /// Name of the scale preset that produced this candidate.
    pub scale: String,
}
/// Combined output of `detect_sections_multiscale`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MultiScaleSectionResult {
    /// Confirmed sections, sorted by descending visit count.
    pub sections: Vec<FrequentSection>,
    /// Low-support candidates, sorted by descending confidence.
    pub potentials: Vec<PotentialSection>,
    /// Run-level counters for diagnostics.
    pub stats: DetectionStats,
}
/// Diagnostic counters collected during multi-scale detection.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DetectionStats {
    /// Number of input tracks processed.
    pub activities_processed: u32,
    /// Total pairwise track overlaps found across all scales.
    pub overlaps_found: u32,
    /// Section count per scale-preset name ("legacy" on the fallback path).
    pub sections_by_scale: HashMap<String, u32>,
    /// Potential-section count per scale-preset name.
    pub potentials_by_scale: HashMap<String, u32>,
}
/// Converts one overlap cluster into a `FrequentSection`, or returns `None`
/// when the cluster fails a quality gate (empty medoid polyline, representative
/// longer than `max_section_length`, degenerate consensus, or consensus
/// shorter than `min_section_length`).
///
/// `idx` seeds a provisional id of the form `sec_{sport}_{idx}`; callers may
/// re-number afterwards. `scale_name` tags the section with the scale preset
/// that produced it (`None` on the legacy path).
fn process_cluster(
    idx: usize,
    cluster: OverlapCluster,
    sport_type: &str,
    track_map: &HashMap<String, Vec<GpsPoint>>,
    activity_to_route: &HashMap<&str, &str>,
    config: &SectionConfig,
    scale_name: Option<&str>,
) -> Option<FrequentSection> {
    // Medoid: the cluster member most central to the cluster.
    let (representative_id, representative_polyline) = select_medoid(&cluster);
    if representative_polyline.is_empty() {
        return None;
    }
    // Gate on max length early, before the more expensive portion/trace/consensus work.
    let distance_meters = calculate_route_distance(&representative_polyline);
    if distance_meters > config.max_section_length {
        return None;
    }
    let activity_portions =
        compute_activity_portions(&cluster, &representative_polyline, track_map, config);
    // Map activities to their route groups; the intermediate HashSet deduplicates.
    let route_ids: Vec<String> = cluster
        .activity_ids
        .iter()
        .filter_map(|aid| activity_to_route.get(aid.as_str()).map(|s| s.to_string()))
        .collect::<HashSet<_>>()
        .into_iter()
        .collect();
    let activity_id_vec: Vec<String> = cluster.activity_ids.iter().cloned().collect();
    let activity_traces =
        extract_all_activity_traces(&activity_id_vec, &representative_polyline, track_map);
    let all_traces: Vec<Vec<GpsPoint>> = activity_traces.values().cloned().collect();
    // Blend the per-activity traces into a single consensus polyline.
    let consensus = compute_consensus_polyline(
        &representative_polyline,
        &all_traces,
        config.proximity_threshold,
    );
    if consensus.polyline.len() < 2 {
        return None;
    }
    // The consensus is re-measured: it can be shorter than the representative.
    let consensus_distance = calculate_route_distance(&consensus.polyline);
    if consensus_distance < config.min_section_length {
        return None;
    }
    let stability = compute_initial_stability(
        consensus.observation_count,
        consensus.average_spread,
        config.proximity_threshold,
    );
    let activity_count = cluster.activity_ids.len();
    Some(FrequentSection {
        id: format!("sec_{}_{}", sport_type.to_lowercase(), idx),
        name: None,
        sport_type: sport_type.to_string(),
        polyline: consensus.polyline,
        representative_activity_id: representative_id,
        activity_ids: cluster.activity_ids.into_iter().collect(),
        activity_portions,
        route_ids,
        visit_count: activity_count as u32,
        distance_meters: consensus_distance,
        activity_traces,
        confidence: consensus.confidence,
        observation_count: consensus.observation_count,
        average_spread: consensus.average_spread,
        point_density: consensus.point_density,
        scale: scale_name.map(|s| s.to_string()),
        version: 1,
        is_user_defined: false,
        created_at: None,
        updated_at: None,
        stability,
    })
}
/// Detects frequent sections from full GPS tracks using the single (legacy)
/// scale given by `config.min_section_length`..`max_section_length`.
///
/// Per sport type: build one R-tree per track, find pairwise track overlaps,
/// cluster them, convert significant clusters into sections, then run the
/// fold/heading/gradient splitting, nearby merge, overlap dedup and
/// high-variance splitting passes. Returns sections sorted by descending
/// `visit_count`.
pub fn detect_sections_from_tracks(
    tracks: &[(String, Vec<GpsPoint>)],
    sport_types: &HashMap<String, String>,
    groups: &[RouteGroup],
    config: &SectionConfig,
) -> Vec<FrequentSection> {
    info!("[Sections] Detecting from {} full GPS tracks", tracks.len());
    if tracks.len() < config.min_activities as usize {
        return vec![];
    }
    // Only groups with 2+ activities contribute activity -> route-group mappings.
    let significant_groups: Vec<&RouteGroup> = groups
        .iter()
        .filter(|g| g.activity_ids.len() >= 2)
        .collect();
    let activity_to_route: HashMap<&str, &str> = significant_groups
        .iter()
        .flat_map(|g| {
            g.activity_ids
                .iter()
                .map(|aid| (aid.as_str(), g.group_id.as_str()))
        })
        .collect();
    info!(
        "[Sections] Received {} groups, {} with 2+ activities, {} total activity mappings",
        groups.len(),
        significant_groups.len(),
        activity_to_route.len()
    );
    let track_map: HashMap<String, Vec<GpsPoint>> = tracks
        .iter()
        .map(|(id, pts)| (id.clone(), pts.clone()))
        .collect();
    // Sections are only meaningful within a single sport type.
    let mut tracks_by_sport: HashMap<String, Vec<(&str, &[GpsPoint])>> = HashMap::new();
    for (activity_id, points) in tracks {
        let sport = sport_types
            .get(activity_id)
            .cloned()
            .unwrap_or_else(|| "Unknown".to_string());
        tracks_by_sport
            .entry(sport)
            .or_default()
            .push((activity_id.as_str(), points.as_slice()));
    }
    let mut all_sections: Vec<FrequentSection> = Vec::new();
    // Running offset so section ids keep increasing across sport types.
    let mut section_counter = 0;
    for (sport_type, sport_tracks) in &tracks_by_sport {
        if sport_tracks.len() < config.min_activities as usize {
            continue;
        }
        info!(
            "[Sections] Processing {} {} tracks",
            sport_tracks.len(),
            sport_type
        );
        let rtree_start = std::time::Instant::now();
        #[cfg(feature = "parallel")]
        let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
            .par_iter()
            .map(|(_, pts)| build_rtree(pts))
            .collect();
        #[cfg(not(feature = "parallel"))]
        let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
            .iter()
            .map(|(_, pts)| build_rtree(pts))
            .collect();
        info!(
            "[Sections] Built {} R-trees in {}ms",
            rtrees.len(),
            rtree_start.elapsed().as_millis()
        );
        let overlap_start = std::time::Instant::now();
        // All unordered track pairs (i < j).
        let pairs: Vec<(usize, usize)> = (0..sport_tracks.len())
            .flat_map(|i| ((i + 1)..sport_tracks.len()).map(move |j| (i, j)))
            .collect();
        let total_pairs = pairs.len();
        #[cfg(feature = "parallel")]
        let overlaps: Vec<FullTrackOverlap> = pairs
            .into_par_iter()
            .filter_map(|(i, j)| {
                let (id_a, track_a) = sport_tracks[i];
                let (id_b, track_b) = sport_tracks[j];
                // Cheap bounding-box rejection before the detailed overlap search.
                if !bounds_overlap_tracks(track_a, track_b, config.proximity_threshold) {
                    return None;
                }
                find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], config)
            })
            .collect();
        #[cfg(not(feature = "parallel"))]
        let overlaps: Vec<FullTrackOverlap> = pairs
            .into_iter()
            .filter_map(|(i, j)| {
                let (id_a, track_a) = sport_tracks[i];
                let (id_b, track_b) = sport_tracks[j];
                if !bounds_overlap_tracks(track_a, track_b, config.proximity_threshold) {
                    return None;
                }
                find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], config)
            })
            .collect();
        info!(
            "[Sections] Found {} pairwise overlaps for {} ({} pairs) in {}ms",
            overlaps.len(),
            sport_type,
            total_pairs,
            overlap_start.elapsed().as_millis()
        );
        let cluster_start = std::time::Instant::now();
        let clusters = cluster_overlaps(overlaps, config);
        let significant_clusters: Vec<_> = clusters
            .into_iter()
            .filter(|c| c.activity_ids.len() >= config.min_activities as usize)
            .collect();
        info!(
            "[Sections] {} significant clusters ({}+ activities) for {} in {}ms",
            significant_clusters.len(),
            config.min_activities,
            sport_type,
            cluster_start.elapsed().as_millis()
        );
        let section_convert_start = std::time::Instant::now();
        let cluster_data: Vec<_> = significant_clusters.into_iter().enumerate().collect();
        #[cfg(feature = "parallel")]
        let sport_sections: Vec<FrequentSection> = cluster_data
            .into_par_iter()
            .filter_map(|(idx, cluster)| {
                process_cluster(
                    idx,
                    cluster,
                    sport_type,
                    &track_map,
                    &activity_to_route,
                    config,
                    None,
                )
            })
            .collect();
        #[cfg(not(feature = "parallel"))]
        let sport_sections: Vec<FrequentSection> = cluster_data
            .into_iter()
            .filter_map(|(idx, cluster)| {
                process_cluster(
                    idx,
                    cluster,
                    sport_type,
                    &track_map,
                    &activity_to_route,
                    config,
                    None,
                )
            })
            .collect();
        info!(
            "[Sections] Converted {} sections for {} in {}ms",
            sport_sections.len(),
            sport_type,
            section_convert_start.elapsed().as_millis()
        );
        // Post-processing pipeline: split, merge, dedup, then split again on density.
        let fold_start = std::time::Instant::now();
        let fold_sections = split_folding_sections(sport_sections, config);
        info!(
            "[Sections] After fold splitting: {} sections in {}ms",
            fold_sections.len(),
            fold_start.elapsed().as_millis()
        );
        let heading_start = std::time::Instant::now();
        let heading_sections = split_at_heading_changes(fold_sections, config);
        info!(
            "[Sections] After heading splitting: {} sections in {}ms",
            heading_sections.len(),
            heading_start.elapsed().as_millis()
        );
        let gradient_start = std::time::Instant::now();
        let gradient_sections = split_at_gradient_changes(heading_sections, config);
        info!(
            "[Sections] After gradient splitting: {} sections in {}ms",
            gradient_sections.len(),
            gradient_start.elapsed().as_millis()
        );
        let merge_start = std::time::Instant::now();
        let merged_sections = merge_nearby_sections(gradient_sections, config);
        info!(
            "[Sections] After nearby merge: {} sections in {}ms",
            merged_sections.len(),
            merge_start.elapsed().as_millis()
        );
        let dedup_start = std::time::Instant::now();
        let deduped_sections = remove_overlapping_sections(merged_sections, config);
        info!(
            "[Sections] After dedup: {} unique sections in {}ms",
            deduped_sections.len(),
            dedup_start.elapsed().as_millis()
        );
        let split_start = std::time::Instant::now();
        let final_sections = split_high_variance_sections(deduped_sections, &track_map, config);
        info!(
            "[Sections] After density splitting: {} sections in {}ms",
            final_sections.len(),
            split_start.elapsed().as_millis()
        );
        // Re-number ids so they are sequential per sport within this run.
        let num_new = final_sections.len();
        for (i, mut section) in final_sections.into_iter().enumerate() {
            section.id = format!("sec_{}_{}", sport_type.to_lowercase(), section_counter + i);
            all_sections.push(section);
        }
        // Fix: advance by the number of sections just added. Previously this
        // added `all_sections.len()` (the cumulative total across sports),
        // producing ever-growing gaps in the id sequence.
        section_counter += num_new;
    }
    all_sections.sort_by(|a, b| b.visit_count.cmp(&a.visit_count));
    info!("[Sections] Detected {} total sections", all_sections.len());
    all_sections
}
/// Per-scale output of `process_scale_preset`, merged by the multi-scale driver.
struct ScaleResult {
    /// Confirmed sections detected at this scale.
    sections: Vec<FrequentSection>,
    /// Low-support candidates (populated only when `include_potentials` is set).
    potentials: Vec<PotentialSection>,
    /// Number of pairwise track overlaps found at this scale.
    overlaps_found: u32,
    /// The scale preset's name, used as the stats key.
    scale_name: String,
}
/// Runs section detection for a single scale preset across all sport types.
///
/// Builds a scale-specific `SectionConfig` (length bounds and activity
/// minimum from `preset`), finds pairwise overlaps, clusters them, and
/// partitions clusters into confirmed sections vs. low-support potentials
/// (the latter only when `config.include_potentials` is set).
fn process_scale_preset(
    preset: &ScalePreset,
    tracks_by_sport: &HashMap<String, Vec<(&str, &[GpsPoint])>>,
    track_map: &HashMap<String, Vec<GpsPoint>>,
    activity_to_route: &HashMap<&str, &str>,
    config: &SectionConfig,
) -> ScaleResult {
    info!(
        "[MultiScale] Processing {} scale: {}-{}m, min {} activities",
        preset.name, preset.min_length, preset.max_length, preset.min_activities
    );
    let scale_config = SectionConfig {
        min_section_length: preset.min_length,
        max_section_length: preset.max_length,
        min_activities: preset.min_activities,
        ..config.clone()
    };
    let mut scale_sections: Vec<FrequentSection> = Vec::new();
    let mut scale_potentials: Vec<PotentialSection> = Vec::new();
    let mut overlaps_found = 0u32;
    for (sport_type, sport_tracks) in tracks_by_sport {
        // With potentials enabled even a single track is worth scanning,
        // since sub-threshold clusters are still reported.
        let min_tracks_for_processing = if config.include_potentials {
            1
        } else {
            preset.min_activities as usize
        };
        if sport_tracks.len() < min_tracks_for_processing {
            continue;
        }
        #[cfg(feature = "parallel")]
        let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
            .par_iter()
            .map(|(_, pts)| build_rtree(pts))
            .collect();
        #[cfg(not(feature = "parallel"))]
        let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
            .iter()
            .map(|(_, pts)| build_rtree(pts))
            .collect();
        // All unordered track pairs (i < j).
        let pairs: Vec<(usize, usize)> = (0..sport_tracks.len())
            .flat_map(|i| ((i + 1)..sport_tracks.len()).map(move |j| (i, j)))
            .collect();
        #[cfg(feature = "parallel")]
        let overlaps: Vec<FullTrackOverlap> = pairs
            .into_par_iter()
            .filter_map(|(i, j)| {
                let (id_a, track_a) = sport_tracks[i];
                let (id_b, track_b) = sport_tracks[j];
                // Cheap bounding-box rejection before the detailed overlap search.
                if !bounds_overlap_tracks(track_a, track_b, scale_config.proximity_threshold) {
                    return None;
                }
                find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], &scale_config)
            })
            .collect();
        #[cfg(not(feature = "parallel"))]
        let overlaps: Vec<FullTrackOverlap> = pairs
            .into_iter()
            .filter_map(|(i, j)| {
                let (id_a, track_a) = sport_tracks[i];
                let (id_b, track_b) = sport_tracks[j];
                if !bounds_overlap_tracks(track_a, track_b, scale_config.proximity_threshold) {
                    return None;
                }
                find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], &scale_config)
            })
            .collect();
        overlaps_found += overlaps.len() as u32;
        let clusters = cluster_overlaps(overlaps, &scale_config);
        // Clusters meeting the activity minimum become sections; the rest are
        // candidate potentials.
        let (significant, potential): (Vec<_>, Vec<_>) = clusters
            .into_iter()
            .partition(|c| c.activity_ids.len() >= preset.min_activities as usize);
        #[cfg(feature = "parallel")]
        let sport_sections: Vec<FrequentSection> = significant
            .into_par_iter()
            .enumerate()
            .filter_map(|(idx, cluster)| {
                process_cluster(
                    idx,
                    cluster,
                    sport_type,
                    track_map,
                    activity_to_route,
                    &scale_config,
                    Some(&preset.name),
                )
            })
            .collect();
        #[cfg(not(feature = "parallel"))]
        let sport_sections: Vec<FrequentSection> = significant
            .into_iter()
            .enumerate()
            .filter_map(|(idx, cluster)| {
                process_cluster(
                    idx,
                    cluster,
                    sport_type,
                    track_map,
                    activity_to_route,
                    &scale_config,
                    Some(&preset.name),
                )
            })
            .collect();
        scale_sections.extend(sport_sections);
        if config.include_potentials {
            for (idx, cluster) in potential.into_iter().enumerate() {
                let activity_count = cluster.activity_ids.len();
                // Keep only non-empty clusters below the confirmation threshold.
                if activity_count == 0 || activity_count >= preset.min_activities as usize {
                    continue;
                }
                let (_rep_id, rep_polyline) = select_medoid(&cluster);
                if rep_polyline.is_empty() {
                    continue;
                }
                let distance = calculate_route_distance(&rep_polyline);
                if distance < preset.min_length || distance > preset.max_length {
                    continue;
                }
                scale_potentials.push(PotentialSection {
                    id: format!(
                        "pot_{}_{}_{}",
                        preset.name,
                        sport_type.to_lowercase(),
                        idx
                    ),
                    sport_type: sport_type.to_string(),
                    polyline: rep_polyline,
                    activity_ids: cluster.activity_ids.into_iter().collect(),
                    visit_count: activity_count as u32,
                    distance_meters: distance,
                    // Heuristic: base 0.3 plus 0.2 per contributing activity.
                    confidence: 0.3 + (activity_count as f64 * 0.2),
                    scale: preset.name.clone(),
                });
            }
        }
    }
    info!(
        "[MultiScale] {} scale: {} sections, {} potentials",
        preset.name,
        scale_sections.len(),
        scale_potentials.len()
    );
    ScaleResult {
        sections: scale_sections,
        potentials: scale_potentials,
        overlaps_found,
        scale_name: preset.name.clone(),
    }
}
/// Multi-scale section detection: runs `process_scale_preset` for each
/// configured scale, merges the results, then applies the shared
/// post-processing (split/merge/dedup) pipeline.
///
/// Falls back to the legacy single-scale `detect_sections_from_tracks` when
/// `config.scale_presets` is empty. Sections are returned sorted by
/// descending visit count, potentials by descending confidence.
pub fn detect_sections_multiscale(
    tracks: &[(String, Vec<GpsPoint>)],
    sport_types: &HashMap<String, String>,
    groups: &[RouteGroup],
    config: &SectionConfig,
) -> MultiScaleSectionResult {
    info!(
        "[MultiScale] Detecting from {} tracks with {} scale presets",
        tracks.len(),
        config.scale_presets.len()
    );
    let mut stats = DetectionStats {
        activities_processed: tracks.len() as u32,
        overlaps_found: 0,
        sections_by_scale: HashMap::new(),
        potentials_by_scale: HashMap::new(),
    };
    // No presets configured: delegate to the legacy single-scale detector.
    if config.scale_presets.is_empty() {
        let sections = detect_sections_from_tracks(tracks, sport_types, groups, config);
        stats
            .sections_by_scale
            .insert("legacy".to_string(), sections.len() as u32);
        return MultiScaleSectionResult {
            sections,
            potentials: vec![],
            stats,
        };
    }
    let track_map: HashMap<String, Vec<GpsPoint>> = tracks
        .iter()
        .map(|(id, pts)| (id.clone(), pts.clone()))
        .collect();
    // Only groups with 2+ activities contribute activity -> route-group mappings.
    let significant_groups: Vec<&RouteGroup> = groups
        .iter()
        .filter(|g| g.activity_ids.len() >= 2)
        .collect();
    let activity_to_route: HashMap<&str, &str> = significant_groups
        .iter()
        .flat_map(|g| {
            g.activity_ids
                .iter()
                .map(|aid| (aid.as_str(), g.group_id.as_str()))
        })
        .collect();
    // Sections are only meaningful within a single sport type.
    let mut tracks_by_sport: HashMap<String, Vec<(&str, &[GpsPoint])>> = HashMap::new();
    for (activity_id, points) in tracks {
        let sport = sport_types
            .get(activity_id)
            .cloned()
            .unwrap_or_else(|| "Unknown".to_string());
        tracks_by_sport
            .entry(sport)
            .or_default()
            .push((activity_id.as_str(), points.as_slice()));
    }
    // Each scale preset is processed independently (in parallel when enabled).
    #[cfg(feature = "parallel")]
    let scale_results: Vec<ScaleResult> = config
        .scale_presets
        .par_iter()
        .map(|preset| {
            process_scale_preset(
                preset,
                &tracks_by_sport,
                &track_map,
                &activity_to_route,
                config,
            )
        })
        .collect();
    #[cfg(not(feature = "parallel"))]
    let scale_results: Vec<ScaleResult> = config
        .scale_presets
        .iter()
        .map(|preset| {
            process_scale_preset(
                preset,
                &tracks_by_sport,
                &track_map,
                &activity_to_route,
                config,
            )
        })
        .collect();
    let mut all_sections: Vec<FrequentSection> = Vec::new();
    let mut all_potentials: Vec<PotentialSection> = Vec::new();
    for result in scale_results {
        stats.overlaps_found += result.overlaps_found;
        stats
            .sections_by_scale
            .insert(result.scale_name.clone(), result.sections.len() as u32);
        stats
            .potentials_by_scale
            .insert(result.scale_name, result.potentials.len() as u32);
        all_sections.extend(result.sections);
        all_potentials.extend(result.potentials);
    }
    // Shared post-processing pipeline (same order as the legacy path).
    let fold_start = std::time::Instant::now();
    let fold_sections = split_folding_sections(all_sections, config);
    info!(
        "[MultiScale] After fold splitting: {} sections in {}ms",
        fold_sections.len(),
        fold_start.elapsed().as_millis()
    );
    let heading_start = std::time::Instant::now();
    let heading_sections = split_at_heading_changes(fold_sections, config);
    info!(
        "[MultiScale] After heading splitting: {} sections in {}ms",
        heading_sections.len(),
        heading_start.elapsed().as_millis()
    );
    let gradient_start = std::time::Instant::now();
    let gradient_sections = split_at_gradient_changes(heading_sections, config);
    info!(
        "[MultiScale] After gradient splitting: {} sections in {}ms",
        gradient_sections.len(),
        gradient_start.elapsed().as_millis()
    );
    let merge_start = std::time::Instant::now();
    let merged_sections = merge_nearby_sections(gradient_sections, config);
    info!(
        "[MultiScale] After nearby merge: {} sections in {}ms",
        merged_sections.len(),
        merge_start.elapsed().as_millis()
    );
    // Hierarchical dedup keeps cross-scale containment; flat dedup does not.
    let dedup_start = std::time::Instant::now();
    let deduped_sections = if config.preserve_hierarchy {
        remove_overlapping_sections_hierarchical(merged_sections, config)
    } else {
        remove_overlapping_sections(merged_sections, config)
    };
    info!(
        "[MultiScale] After dedup: {} sections in {}ms",
        deduped_sections.len(),
        dedup_start.elapsed().as_millis()
    );
    let split_start = std::time::Instant::now();
    let final_sections = split_high_variance_sections(deduped_sections, &track_map, config);
    info!(
        "[MultiScale] After density splitting: {} sections in {}ms",
        final_sections.len(),
        split_start.elapsed().as_millis()
    );
    let mut sorted_sections = final_sections;
    sorted_sections.sort_by(|a, b| b.visit_count.cmp(&a.visit_count));
    let mut sorted_potentials = all_potentials;
    sorted_potentials.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    info!(
        "[MultiScale] Final: {} sections, {} potentials",
        sorted_sections.len(),
        sorted_potentials.len()
    );
    MultiScaleSectionResult {
        sections: sorted_sections,
        potentials: sorted_potentials,
        stats,
    }
}
/// Drops shorter sections that are near-duplicates of longer ones while
/// preserving cross-scale hierarchy: a section is removed only when >90% of
/// its points lie within `proximity_threshold` of a longer section, it spans
/// >70% of that section's length, and both carry the same scale label
/// (sections missing a scale label are treated as comparable).
///
/// Fix: the field accesses here were corrupted by HTML-entity decoding
/// (`&sections` had become `§ions`, which does not compile); restored.
fn remove_overlapping_sections_hierarchical(
    mut sections: Vec<FrequentSection>,
    config: &SectionConfig,
) -> Vec<FrequentSection> {
    if sections.len() <= 1 {
        return sections;
    }
    // Longest first, so each section is only compared against longer survivors.
    sections.sort_by(|a, b| {
        b.distance_meters
            .partial_cmp(&a.distance_meters)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    #[cfg(feature = "parallel")]
    let rtrees: Vec<rstar::RTree<IndexedPoint>> = {
        use rayon::prelude::*;
        sections
            .par_iter()
            .map(|s| build_rtree(&s.polyline))
            .collect()
    };
    #[cfg(not(feature = "parallel"))]
    let rtrees: Vec<rstar::RTree<IndexedPoint>> =
        sections.iter().map(|s| build_rtree(&s.polyline)).collect();
    let mut keep = vec![true; sections.len()];
    for i in 0..sections.len() {
        if !keep[i] {
            continue;
        }
        let tree_i = &rtrees[i];
        for j in (i + 1)..sections.len() {
            if !keep[j] {
                continue;
            }
            let containment = compute_polyline_containment_with_rtree(
                &sections[j].polyline,
                tree_i,
                config.proximity_threshold,
            );
            let length_ratio = sections[j].distance_meters / sections[i].distance_meters;
            let same_scale = match (&sections[i].scale, &sections[j].scale) {
                (Some(a), Some(b)) => a == b,
                // Missing scale info: treat as comparable rather than exempt.
                _ => true,
            };
            if containment > 0.9 && length_ratio > 0.7 && same_scale {
                keep[j] = false;
            }
        }
    }
    sections
        .into_iter()
        .zip(keep)
        .filter_map(|(s, k)| if k { Some(s) } else { None })
        .collect()
}
/// Returns the fraction of `polyline_a` points that lie within
/// `proximity_threshold` meters of some point in `tree_b`, using one
/// nearest-neighbor query per point.
///
/// NOTE(review): the meter→degree conversion uses a flat 111 km/degree for
/// both axes, but longitudinal degree length shrinks with latitude, so this
/// under-counts containment away from the equator. Presumably acceptable here
/// since the same approximation is applied consistently — confirm if
/// precision at high latitudes matters.
fn compute_polyline_containment_with_rtree(
    polyline_a: &[GpsPoint],
    tree_b: &rstar::RTree<IndexedPoint>,
    proximity_threshold: f64,
) -> f64 {
    use rstar::PointDistance;
    if polyline_a.is_empty() {
        return 0.0;
    }
    // Compare squared distances in degrees to avoid a sqrt per query.
    let threshold_deg = proximity_threshold / 111_000.0;
    let threshold_deg_sq = threshold_deg * threshold_deg;
    let mut contained_count = 0;
    for point_a in polyline_a {
        let query = [point_a.latitude, point_a.longitude];
        if let Some(nearest) = tree_b.nearest_neighbor(&query)
            && nearest.distance_2(&query) <= threshold_deg_sq
        {
            contained_count += 1;
        }
    }
    contained_count as f64 / polyline_a.len() as f64
}
/// Returns the fraction of `polyline_a` points lying within
/// `proximity_threshold` meters of some point of `polyline_b`
/// (brute-force haversine variant of the R-tree version).
#[allow(dead_code)]
fn compute_polyline_containment(
    polyline_a: &[GpsPoint],
    polyline_b: &[GpsPoint],
    proximity_threshold: f64,
) -> f64 {
    use crate::geo_utils::haversine_distance;
    if polyline_a.is_empty() || polyline_b.is_empty() {
        return 0.0;
    }
    // A point is contained when any point of B is within the threshold —
    // equivalent to checking the minimum distance against the threshold.
    let contained = polyline_a
        .iter()
        .filter(|&point_a| {
            polyline_b
                .iter()
                .any(|point_b| haversine_distance(point_a, point_b) <= proximity_threshold)
        })
        .count();
    contained as f64 / polyline_a.len() as f64
}