mod consensus;
mod medoid;
mod overlap;
mod portions;
mod postprocess;
mod rtree;
mod traces;
use crate::geo_utils::polyline_length;
use crate::{GpsPoint, RouteGroup};
use log::info;
#[cfg(feature = "parallel")]
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
pub(crate) use consensus::compute_consensus_polyline;
pub(crate) use medoid::select_medoid;
pub(crate) use overlap::{
cluster_overlaps, find_full_track_overlap, FullTrackOverlap, OverlapCluster,
};
pub(crate) use portions::compute_activity_portions;
pub(crate) use postprocess::{
merge_nearby_sections, remove_overlapping_sections, split_folding_sections,
split_high_variance_sections,
};
pub(crate) use rtree::{bounds_overlap_tracks, build_rtree, IndexedPoint};
pub(crate) use traces::extract_all_activity_traces;
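/// A named length scale for multi-scale detection: candidate sections at this
/// scale must be between `min_length` and `max_length` meters long and shared
/// by at least `min_activities` activities.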
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct ScalePreset {
pub name: String,
pub min_length: f64,
pub max_length: f64,
pub min_activities: u32,
}
impl ScalePreset {
pub fn short() -> Self {
Self {
name: "short".to_string(),
min_length: 100.0,
max_length: 500.0,
min_activities: 2,
}
}
pub fn medium() -> Self {
Self {
name: "medium".to_string(),
min_length: 500.0,
max_length: 2000.0,
min_activities: 2,
}
}
pub fn long() -> Self {
Self {
name: "long".to_string(),
min_length: 2000.0,
max_length: 5000.0,
min_activities: 3,
}
}
pub fn default_presets() -> Vec<Self> {
vec![Self::short(), Self::medium(), Self::long()]
}
}
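/// Tuning parameters for section detection. Distance thresholds are in meters.
/// `detection_mode` is one of "discovery", "conservative", or "legacy"; the
/// constructors below provide matching presets.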
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct SectionConfig {
pub proximity_threshold: f64,
pub min_section_length: f64,
pub max_section_length: f64,
pub min_activities: u32,
pub cluster_tolerance: f64,
pub sample_points: u32,
pub detection_mode: String,
pub include_potentials: bool,
pub scale_presets: Vec<ScalePreset>,
pub preserve_hierarchy: bool,
}
impl Default for SectionConfig {
fn default() -> Self {
Self {
proximity_threshold: 50.0,
min_section_length: 200.0,
max_section_length: 5000.0,
min_activities: 3,
cluster_tolerance: 80.0,
sample_points: 50,
detection_mode: "discovery".to_string(),
include_potentials: true,
scale_presets: ScalePreset::default_presets(),
preserve_hierarchy: true,
}
}
}
impl SectionConfig {
pub fn discovery() -> Self {
Self {
detection_mode: "discovery".to_string(),
include_potentials: true,
scale_presets: ScalePreset::default_presets(),
preserve_hierarchy: true,
..Default::default()
}
}
pub fn conservative() -> Self {
Self {
detection_mode: "conservative".to_string(),
include_potentials: false,
min_activities: 4,
scale_presets: vec![ScalePreset::medium(), ScalePreset::long()],
preserve_hierarchy: false,
..Default::default()
}
}
pub fn legacy() -> Self {
Self {
detection_mode: "legacy".to_string(),
include_potentials: false,
scale_presets: vec![],
preserve_hierarchy: false,
min_activities: 3,
..Default::default()
}
}
}
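/// The index range of a single activity's track that runs along a detected
/// section, together with its length in meters and a direction label.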
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct SectionPortion {
pub activity_id: String,
pub start_index: u32,
pub end_index: u32,
pub distance_meters: f64,
pub direction: String,
}
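/// A stretch of path shared by several activities of the same sport. The
/// `polyline` is the consensus geometry built from all matching traces, seeded
/// from the medoid track identified by `representative_activity_id`; the
/// remaining fields carry per-activity portions/traces and quality metrics.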
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct FrequentSection {
pub id: String,
pub name: Option<String>,
pub sport_type: String,
pub polyline: Vec<GpsPoint>,
pub representative_activity_id: String,
pub activity_ids: Vec<String>,
pub activity_portions: Vec<SectionPortion>,
pub route_ids: Vec<String>,
pub visit_count: u32,
pub distance_meters: f64,
pub activity_traces: HashMap<String, Vec<GpsPoint>>,
pub confidence: f64,
pub observation_count: u32,
pub average_spread: f64,
pub point_density: Vec<u32>,
pub scale: Option<String>,
}
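/// A candidate section whose cluster did not reach the scale preset's
/// `min_activities` threshold. Only produced when
/// `SectionConfig::include_potentials` is enabled.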
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct PotentialSection {
pub id: String,
pub sport_type: String,
pub polyline: Vec<GpsPoint>,
pub activity_ids: Vec<String>,
pub visit_count: u32,
pub distance_meters: f64,
pub confidence: f64,
pub scale: String,
}
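/// Output of `detect_sections_multiscale`: confirmed sections, lower-confidence
/// potential sections, and per-scale statistics.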
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct MultiScaleSectionResult {
pub sections: Vec<FrequentSection>,
pub potentials: Vec<PotentialSection>,
pub stats: DetectionStats,
}
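/// Summary counters for a detection run, broken down by scale preset name.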
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct DetectionStats {
pub activities_processed: u32,
pub overlaps_found: u32,
pub sections_by_scale: HashMap<String, u32>,
pub potentials_by_scale: HashMap<String, u32>,
}
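/// Converts one overlap cluster into a `FrequentSection`: selects the medoid
/// track as representative, rejects clusters longer than
/// `config.max_section_length`, then derives per-activity portions, traces, and
/// a consensus polyline. Returns `None` for empty or over-length clusters.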
fn process_cluster(
idx: usize,
cluster: OverlapCluster,
sport_type: &str,
track_map: &HashMap<String, Vec<GpsPoint>>,
activity_to_route: &HashMap<&str, &str>,
config: &SectionConfig,
scale_name: Option<&str>,
) -> Option<FrequentSection> {
let (representative_id, representative_polyline) = select_medoid(&cluster);
if representative_polyline.is_empty() {
return None;
}
let distance_meters = polyline_length(&representative_polyline);
if distance_meters > config.max_section_length {
return None;
}
let activity_portions =
compute_activity_portions(&cluster, &representative_polyline, track_map, config);
let route_ids: Vec<String> = cluster
.activity_ids
.iter()
.filter_map(|aid| activity_to_route.get(aid.as_str()).map(|s| s.to_string()))
.collect::<HashSet<_>>()
.into_iter()
.collect();
let activity_id_vec: Vec<String> = cluster.activity_ids.iter().cloned().collect();
let activity_traces =
extract_all_activity_traces(&activity_id_vec, &representative_polyline, track_map);
let all_traces: Vec<Vec<GpsPoint>> = activity_traces.values().cloned().collect();
let consensus = compute_consensus_polyline(
&representative_polyline,
&all_traces,
config.proximity_threshold,
);
let consensus_distance = polyline_length(&consensus.polyline);
Some(FrequentSection {
id: format!("sec_{}_{}", sport_type.to_lowercase(), idx),
name: None,
sport_type: sport_type.to_string(),
polyline: consensus.polyline,
representative_activity_id: representative_id,
activity_ids: cluster.activity_ids.into_iter().collect(),
activity_portions,
route_ids,
visit_count: cluster.overlaps.len() as u32 + 1,
distance_meters: consensus_distance,
activity_traces,
confidence: consensus.confidence,
observation_count: consensus.observation_count,
average_spread: consensus.average_spread,
point_density: consensus.point_density,
scale: scale_name.map(|s| s.to_string()),
})
}
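/// Single-scale section detection over full GPS tracks. Tracks are grouped by
/// sport type, pairwise overlaps are found with R-tree acceleration, overlaps
/// are clustered into sections, and the results are post-processed (fold
/// splitting, nearby merging, overlap removal, high-variance splitting) before
/// being sorted by visit count.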
pub fn detect_sections_from_tracks(
tracks: &[(String, Vec<GpsPoint>)],
sport_types: &HashMap<String, String>,
groups: &[RouteGroup],
config: &SectionConfig,
) -> Vec<FrequentSection> {
info!("[Sections] Detecting from {} full GPS tracks", tracks.len());
if tracks.len() < config.min_activities as usize {
return vec![];
}
let significant_groups: Vec<&RouteGroup> = groups
.iter()
.filter(|g| g.activity_ids.len() >= 2)
.collect();
let activity_to_route: HashMap<&str, &str> = significant_groups
.iter()
.flat_map(|g| {
g.activity_ids
.iter()
.map(|aid| (aid.as_str(), g.group_id.as_str()))
})
.collect();
info!(
"[Sections] Received {} groups, {} with 2+ activities, {} total activity mappings",
groups.len(),
significant_groups.len(),
activity_to_route.len()
);
let track_map: HashMap<String, Vec<GpsPoint>> = tracks
.iter()
.map(|(id, pts)| (id.clone(), pts.clone()))
.collect();
let mut tracks_by_sport: HashMap<String, Vec<(&str, &[GpsPoint])>> = HashMap::new();
for (activity_id, points) in tracks {
let sport = sport_types
.get(activity_id)
.cloned()
.unwrap_or_else(|| "Unknown".to_string());
tracks_by_sport
.entry(sport)
.or_default()
.push((activity_id.as_str(), points.as_slice()));
}
let mut all_sections: Vec<FrequentSection> = Vec::new();
let mut section_counter = 0;
for (sport_type, sport_tracks) in &tracks_by_sport {
if sport_tracks.len() < config.min_activities as usize {
continue;
}
info!(
"[Sections] Processing {} {} tracks",
sport_tracks.len(),
sport_type
);
let rtree_start = std::time::Instant::now();
let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
.iter()
.map(|(_, pts)| build_rtree(pts))
.collect();
info!(
"[Sections] Built {} R-trees in {}ms",
rtrees.len(),
rtree_start.elapsed().as_millis()
);
let overlap_start = std::time::Instant::now();
let pairs: Vec<(usize, usize)> = (0..sport_tracks.len())
.flat_map(|i| ((i + 1)..sport_tracks.len()).map(move |j| (i, j)))
.collect();
let total_pairs = pairs.len();
#[cfg(feature = "parallel")]
let overlaps: Vec<FullTrackOverlap> = pairs
.into_par_iter()
.filter_map(|(i, j)| {
let (id_a, track_a) = sport_tracks[i];
let (id_b, track_b) = sport_tracks[j];
if !bounds_overlap_tracks(track_a, track_b, config.proximity_threshold) {
return None;
}
find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], config)
})
.collect();
#[cfg(not(feature = "parallel"))]
let overlaps: Vec<FullTrackOverlap> = pairs
.into_iter()
.filter_map(|(i, j)| {
let (id_a, track_a) = sport_tracks[i];
let (id_b, track_b) = sport_tracks[j];
if !bounds_overlap_tracks(track_a, track_b, config.proximity_threshold) {
return None;
}
find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], config)
})
.collect();
info!(
"[Sections] Found {} pairwise overlaps for {} ({} pairs) in {}ms",
overlaps.len(),
sport_type,
total_pairs,
overlap_start.elapsed().as_millis()
);
let cluster_start = std::time::Instant::now();
let clusters = cluster_overlaps(overlaps, config);
let significant_clusters: Vec<_> = clusters
.into_iter()
.filter(|c| c.activity_ids.len() >= config.min_activities as usize)
.collect();
info!(
"[Sections] {} significant clusters ({}+ activities) for {} in {}ms",
significant_clusters.len(),
config.min_activities,
sport_type,
cluster_start.elapsed().as_millis()
);
let section_convert_start = std::time::Instant::now();
let cluster_data: Vec<_> = significant_clusters.into_iter().enumerate().collect();
#[cfg(feature = "parallel")]
let sport_sections: Vec<FrequentSection> = cluster_data
.into_par_iter()
.filter_map(|(idx, cluster)| {
process_cluster(
idx,
cluster,
sport_type,
&track_map,
&activity_to_route,
config,
None,
)
})
.collect();
#[cfg(not(feature = "parallel"))]
let sport_sections: Vec<FrequentSection> = cluster_data
.into_iter()
.filter_map(|(idx, cluster)| {
process_cluster(
idx,
cluster,
sport_type,
&track_map,
&activity_to_route,
config,
None,
)
})
.collect();
info!(
"[Sections] Converted {} sections for {} in {}ms",
sport_sections.len(),
sport_type,
section_convert_start.elapsed().as_millis()
);
let fold_start = std::time::Instant::now();
let split_sections = split_folding_sections(sport_sections, config);
info!(
"[Sections] After fold splitting: {} sections in {}ms",
split_sections.len(),
fold_start.elapsed().as_millis()
);
let merge_start = std::time::Instant::now();
let merged_sections = merge_nearby_sections(split_sections, config);
info!(
"[Sections] After nearby merge: {} sections in {}ms",
merged_sections.len(),
merge_start.elapsed().as_millis()
);
let dedup_start = std::time::Instant::now();
let deduped_sections = remove_overlapping_sections(merged_sections, config);
info!(
"[Sections] After dedup: {} unique sections in {}ms",
deduped_sections.len(),
dedup_start.elapsed().as_millis()
);
let split_start = std::time::Instant::now();
let final_sections = split_high_variance_sections(deduped_sections, &track_map, config);
info!(
"[Sections] After density splitting: {} sections in {}ms",
final_sections.len(),
split_start.elapsed().as_millis()
);
let new_section_count = final_sections.len();
for (i, mut section) in final_sections.into_iter().enumerate() {
    section.id = format!("sec_{}_{}", sport_type.to_lowercase(), section_counter + i);
    all_sections.push(section);
}
// Advance by the number of sections just added, not the running total.
section_counter += new_section_count;
}
all_sections.sort_by(|a, b| b.visit_count.cmp(&a.visit_count));
info!("[Sections] Detected {} total sections", all_sections.len());
all_sections
}
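/// Multi-scale section detection: runs the overlap/clustering pipeline once per
/// `ScalePreset`, optionally collecting `PotentialSection`s for clusters below a
/// preset's activity threshold, then post-processes all scales together. Falls
/// back to `detect_sections_from_tracks` when no presets are configured.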
pub fn detect_sections_multiscale(
tracks: &[(String, Vec<GpsPoint>)],
sport_types: &HashMap<String, String>,
groups: &[RouteGroup],
config: &SectionConfig,
) -> MultiScaleSectionResult {
info!(
"[MultiScale] Detecting from {} tracks with {} scale presets",
tracks.len(),
config.scale_presets.len()
);
let mut all_sections: Vec<FrequentSection> = Vec::new();
let mut all_potentials: Vec<PotentialSection> = Vec::new();
let mut stats = DetectionStats {
activities_processed: tracks.len() as u32,
overlaps_found: 0,
sections_by_scale: HashMap::new(),
potentials_by_scale: HashMap::new(),
};
if config.scale_presets.is_empty() {
let sections = detect_sections_from_tracks(tracks, sport_types, groups, config);
stats
.sections_by_scale
.insert("legacy".to_string(), sections.len() as u32);
return MultiScaleSectionResult {
sections,
potentials: vec![],
stats,
};
}
let track_map: HashMap<String, Vec<GpsPoint>> = tracks
.iter()
.map(|(id, pts)| (id.clone(), pts.clone()))
.collect();
let significant_groups: Vec<&RouteGroup> = groups
.iter()
.filter(|g| g.activity_ids.len() >= 2)
.collect();
let activity_to_route: HashMap<&str, &str> = significant_groups
.iter()
.flat_map(|g| {
g.activity_ids
.iter()
.map(|aid| (aid.as_str(), g.group_id.as_str()))
})
.collect();
let mut tracks_by_sport: HashMap<String, Vec<(&str, &[GpsPoint])>> = HashMap::new();
for (activity_id, points) in tracks {
let sport = sport_types
.get(activity_id)
.cloned()
.unwrap_or_else(|| "Unknown".to_string());
tracks_by_sport
.entry(sport)
.or_default()
.push((activity_id.as_str(), points.as_slice()));
}
for preset in &config.scale_presets {
info!(
"[MultiScale] Processing {} scale: {}-{}m, min {} activities",
preset.name, preset.min_length, preset.max_length, preset.min_activities
);
let scale_config = SectionConfig {
min_section_length: preset.min_length,
max_section_length: preset.max_length,
min_activities: preset.min_activities,
..config.clone()
};
let mut scale_sections = 0u32;
let mut scale_potentials = 0u32;
for (sport_type, sport_tracks) in &tracks_by_sport {
let min_tracks_for_processing = if config.include_potentials {
1
} else {
preset.min_activities as usize
};
if sport_tracks.len() < min_tracks_for_processing {
continue;
}
let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
.iter()
.map(|(_, pts)| build_rtree(pts))
.collect();
let pairs: Vec<(usize, usize)> = (0..sport_tracks.len())
.flat_map(|i| ((i + 1)..sport_tracks.len()).map(move |j| (i, j)))
.collect();
#[cfg(feature = "parallel")]
let overlaps: Vec<FullTrackOverlap> = pairs
.into_par_iter()
.filter_map(|(i, j)| {
let (id_a, track_a) = sport_tracks[i];
let (id_b, track_b) = sport_tracks[j];
if !bounds_overlap_tracks(track_a, track_b, scale_config.proximity_threshold) {
return None;
}
find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], &scale_config)
})
.collect();
#[cfg(not(feature = "parallel"))]
let overlaps: Vec<FullTrackOverlap> = pairs
.into_iter()
.filter_map(|(i, j)| {
let (id_a, track_a) = sport_tracks[i];
let (id_b, track_b) = sport_tracks[j];
if !bounds_overlap_tracks(track_a, track_b, scale_config.proximity_threshold) {
return None;
}
find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], &scale_config)
})
.collect();
stats.overlaps_found += overlaps.len() as u32;
let clusters = cluster_overlaps(overlaps, &scale_config);
let (significant, potential): (Vec<_>, Vec<_>) = clusters
.into_iter()
.partition(|c| c.activity_ids.len() >= preset.min_activities as usize);
#[cfg(feature = "parallel")]
let sport_sections: Vec<FrequentSection> = significant
.into_par_iter()
.enumerate()
.filter_map(|(idx, cluster)| {
process_cluster(
idx,
cluster,
sport_type,
&track_map,
&activity_to_route,
&scale_config,
Some(&preset.name),
)
})
.collect();
#[cfg(not(feature = "parallel"))]
let sport_sections: Vec<FrequentSection> = significant
.into_iter()
.enumerate()
.filter_map(|(idx, cluster)| {
process_cluster(
idx,
cluster,
sport_type,
&track_map,
&activity_to_route,
&scale_config,
Some(&preset.name),
)
})
.collect();
scale_sections += sport_sections.len() as u32;
all_sections.extend(sport_sections);
if config.include_potentials {
for (idx, cluster) in potential.into_iter().enumerate() {
let activity_count = cluster.activity_ids.len();
if activity_count >= 1 && activity_count < preset.min_activities as usize {
let (_rep_id, rep_polyline) = select_medoid(&cluster);
if !rep_polyline.is_empty() {
    let distance = polyline_length(&rep_polyline);
    if distance >= preset.min_length && distance <= preset.max_length {
        all_potentials.push(PotentialSection {
            id: format!(
                "pot_{}_{}_{}",
                preset.name,
                sport_type.to_lowercase(),
                idx
            ),
            sport_type: sport_type.to_string(),
            polyline: rep_polyline,
            activity_ids: cluster.activity_ids.into_iter().collect(),
            visit_count: activity_count as u32,
            distance_meters: distance,
            confidence: 0.3 + (activity_count as f64 * 0.2),
            scale: preset.name.clone(),
        });
        scale_potentials += 1;
    }
}
}
}
}
}
stats
.sections_by_scale
.insert(preset.name.clone(), scale_sections);
stats
.potentials_by_scale
.insert(preset.name.clone(), scale_potentials);
info!(
"[MultiScale] {} scale: {} sections, {} potentials",
preset.name, scale_sections, scale_potentials
);
}
let fold_start = std::time::Instant::now();
let split_sections = split_folding_sections(all_sections, config);
info!(
"[MultiScale] After fold splitting: {} sections in {}ms",
split_sections.len(),
fold_start.elapsed().as_millis()
);
let merge_start = std::time::Instant::now();
let merged_sections = merge_nearby_sections(split_sections, config);
info!(
"[MultiScale] After nearby merge: {} sections in {}ms",
merged_sections.len(),
merge_start.elapsed().as_millis()
);
let dedup_start = std::time::Instant::now();
let deduped_sections = if config.preserve_hierarchy {
remove_overlapping_sections_hierarchical(merged_sections, config)
} else {
remove_overlapping_sections(merged_sections, config)
};
info!(
"[MultiScale] After dedup: {} sections in {}ms",
deduped_sections.len(),
dedup_start.elapsed().as_millis()
);
let split_start = std::time::Instant::now();
let final_sections = split_high_variance_sections(deduped_sections, &track_map, config);
info!(
"[MultiScale] After density splitting: {} sections in {}ms",
final_sections.len(),
split_start.elapsed().as_millis()
);
let mut sorted_sections = final_sections;
sorted_sections.sort_by(|a, b| b.visit_count.cmp(&a.visit_count));
let mut sorted_potentials = all_potentials;
sorted_potentials.sort_by(|a, b| {
b.confidence
.partial_cmp(&a.confidence)
.unwrap_or(std::cmp::Ordering::Equal)
});
info!(
"[MultiScale] Final: {} sections, {} potentials",
sorted_sections.len(),
sorted_potentials.len()
);
MultiScaleSectionResult {
sections: sorted_sections,
potentials: sorted_potentials,
stats,
}
}
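/// Hierarchy-aware deduplication: sections are ordered longest-first, and a
/// shorter section is dropped only when more than 90% of its points lie within
/// the proximity threshold of a longer one, it is at least 70% of that
/// section's length, and both belong to the same scale (sections missing a
/// scale are treated as matching). Containment across different scales is
/// preserved.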
fn remove_overlapping_sections_hierarchical(
mut sections: Vec<FrequentSection>,
config: &SectionConfig,
) -> Vec<FrequentSection> {
if sections.len() <= 1 {
return sections;
}
sections.sort_by(|a, b| {
b.distance_meters
.partial_cmp(&a.distance_meters)
.unwrap_or(std::cmp::Ordering::Equal)
});
let mut keep = vec![true; sections.len()];
for i in 0..sections.len() {
if !keep[i] {
continue;
}
for j in (i + 1)..sections.len() {
if !keep[j] {
continue;
}
let containment = compute_polyline_containment(
    &sections[j].polyline,
    &sections[i].polyline,
    config.proximity_threshold,
);
let length_ratio = sections[j].distance_meters / sections[i].distance_meters;
let same_scale = match (&sections[i].scale, &sections[j].scale) {
    (Some(a), Some(b)) => a == b,
    _ => true,
};
if containment > 0.9 && length_ratio > 0.7 && same_scale {
keep[j] = false;
}
}
}
sections
.into_iter()
.zip(keep)
.filter_map(|(s, k)| if k { Some(s) } else { None })
.collect()
}
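/// Fraction of `polyline_a`'s points that lie within `proximity_threshold`
/// meters of some point of `polyline_b`; returns 0.0 for empty inputs. Note
/// that distances are measured point-to-point, not point-to-segment.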
fn compute_polyline_containment(
polyline_a: &[GpsPoint],
polyline_b: &[GpsPoint],
proximity_threshold: f64,
) -> f64 {
use crate::geo_utils::haversine_distance;
if polyline_a.is_empty() || polyline_b.is_empty() {
return 0.0;
}
let mut contained_count = 0;
for point_a in polyline_a {
let min_dist = polyline_b
.iter()
.map(|point_b| haversine_distance(point_a, point_b))
.fold(f64::MAX, |a, b| a.min(b));
if min_dist <= proximity_threshold {
contained_count += 1;
}
}
contained_count as f64 / polyline_a.len() as f64
}
#[cfg(test)]
mod tests {
use super::*;
use crate::geo_utils::{compute_center, haversine_distance};
use medoid::resample_by_distance;
fn make_point(lat: f64, lng: f64) -> GpsPoint {
GpsPoint::new(lat, lng)
}
#[test]
fn test_haversine_distance() {
let p1 = make_point(51.5074, -0.1278); // London
let p2 = make_point(48.8566, 2.3522); // Paris
let dist = haversine_distance(&p1, &p2);
assert!(dist > 340_000.0 && dist < 350_000.0);
}
#[test]
fn test_compute_center() {
let points = vec![make_point(0.0, 0.0), make_point(2.0, 2.0)];
let center = compute_center(&points);
assert!((center.latitude - 1.0).abs() < 0.001);
assert!((center.longitude - 1.0).abs() < 0.001);
}
#[test]
fn test_resample_by_distance() {
let points = vec![
make_point(0.0, 0.0),
make_point(0.001, 0.0),
make_point(0.002, 0.0),
make_point(0.003, 0.0),
make_point(0.004, 0.0),
make_point(0.005, 0.0),
make_point(0.006, 0.0),
make_point(0.007, 0.0),
make_point(0.008, 0.0),
make_point(0.009, 0.0),
];
let resampled = resample_by_distance(&points, 5);
assert_eq!(resampled.len(), 5);
}
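#[test]
fn test_compute_polyline_containment() {
    // A polyline is fully contained in itself; an empty polyline has zero
    // containment by definition.
    let line = vec![
        make_point(0.0, 0.0),
        make_point(0.001, 0.0),
        make_point(0.002, 0.0),
    ];
    assert!((compute_polyline_containment(&line, &line, 50.0) - 1.0).abs() < 1e-9);
    assert_eq!(compute_polyline_containment(&[], &line, 50.0), 0.0);
}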
}