tracematch/sections/
mod.rs

1//! # Adaptive Consensus Section Detection
2//!
3//! Detects frequently-traveled road sections using FULL GPS tracks.
4//! Produces smooth, natural polylines that evolve and refine over time
5//! as more tracks are observed.
6//!
7//! ## Algorithm
8//! 1. Load full GPS tracks (1000s of points per activity)
9//! 2. Find overlapping portions using R-tree spatial indexing
10//! 3. Cluster overlaps that represent the same physical section
11//! 4. Select initial medoid as the starting reference
12//! 5. Compute consensus polyline via weighted averaging of all tracks
13//! 6. Track per-point confidence based on observation density
14//! 7. Adapt section boundaries based on where tracks consistently overlap
15//!
16//! ## Consensus Algorithm
17//! - Normalize all tracks to common parameterization (by distance)
18//! - At each position, collect nearby points from all tracks
19//! - Compute weighted average: weight = 1 / (distance_to_reference + epsilon)
20//! - Higher observation density → higher confidence → tighter future matching
21//!
22//! ## Adaptive Boundaries
23//! - Track where each activity's overlap starts/ends relative to section
24//! - Section can grow if tracks consistently extend beyond current bounds
25//! - Section contracts if tracks consistently end before current bounds
26
27mod consensus;
28mod evolution;
29mod medoid;
30mod optimized;
31mod overlap;
32mod portions;
33mod postprocess;
34mod rtree;
35mod traces;
36
37use crate::matching::calculate_route_distance;
38use crate::{GpsPoint, RouteGroup};
39use log::info;
40#[cfg(feature = "parallel")]
41use rayon::prelude::*;
42use serde::{Deserialize, Serialize};
43use std::collections::{HashMap, HashSet};
44
45#[cfg(feature = "persistence")]
46use crate::persistence::SectionDetectionProgress;
47
48// Re-export internal utilities for use across submodules
49pub(crate) use consensus::compute_consensus_polyline;
50pub(crate) use medoid::select_medoid;
51pub(crate) use overlap::{
52    FullTrackOverlap, OverlapCluster, cluster_overlaps, find_full_track_overlap,
53};
54pub(crate) use portions::compute_activity_portions;
55pub(crate) use postprocess::{
56    consolidate_fragments, filter_low_quality_sections, make_sections_exclusive,
57    merge_nearby_sections, remove_overlapping_sections, split_at_gradient_changes,
58    split_at_heading_changes, split_folding_sections, split_high_variance_sections,
59};
60pub(crate) use rtree::{IndexedPoint, bounds_overlap_tracks, build_rtree};
61pub(crate) use traces::extract_all_activity_traces;
62
63// Re-export evolution functions for public API
64pub use evolution::{
65    SectionUpdateResult, merge_overlapping_sections, update_section_with_new_traces,
66};
67
68// Re-export optimized detection functions
69pub use optimized::{
70    IncrementalResult, SectionMatch, SplitResult, detect_sections_incremental,
71    detect_sections_optimized, find_sections_in_route, recalculate_section_polyline,
72    split_section_at_index, split_section_at_point,
73};
74
/// Compute the initial stability score for a freshly built section.
///
/// Stability rises with the number of observed tracks and falls as the
/// average spread approaches the proximity threshold. The result is a
/// weighted blend (60% observations, 40% tightness) clamped to [0, 1].
pub(crate) fn compute_initial_stability(
    observation_count: u32,
    average_spread: f64,
    proximity_threshold: f64,
) -> f64 {
    // Observation term saturates once 10 tracks have been seen.
    const SATURATION_OBSERVATIONS: f64 = 10.0;
    let observations = (f64::from(observation_count) / SATURATION_OBSERVATIONS).min(1.0);

    // Tightness term: spread at/above the threshold contributes nothing.
    let tightness = 1.0 - (average_spread / proximity_threshold).clamp(0.0, 1.0);

    (0.6 * observations + 0.4 * tightness).clamp(0.0, 1.0)
}
91
/// Scale preset for multi-scale section detection.
///
/// Detection runs once per preset; each preset constrains the section length
/// range and the minimum activity count required at that scale.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct ScalePreset {
    /// Scale name: "short", "medium", "long"
    pub name: String,
    /// Minimum section length for this scale (meters)
    pub min_length: f64,
    /// Maximum section length for this scale (meters)
    pub max_length: f64,
    /// Minimum activities required at this scale (can be lower for short sections)
    pub min_activities: u32,
}
106
107impl ScalePreset {
108    pub fn short() -> Self {
109        Self {
110            name: "short".to_string(),
111            min_length: 100.0,
112            max_length: 500.0,
113            min_activities: 2,
114        }
115    }
116
117    pub fn medium() -> Self {
118        Self {
119            name: "medium".to_string(),
120            min_length: 500.0,
121            max_length: 2000.0,
122            min_activities: 2,
123        }
124    }
125
126    pub fn long() -> Self {
127        Self {
128            name: "long".to_string(),
129            min_length: 2000.0,
130            max_length: 5000.0,
131            min_activities: 3,
132        }
133    }
134
135    pub fn default_presets() -> Vec<Self> {
136        vec![Self::short(), Self::medium(), Self::long()]
137    }
138}
139
/// Configuration for section detection.
///
/// Distances are in meters. When `scale_presets` is non-empty, the per-preset
/// length and activity bounds override `min_section_length`,
/// `max_section_length`, and `min_activities` for each pass.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct SectionConfig {
    /// Maximum distance between tracks to consider overlapping (meters)
    pub proximity_threshold: f64,
    /// Minimum overlap length to consider a section (meters)
    pub min_section_length: f64,
    /// Maximum section length (meters) - prevents sections from becoming full routes
    pub max_section_length: f64,
    /// Minimum number of activities that must share an overlap
    pub min_activities: u32,
    /// Tolerance for clustering similar overlaps (meters)
    pub cluster_tolerance: f64,
    /// Number of sample points for AMD comparison (not for output!)
    pub sample_points: u32,
    /// Detection mode: "discovery" (lower thresholds) or "conservative"
    pub detection_mode: String,
    /// Include potential sections with only 1-2 activities as suggestions
    pub include_potentials: bool,
    /// Scale presets for multi-scale detection (empty = single-scale with min/max_section_length)
    pub scale_presets: Vec<ScalePreset>,
    /// Preserve hierarchical sections (don't deduplicate short sections inside longer ones)
    pub preserve_hierarchy: bool,
}
166
167impl Default for SectionConfig {
168    fn default() -> Self {
169        Self {
170            proximity_threshold: 50.0, // 50m - handles GPS error + wide roads + opposite sides
171            min_section_length: 200.0, // 200m minimum section (used when scale_presets is empty)
172            max_section_length: 5000.0, // 5km max (used when scale_presets is empty)
173            min_activities: 3,         // Need 3+ activities (used when scale_presets is empty)
174            cluster_tolerance: 80.0,   // 80m for clustering similar overlaps
175            sample_points: 50,         // For AMD comparison only
176            detection_mode: "discovery".to_string(),
177            include_potentials: true,
178            scale_presets: ScalePreset::default_presets(),
179            preserve_hierarchy: true,
180        }
181    }
182}
183
184impl SectionConfig {
185    /// Create a discovery-mode config (lower thresholds, more sections)
186    pub fn discovery() -> Self {
187        Self {
188            detection_mode: "discovery".to_string(),
189            include_potentials: true,
190            scale_presets: ScalePreset::default_presets(),
191            preserve_hierarchy: true,
192            ..Default::default()
193        }
194    }
195
196    /// Create a conservative config (higher thresholds, fewer sections)
197    pub fn conservative() -> Self {
198        Self {
199            detection_mode: "conservative".to_string(),
200            include_potentials: false,
201            min_activities: 4,
202            scale_presets: vec![ScalePreset::medium(), ScalePreset::long()],
203            preserve_hierarchy: false,
204            ..Default::default()
205        }
206    }
207
208    /// Create a legacy single-scale config (for backward compatibility)
209    pub fn legacy() -> Self {
210        Self {
211            detection_mode: "legacy".to_string(),
212            include_potentials: false,
213            scale_presets: vec![], // Empty = use min/max_section_length directly
214            preserve_hierarchy: false,
215            min_activities: 3,
216            ..Default::default()
217        }
218    }
219}
220
/// Each activity's portion of a section (for pace comparison).
///
/// Indices refer to the activity's FULL GPS track, so downstream code can
/// slice out exactly the points that traversed the section.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct SectionPortion {
    /// Activity ID
    #[serde(alias = "activity_id")]
    pub activity_id: String,
    /// Start index into the activity's FULL GPS track
    #[serde(alias = "start_index")]
    pub start_index: u32,
    /// End index into the activity's FULL GPS track
    #[serde(alias = "end_index")]
    pub end_index: u32,
    /// Distance of this portion in meters
    #[serde(alias = "distance_meters")]
    pub distance_meters: f64,
    /// Direction relative to representative: "same" or "reverse"
    pub direction: String,
}
241
/// A frequently-traveled section with adaptive consensus representation.
///
/// Built from an overlap cluster: the polyline starts as a medoid trace and
/// is refined into a weighted-average consensus of all overlapping tracks.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct FrequentSection {
    /// Unique section ID
    pub id: String,
    /// Custom name (user-defined, None if not set)
    pub name: Option<String>,
    /// Sport type ("Run", "Ride", etc.)
    #[serde(alias = "sport_type")]
    pub sport_type: String,
    /// The consensus polyline - refined from all overlapping tracks.
    /// Initially the medoid, evolves via weighted averaging as more tracks are added.
    pub polyline: Vec<GpsPoint>,
    /// Which activity provided the initial representative polyline (medoid)
    #[serde(alias = "representative_activity_id")]
    pub representative_activity_id: String,
    /// All activity IDs that traverse this section
    #[serde(alias = "activity_ids")]
    pub activity_ids: Vec<String>,
    /// Each activity's portion (start/end indices, distance, direction)
    #[serde(alias = "activity_portions")]
    pub activity_portions: Vec<SectionPortion>,
    /// Route group IDs that include this section
    #[serde(alias = "route_ids")]
    pub route_ids: Vec<String>,
    /// Number of times traversed (equals the number of unique activities)
    #[serde(alias = "visit_count")]
    pub visit_count: u32,
    /// Section length in meters (computed from the consensus polyline)
    #[serde(alias = "distance_meters")]
    pub distance_meters: f64,
    /// Pre-computed GPS traces for each activity's overlapping portion.
    /// Key is activity ID, value is the GPS points within proximity of section.
    #[serde(alias = "activity_traces")]
    pub activity_traces: HashMap<String, Vec<GpsPoint>>,
    /// Confidence score (0.0-1.0) based on observation density.
    /// Higher confidence = more tracks observed, tighter consensus.
    pub confidence: f64,
    /// Number of observations (tracks) used to compute consensus
    #[serde(alias = "observation_count")]
    pub observation_count: u32,
    /// Average spread (meters) of track observations from consensus line.
    /// Lower spread = more consistent track alignment.
    #[serde(alias = "average_spread")]
    pub average_spread: f64,
    /// Per-point observation density (how many activities pass through each point).
    /// Used for detecting high-traffic portions that should become separate sections.
    #[serde(alias = "point_density")]
    pub point_density: Vec<u32>,
    /// Scale at which this section was detected: "short", "medium", "long", or "legacy"
    pub scale: Option<String>,

    // === Evolution fields (Phase 3) ===
    /// Section version - incremented each time the section is updated
    pub version: u32,
    /// Whether this section was user-defined (prevents automatic updates)
    #[serde(alias = "is_user_defined")]
    pub is_user_defined: bool,
    /// ISO timestamp when section was created
    #[serde(alias = "created_at")]
    pub created_at: Option<String>,
    /// ISO timestamp when section was last updated
    #[serde(alias = "updated_at")]
    pub updated_at: Option<String>,
    /// Stability score (0.0-1.0) - how stable the consensus has become.
    /// High stability = consensus is well-established, unlikely to change significantly.
    pub stability: f64,
}
312
/// A potential section detected from 1-2 activities.
///
/// These are suggestions that users can promote to full sections once more
/// activities traverse the same path.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct PotentialSection {
    /// Unique section ID
    pub id: String,
    /// Sport type ("Run", "Ride", etc.)
    #[serde(alias = "sport_type")]
    pub sport_type: String,
    /// The polyline from the representative activity
    pub polyline: Vec<GpsPoint>,
    /// Activity IDs that traverse this potential section (1-2)
    #[serde(alias = "activity_ids")]
    pub activity_ids: Vec<String>,
    /// Number of times traversed (1-2)
    #[serde(alias = "visit_count")]
    pub visit_count: u32,
    /// Section length in meters
    #[serde(alias = "distance_meters")]
    pub distance_meters: f64,
    /// Confidence score (0.0-1.0), lower than FrequentSection
    pub confidence: f64,
    /// Scale at which this was detected
    pub scale: String,
}
340
/// Result of multi-scale section detection.
///
/// Bundles confirmed sections, user-promotable suggestions, and per-scale
/// statistics from one detection run.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct MultiScaleSectionResult {
    /// Confirmed sections (min_activities met)
    pub sections: Vec<FrequentSection>,
    /// Potential sections (1-2 activities, suggestions for user)
    pub potentials: Vec<PotentialSection>,
    /// Statistics about detection
    pub stats: DetectionStats,
}
353
/// Statistics from section detection.
///
/// The per-scale maps are keyed by scale name ("short", "medium", "long").
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "ffi", derive(uniffi::Record))]
pub struct DetectionStats {
    /// Total activities processed
    pub activities_processed: u32,
    /// Total overlaps found across all scales
    pub overlaps_found: u32,
    /// Sections per scale
    pub sections_by_scale: HashMap<String, u32>,
    /// Potentials per scale
    pub potentials_by_scale: HashMap<String, u32>,
}
368
369/// Process a single cluster into a FrequentSection.
370fn process_cluster(
371    idx: usize,
372    cluster: OverlapCluster,
373    sport_type: &str,
374    track_map: &HashMap<String, Vec<GpsPoint>>,
375    activity_to_route: &HashMap<&str, &str>,
376    config: &SectionConfig,
377    scale_name: Option<&str>,
378) -> Option<FrequentSection> {
379    // Select medoid - an ACTUAL GPS trace
380    let (representative_id, representative_polyline) = select_medoid(&cluster);
381
382    if representative_polyline.is_empty() {
383        return None;
384    }
385
386    let distance_meters = calculate_route_distance(&representative_polyline);
387
388    // Filter by max length - sections shouldn't be whole routes
389    if distance_meters > config.max_section_length {
390        return None;
391    }
392
393    // Compute activity portions for pace comparison
394    let activity_portions =
395        compute_activity_portions(&cluster, &representative_polyline, track_map, config);
396
397    // Collect route IDs
398    let route_ids: Vec<String> = cluster
399        .activity_ids
400        .iter()
401        .filter_map(|aid| activity_to_route.get(aid.as_str()).map(|s| s.to_string()))
402        .collect::<HashSet<_>>()
403        .into_iter()
404        .collect();
405
406    // Pre-compute activity traces
407    let activity_id_vec: Vec<String> = cluster.activity_ids.iter().cloned().collect();
408    let activity_traces =
409        extract_all_activity_traces(&activity_id_vec, &representative_polyline, track_map);
410
411    // Collect all traces for consensus computation
412    let all_traces: Vec<Vec<GpsPoint>> = activity_traces.values().cloned().collect();
413
414    // Compute consensus polyline from all overlapping tracks
415    let consensus = compute_consensus_polyline(
416        &representative_polyline,
417        &all_traces,
418        config.proximity_threshold,
419    );
420
421    // Reject sparse sections - a valid LineString requires at least 2 points
422    if consensus.polyline.len() < 2 {
423        return None;
424    }
425
426    // Use consensus polyline and update distance
427    let consensus_distance = calculate_route_distance(&consensus.polyline);
428
429    // Filter by min length - consensus might collapse to fewer points
430    if consensus_distance < config.min_section_length {
431        return None;
432    }
433
434    // Compute stability: more observations + tighter spread = more stable
435    let stability = compute_initial_stability(
436        consensus.observation_count,
437        consensus.average_spread,
438        config.proximity_threshold,
439    );
440
441    // Count activity_ids before moving
442    let activity_count = cluster.activity_ids.len();
443
444    Some(FrequentSection {
445        id: format!("sec_{}_{}", sport_type.to_lowercase(), idx),
446        name: None,
447        sport_type: sport_type.to_string(),
448        polyline: consensus.polyline,
449        representative_activity_id: representative_id,
450        activity_ids: cluster.activity_ids.into_iter().collect(),
451        activity_portions,
452        route_ids,
453        // visit_count should equal unique activities
454        visit_count: activity_count as u32,
455        distance_meters: consensus_distance,
456        activity_traces,
457        confidence: consensus.confidence,
458        observation_count: consensus.observation_count,
459        average_spread: consensus.average_spread,
460        point_density: consensus.point_density,
461        scale: scale_name.map(|s| s.to_string()),
462        // Evolution fields (callers should set created_at if needed)
463        version: 1,
464        is_user_defined: false,
465        created_at: None,
466        updated_at: None,
467        stability,
468    })
469}
470
471/// Detect frequent sections from FULL GPS tracks.
472/// This is the main entry point for section detection.
473pub fn detect_sections_from_tracks(
474    tracks: &[(String, Vec<GpsPoint>)], // (activity_id, full_gps_points)
475    sport_types: &HashMap<String, String>,
476    groups: &[RouteGroup],
477    config: &SectionConfig,
478) -> Vec<FrequentSection> {
479    info!("[Sections] Detecting from {} full GPS tracks", tracks.len());
480
481    if tracks.len() < config.min_activities as usize {
482        return vec![];
483    }
484
485    // Filter to only groups with 2+ activities (these are the ones shown in Routes list)
486    let significant_groups: Vec<&RouteGroup> = groups
487        .iter()
488        .filter(|g| g.activity_ids.len() >= 2)
489        .collect();
490
491    // Build activity_id -> route_id mapping (only for significant groups)
492    let activity_to_route: HashMap<&str, &str> = significant_groups
493        .iter()
494        .flat_map(|g| {
495            g.activity_ids
496                .iter()
497                .map(|aid| (aid.as_str(), g.group_id.as_str()))
498        })
499        .collect();
500
501    // Debug: log the groups we received
502    info!(
503        "[Sections] Received {} groups, {} with 2+ activities, {} total activity mappings",
504        groups.len(),
505        significant_groups.len(),
506        activity_to_route.len()
507    );
508
509    // Build track lookup
510    let track_map: HashMap<String, Vec<GpsPoint>> = tracks
511        .iter()
512        .map(|(id, pts)| (id.clone(), pts.clone()))
513        .collect();
514
515    // Group tracks by sport type
516    let mut tracks_by_sport: HashMap<String, Vec<(&str, &[GpsPoint])>> = HashMap::new();
517    for (activity_id, points) in tracks {
518        let sport = sport_types
519            .get(activity_id)
520            .cloned()
521            .unwrap_or_else(|| "Unknown".to_string());
522        tracks_by_sport
523            .entry(sport)
524            .or_default()
525            .push((activity_id.as_str(), points.as_slice()));
526    }
527
528    let mut all_sections: Vec<FrequentSection> = Vec::new();
529    let mut section_counter = 0;
530
531    // Process each sport type
532    for (sport_type, sport_tracks) in &tracks_by_sport {
533        if sport_tracks.len() < config.min_activities as usize {
534            continue;
535        }
536
537        info!(
538            "[Sections] Processing {} {} tracks",
539            sport_tracks.len(),
540            sport_type
541        );
542
543        let rtree_start = std::time::Instant::now();
544        #[cfg(feature = "parallel")]
545        let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
546            .par_iter()
547            .map(|(_, pts)| build_rtree(pts))
548            .collect();
549
550        #[cfg(not(feature = "parallel"))]
551        let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
552            .iter()
553            .map(|(_, pts)| build_rtree(pts))
554            .collect();
555
556        info!(
557            "[Sections] Built {} R-trees in {}ms",
558            rtrees.len(),
559            rtree_start.elapsed().as_millis()
560        );
561
562        // Find pairwise overlaps - PARALLELIZED with rayon
563        let overlap_start = std::time::Instant::now();
564
565        // Generate all pairs
566        let pairs: Vec<(usize, usize)> = (0..sport_tracks.len())
567            .flat_map(|i| ((i + 1)..sport_tracks.len()).map(move |j| (i, j)))
568            .collect();
569
570        let total_pairs = pairs.len();
571
572        // Process pairs (parallel if feature enabled)
573        #[cfg(feature = "parallel")]
574        let overlaps: Vec<FullTrackOverlap> = pairs
575            .into_par_iter()
576            .filter_map(|(i, j)| {
577                let (id_a, track_a) = sport_tracks[i];
578                let (id_b, track_b) = sport_tracks[j];
579
580                // Quick bounding box check
581                if !bounds_overlap_tracks(track_a, track_b, config.proximity_threshold) {
582                    return None;
583                }
584
585                // Find overlap using R-tree
586                find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], config)
587            })
588            .collect();
589
590        #[cfg(not(feature = "parallel"))]
591        let overlaps: Vec<FullTrackOverlap> = pairs
592            .into_iter()
593            .filter_map(|(i, j)| {
594                let (id_a, track_a) = sport_tracks[i];
595                let (id_b, track_b) = sport_tracks[j];
596
597                // Quick bounding box check
598                if !bounds_overlap_tracks(track_a, track_b, config.proximity_threshold) {
599                    return None;
600                }
601
602                // Find overlap using R-tree
603                find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], config)
604            })
605            .collect();
606
607        info!(
608            "[Sections] Found {} pairwise overlaps for {} ({} pairs) in {}ms",
609            overlaps.len(),
610            sport_type,
611            total_pairs,
612            overlap_start.elapsed().as_millis()
613        );
614
615        // Cluster overlaps
616        let cluster_start = std::time::Instant::now();
617        let clusters = cluster_overlaps(overlaps, config);
618
619        // Filter to clusters with enough activities
620        let significant_clusters: Vec<_> = clusters
621            .into_iter()
622            .filter(|c| c.activity_ids.len() >= config.min_activities as usize)
623            .collect();
624
625        info!(
626            "[Sections] {} significant clusters ({}+ activities) for {} in {}ms",
627            significant_clusters.len(),
628            config.min_activities,
629            sport_type,
630            cluster_start.elapsed().as_millis()
631        );
632
633        // Convert clusters to sections - PARALLELIZED with rayon
634        let section_convert_start = std::time::Instant::now();
635
636        // Prepare data for parallel processing
637        let cluster_data: Vec<_> = significant_clusters.into_iter().enumerate().collect();
638
639        // Process clusters (parallel if feature enabled)
640        #[cfg(feature = "parallel")]
641        let sport_sections: Vec<FrequentSection> = cluster_data
642            .into_par_iter()
643            .filter_map(|(idx, cluster)| {
644                process_cluster(
645                    idx,
646                    cluster,
647                    sport_type,
648                    &track_map,
649                    &activity_to_route,
650                    config,
651                    None,
652                )
653            })
654            .collect();
655
656        #[cfg(not(feature = "parallel"))]
657        let sport_sections: Vec<FrequentSection> = cluster_data
658            .into_iter()
659            .filter_map(|(idx, cluster)| {
660                process_cluster(
661                    idx,
662                    cluster,
663                    sport_type,
664                    &track_map,
665                    &activity_to_route,
666                    config,
667                    None,
668                )
669            })
670            .collect();
671
672        info!(
673            "[Sections] Converted {} sections for {} in {}ms",
674            sport_sections.len(),
675            sport_type,
676            section_convert_start.elapsed().as_millis()
677        );
678
679        // Post-process step 1: Split sections that fold back on themselves (out-and-back)
680        let fold_start = std::time::Instant::now();
681        let fold_sections = split_folding_sections(sport_sections, config);
682        info!(
683            "[Sections] After fold splitting: {} sections in {}ms",
684            fold_sections.len(),
685            fold_start.elapsed().as_millis()
686        );
687
688        // Post-process step 1b: Split sections at heading inflection points
689        let heading_start = std::time::Instant::now();
690        let heading_sections = split_at_heading_changes(fold_sections, config);
691        info!(
692            "[Sections] After heading splitting: {} sections in {}ms",
693            heading_sections.len(),
694            heading_start.elapsed().as_millis()
695        );
696
697        // Post-process step 1c: Split sections at gradient changes (if elevation available)
698        let gradient_start = std::time::Instant::now();
699        let gradient_sections = split_at_gradient_changes(heading_sections, config);
700        info!(
701            "[Sections] After gradient splitting: {} sections in {}ms",
702            gradient_sections.len(),
703            gradient_start.elapsed().as_millis()
704        );
705
706        // Post-process step 2: Merge sections that are nearby (reversed, parallel, GPS drift)
707        let merge_start = std::time::Instant::now();
708        let merged_sections = merge_nearby_sections(gradient_sections, config);
709        info!(
710            "[Sections] After nearby merge: {} sections in {}ms",
711            merged_sections.len(),
712            merge_start.elapsed().as_millis()
713        );
714
715        // Post-process step 3: Remove sections that contain or are contained by others
716        let dedup_start = std::time::Instant::now();
717        let deduped_sections = remove_overlapping_sections(merged_sections, config);
718        info!(
719            "[Sections] After dedup: {} unique sections in {}ms",
720            deduped_sections.len(),
721            dedup_start.elapsed().as_millis()
722        );
723
724        // Post-process step 4: Split sections with high-traffic portions
725        // This creates new sections from portions that are used by many activities
726        let split_start = std::time::Instant::now();
727        let final_sections = split_high_variance_sections(deduped_sections, &track_map, config);
728        info!(
729            "[Sections] After density splitting: {} sections in {}ms",
730            final_sections.len(),
731            split_start.elapsed().as_millis()
732        );
733
734        // Re-number sections
735        for (i, mut section) in final_sections.into_iter().enumerate() {
736            section.id = format!("sec_{}_{}", sport_type.to_lowercase(), section_counter + i);
737            all_sections.push(section);
738        }
739        section_counter += all_sections.len();
740    }
741
742    // Sort by visit count (most visited first)
743    all_sections.sort_by(|a, b| b.visit_count.cmp(&a.visit_count));
744
745    info!("[Sections] Detected {} total sections", all_sections.len());
746
747    all_sections
748}
749
/// Result from processing a single scale preset.
///
/// Internal carrier type: aggregated by the multi-scale driver into a
/// `MultiScaleSectionResult`.
struct ScaleResult {
    /// Confirmed sections detected at this scale.
    sections: Vec<FrequentSection>,
    /// Potential (1-2 activity) sections detected at this scale.
    potentials: Vec<PotentialSection>,
    /// Number of pairwise overlaps found at this scale.
    overlaps_found: u32,
    /// Name of the scale preset that produced this result.
    scale_name: String,
}
757
758/// Process a single scale preset (extracted for parallel execution)
759fn process_scale_preset(
760    preset: &ScalePreset,
761    tracks_by_sport: &HashMap<String, Vec<(&str, &[GpsPoint])>>,
762    track_map: &HashMap<String, Vec<GpsPoint>>,
763    activity_to_route: &HashMap<&str, &str>,
764    config: &SectionConfig,
765) -> ScaleResult {
766    info!(
767        "[MultiScale] Processing {} scale: {}-{}m, min {} activities",
768        preset.name, preset.min_length, preset.max_length, preset.min_activities
769    );
770
771    let scale_config = SectionConfig {
772        min_section_length: preset.min_length,
773        max_section_length: preset.max_length,
774        min_activities: preset.min_activities,
775        ..config.clone()
776    };
777
778    let mut scale_sections: Vec<FrequentSection> = Vec::new();
779    let mut scale_potentials: Vec<PotentialSection> = Vec::new();
780    let mut overlaps_found = 0u32;
781
782    // Process each sport type
783    for (sport_type, sport_tracks) in tracks_by_sport {
784        // For potentials, we only need 1 activity; for sections, use preset.min_activities
785        let min_tracks_for_processing = if config.include_potentials {
786            1
787        } else {
788            preset.min_activities as usize
789        };
790        if sport_tracks.len() < min_tracks_for_processing {
791            continue;
792        }
793
794        #[cfg(feature = "parallel")]
795        let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
796            .par_iter()
797            .map(|(_, pts)| build_rtree(pts))
798            .collect();
799
800        #[cfg(not(feature = "parallel"))]
801        let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
802            .iter()
803            .map(|(_, pts)| build_rtree(pts))
804            .collect();
805
806        // Generate pairs and find overlaps
807        let pairs: Vec<(usize, usize)> = (0..sport_tracks.len())
808            .flat_map(|i| ((i + 1)..sport_tracks.len()).map(move |j| (i, j)))
809            .collect();
810
811        #[cfg(feature = "parallel")]
812        let overlaps: Vec<FullTrackOverlap> = pairs
813            .into_par_iter()
814            .filter_map(|(i, j)| {
815                let (id_a, track_a) = sport_tracks[i];
816                let (id_b, track_b) = sport_tracks[j];
817                if !bounds_overlap_tracks(track_a, track_b, scale_config.proximity_threshold) {
818                    return None;
819                }
820                find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], &scale_config)
821            })
822            .collect();
823
824        #[cfg(not(feature = "parallel"))]
825        let overlaps: Vec<FullTrackOverlap> = pairs
826            .into_iter()
827            .filter_map(|(i, j)| {
828                let (id_a, track_a) = sport_tracks[i];
829                let (id_b, track_b) = sport_tracks[j];
830                if !bounds_overlap_tracks(track_a, track_b, scale_config.proximity_threshold) {
831                    return None;
832                }
833                find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], &scale_config)
834            })
835            .collect();
836
837        overlaps_found += overlaps.len() as u32;
838
839        // Cluster overlaps
840        let clusters = cluster_overlaps(overlaps, &scale_config);
841
842        // Separate into confirmed sections and potential sections
843        let (significant, potential): (Vec<_>, Vec<_>) = clusters
844            .into_iter()
845            .partition(|c| c.activity_ids.len() >= preset.min_activities as usize);
846
847        // Process confirmed sections
848        #[cfg(feature = "parallel")]
849        let sport_sections: Vec<FrequentSection> = significant
850            .into_par_iter()
851            .enumerate()
852            .filter_map(|(idx, cluster)| {
853                process_cluster(
854                    idx,
855                    cluster,
856                    sport_type,
857                    track_map,
858                    activity_to_route,
859                    &scale_config,
860                    Some(&preset.name),
861                )
862            })
863            .collect();
864
865        #[cfg(not(feature = "parallel"))]
866        let sport_sections: Vec<FrequentSection> = significant
867            .into_iter()
868            .enumerate()
869            .filter_map(|(idx, cluster)| {
870                process_cluster(
871                    idx,
872                    cluster,
873                    sport_type,
874                    track_map,
875                    activity_to_route,
876                    &scale_config,
877                    Some(&preset.name),
878                )
879            })
880            .collect();
881
882        scale_sections.extend(sport_sections);
883
884        // Process potential sections if enabled
885        if config.include_potentials {
886            for (idx, cluster) in potential.into_iter().enumerate() {
887                // Only include clusters with 1-2 activities
888                let activity_count = cluster.activity_ids.len();
889                if activity_count >= 1
890                    && activity_count < preset.min_activities as usize
891                    && let Some((_rep_id, rep_polyline)) = Some(select_medoid(&cluster))
892                    && !rep_polyline.is_empty()
893                {
894                    let distance = calculate_route_distance(&rep_polyline);
895                    if distance >= preset.min_length && distance <= preset.max_length {
896                        scale_potentials.push(PotentialSection {
897                            id: format!(
898                                "pot_{}_{}_{}",
899                                preset.name,
900                                sport_type.to_lowercase(),
901                                idx
902                            ),
903                            sport_type: sport_type.to_string(),
904                            polyline: rep_polyline,
905                            activity_ids: cluster.activity_ids.into_iter().collect(),
906                            visit_count: activity_count as u32,
907                            distance_meters: distance,
908                            confidence: 0.3 + (activity_count as f64 * 0.2), // 0.5 for 1, 0.7 for 2
909                            scale: preset.name.clone(),
910                        });
911                    }
912                }
913            }
914        }
915    }
916
917    info!(
918        "[MultiScale] {} scale: {} sections, {} potentials",
919        preset.name,
920        scale_sections.len(),
921        scale_potentials.len()
922    );
923
924    ScaleResult {
925        sections: scale_sections,
926        potentials: scale_potentials,
927        overlaps_found,
928        scale_name: preset.name.clone(),
929    }
930}
931
/// Detect frequent sections at every configured scale preset and merge the results.
///
/// Pipeline:
/// 1. If `config.scale_presets` is empty, delegate to the legacy single-scale
///    `detect_sections_from_tracks` and return immediately (stats recorded
///    under the `"legacy"` key).
/// 2. Build shared lookups once: an owned `track_map` (activity id → points),
///    an `activity_to_route` map from route groups with ≥2 activities, and
///    tracks grouped by sport type.
/// 3. Run `process_scale_preset` for each preset — in parallel when the
///    `parallel` feature is enabled — collecting confirmed sections and
///    potential sections per scale.
/// 4. Post-process the combined sections in a fixed order: fold split →
///    heading split → gradient split → nearby merge → overlap dedup
///    (hierarchical or flat, per `config.preserve_hierarchy`) → density split.
/// 5. Return sections sorted by descending visit count and potentials sorted
///    by descending confidence, together with per-scale `DetectionStats`.
pub fn detect_sections_multiscale(
    tracks: &[(String, Vec<GpsPoint>)],
    sport_types: &HashMap<String, String>,
    groups: &[RouteGroup],
    config: &SectionConfig,
) -> MultiScaleSectionResult {
    info!(
        "[MultiScale] Detecting from {} tracks with {} scale presets",
        tracks.len(),
        config.scale_presets.len()
    );

    let mut stats = DetectionStats {
        activities_processed: tracks.len() as u32,
        overlaps_found: 0,
        sections_by_scale: HashMap::new(),
        potentials_by_scale: HashMap::new(),
    };

    // If no scale presets, fall back to legacy single-scale detection
    if config.scale_presets.is_empty() {
        let sections = detect_sections_from_tracks(tracks, sport_types, groups, config);
        stats
            .sections_by_scale
            .insert("legacy".to_string(), sections.len() as u32);
        return MultiScaleSectionResult {
            sections,
            potentials: vec![],
            stats,
        };
    }

    // Build shared data structures once.
    // NOTE: this clones every track into an owned map so per-preset workers can
    // share it by reference without lifetime coupling to `tracks`.
    let track_map: HashMap<String, Vec<GpsPoint>> = tracks
        .iter()
        .map(|(id, pts)| (id.clone(), pts.clone()))
        .collect();

    // Only route groups with at least 2 activities contribute route attribution.
    let significant_groups: Vec<&RouteGroup> = groups
        .iter()
        .filter(|g| g.activity_ids.len() >= 2)
        .collect();

    let activity_to_route: HashMap<&str, &str> = significant_groups
        .iter()
        .flat_map(|g| {
            g.activity_ids
                .iter()
                .map(|aid| (aid.as_str(), g.group_id.as_str()))
        })
        .collect();

    // Group tracks by sport type; activities without a known sport fall into
    // the "Unknown" bucket rather than being dropped.
    let mut tracks_by_sport: HashMap<String, Vec<(&str, &[GpsPoint])>> = HashMap::new();
    for (activity_id, points) in tracks {
        let sport = sport_types
            .get(activity_id)
            .cloned()
            .unwrap_or_else(|| "Unknown".to_string());
        tracks_by_sport
            .entry(sport)
            .or_default()
            .push((activity_id.as_str(), points.as_slice()));
    }

    // Process all scale presets IN PARALLEL
    #[cfg(feature = "parallel")]
    let scale_results: Vec<ScaleResult> = config
        .scale_presets
        .par_iter()
        .map(|preset| {
            process_scale_preset(
                preset,
                &tracks_by_sport,
                &track_map,
                &activity_to_route,
                config,
            )
        })
        .collect();

    #[cfg(not(feature = "parallel"))]
    let scale_results: Vec<ScaleResult> = config
        .scale_presets
        .iter()
        .map(|preset| {
            process_scale_preset(
                preset,
                &tracks_by_sport,
                &track_map,
                &activity_to_route,
                config,
            )
        })
        .collect();

    // Merge results from all scales
    let mut all_sections: Vec<FrequentSection> = Vec::new();
    let mut all_potentials: Vec<PotentialSection> = Vec::new();

    for result in scale_results {
        stats.overlaps_found += result.overlaps_found;
        // `scale_name` is cloned for the first insert and moved into the second.
        stats
            .sections_by_scale
            .insert(result.scale_name.clone(), result.sections.len() as u32);
        stats
            .potentials_by_scale
            .insert(result.scale_name, result.potentials.len() as u32);
        all_sections.extend(result.sections);
        all_potentials.extend(result.potentials);
    }

    // Apply post-processing (order matters: splits happen before merge/dedup).
    let fold_start = std::time::Instant::now();
    let fold_sections = split_folding_sections(all_sections, config);
    info!(
        "[MultiScale] After fold splitting: {} sections in {}ms",
        fold_sections.len(),
        fold_start.elapsed().as_millis()
    );

    // Split at heading inflection points
    let heading_start = std::time::Instant::now();
    let heading_sections = split_at_heading_changes(fold_sections, config);
    info!(
        "[MultiScale] After heading splitting: {} sections in {}ms",
        heading_sections.len(),
        heading_start.elapsed().as_millis()
    );

    // Split at gradient changes (if elevation available)
    let gradient_start = std::time::Instant::now();
    let gradient_sections = split_at_gradient_changes(heading_sections, config);
    info!(
        "[MultiScale] After gradient splitting: {} sections in {}ms",
        gradient_sections.len(),
        gradient_start.elapsed().as_millis()
    );

    let merge_start = std::time::Instant::now();
    let merged_sections = merge_nearby_sections(gradient_sections, config);
    info!(
        "[MultiScale] After nearby merge: {} sections in {}ms",
        merged_sections.len(),
        merge_start.elapsed().as_millis()
    );

    // Use hierarchical deduplication if preserve_hierarchy is true
    let dedup_start = std::time::Instant::now();
    let deduped_sections = if config.preserve_hierarchy {
        remove_overlapping_sections_hierarchical(merged_sections, config)
    } else {
        remove_overlapping_sections(merged_sections, config)
    };
    info!(
        "[MultiScale] After dedup: {} sections in {}ms",
        deduped_sections.len(),
        dedup_start.elapsed().as_millis()
    );

    let split_start = std::time::Instant::now();
    let final_sections = split_high_variance_sections(deduped_sections, &track_map, config);
    info!(
        "[MultiScale] After density splitting: {} sections in {}ms",
        final_sections.len(),
        split_start.elapsed().as_millis()
    );

    // Sort sections by visit count
    let mut sorted_sections = final_sections;
    sorted_sections.sort_by(|a, b| b.visit_count.cmp(&a.visit_count));

    // Sort potentials by confidence (descending; NaN-safe via Ordering::Equal)
    let mut sorted_potentials = all_potentials;
    sorted_potentials.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    info!(
        "[MultiScale] Final: {} sections, {} potentials",
        sorted_sections.len(),
        sorted_potentials.len()
    );

    MultiScaleSectionResult {
        sections: sorted_sections,
        potentials: sorted_potentials,
        stats,
    }
}
1124
1125/// Detect sections at multiple scales with progress reporting.
1126/// This version accepts a progress tracker for reporting status to the UI.
1127#[cfg(feature = "persistence")]
1128pub fn detect_sections_multiscale_with_progress(
1129    tracks: &[(String, Vec<GpsPoint>)],
1130    sport_types: &HashMap<String, String>,
1131    groups: &[RouteGroup],
1132    config: &SectionConfig,
1133    progress: &SectionDetectionProgress,
1134) -> MultiScaleSectionResult {
1135    info!(
1136        "[MultiScale] Detecting from {} tracks with {} scale presets (with progress)",
1137        tracks.len(),
1138        config.scale_presets.len()
1139    );
1140
1141    let mut all_sections: Vec<FrequentSection> = Vec::new();
1142    let mut all_potentials: Vec<PotentialSection> = Vec::new();
1143    let mut stats = DetectionStats {
1144        activities_processed: tracks.len() as u32,
1145        overlaps_found: 0,
1146        sections_by_scale: HashMap::new(),
1147        potentials_by_scale: HashMap::new(),
1148    };
1149
1150    // If no scale presets, fall back to legacy single-scale detection
1151    if config.scale_presets.is_empty() {
1152        progress.set_phase("finding_overlaps", tracks.len() as u32);
1153        let sections = detect_sections_from_tracks(tracks, sport_types, groups, config);
1154        stats
1155            .sections_by_scale
1156            .insert("legacy".to_string(), sections.len() as u32);
1157        progress.set_phase("complete", 0);
1158        return MultiScaleSectionResult {
1159            sections,
1160            potentials: vec![],
1161            stats,
1162        };
1163    }
1164
1165    // Group tracks by sport type
1166    let mut tracks_by_sport: HashMap<String, Vec<(&str, &[GpsPoint])>> = HashMap::new();
1167    for (activity_id, points) in tracks {
1168        let sport = sport_types
1169            .get(activity_id)
1170            .cloned()
1171            .unwrap_or_else(|| "Unknown".to_string());
1172        tracks_by_sport
1173            .entry(sport)
1174            .or_default()
1175            .push((activity_id.as_str(), points.as_slice()));
1176    }
1177
1178    // Build track lookup and activity_to_route mapping
1179    let track_map: HashMap<String, Vec<GpsPoint>> = tracks
1180        .iter()
1181        .map(|(id, pts)| (id.clone(), pts.clone()))
1182        .collect();
1183
1184    let significant_groups: Vec<&RouteGroup> = groups
1185        .iter()
1186        .filter(|g| g.activity_ids.len() >= 2)
1187        .collect();
1188
1189    let activity_to_route: HashMap<&str, &str> = significant_groups
1190        .iter()
1191        .flat_map(|g| {
1192            g.activity_ids
1193                .iter()
1194                .map(|aid| (aid.as_str(), g.group_id.as_str()))
1195        })
1196        .collect();
1197
1198    let total_presets = config.scale_presets.len();
1199    let mut section_counter = 0;
1200
1201    // Process each scale preset with progress updates
1202    for (preset_idx, preset) in config.scale_presets.iter().enumerate() {
1203        progress.set_phase(&format!("scale_{}", preset.name), total_presets as u32);
1204        progress
1205            .completed
1206            .store(preset_idx as u32, std::sync::atomic::Ordering::Relaxed);
1207
1208        info!(
1209            "[MultiScale] Processing {} scale ({}-{}m)",
1210            preset.name, preset.min_length, preset.max_length
1211        );
1212
1213        // Create scale-specific config
1214        let mut scale_config = config.clone();
1215        scale_config.min_section_length = preset.min_length;
1216        scale_config.max_section_length = preset.max_length;
1217        scale_config.min_activities = preset.min_activities;
1218
1219        let mut scale_sections = 0u32;
1220        let mut scale_potentials = 0u32;
1221
1222        // Process each sport type
1223        for (sport_type, sport_tracks) in &tracks_by_sport {
1224            let min_tracks_for_processing = if config.include_potentials {
1225                1
1226            } else {
1227                preset.min_activities as usize
1228            };
1229            if sport_tracks.len() < min_tracks_for_processing {
1230                continue;
1231            }
1232
1233            // Update progress for R-tree building
1234            progress.set_phase("building_rtrees", sport_tracks.len() as u32);
1235
1236            #[cfg(feature = "parallel")]
1237            let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
1238                .par_iter()
1239                .map(|(_, pts)| build_rtree(pts))
1240                .collect();
1241
1242            #[cfg(not(feature = "parallel"))]
1243            let rtrees: Vec<rstar::RTree<IndexedPoint>> = sport_tracks
1244                .iter()
1245                .enumerate()
1246                .map(|(i, (_, pts))| {
1247                    progress
1248                        .completed
1249                        .store(i as u32, std::sync::atomic::Ordering::Relaxed);
1250                    build_rtree(pts)
1251                })
1252                .collect();
1253
1254            // Generate pairs and find overlaps
1255            let pairs: Vec<(usize, usize)> = (0..sport_tracks.len())
1256                .flat_map(|i| ((i + 1)..sport_tracks.len()).map(move |j| (i, j)))
1257                .collect();
1258
1259            let total_pairs = pairs.len() as u32;
1260            progress.set_phase("finding_overlaps", total_pairs);
1261
1262            #[cfg(feature = "parallel")]
1263            let overlaps: Vec<FullTrackOverlap> = {
1264                use std::sync::atomic::AtomicU32;
1265                let counter = AtomicU32::new(0);
1266
1267                pairs
1268                    .into_par_iter()
1269                    .filter_map(|(i, j)| {
1270                        let count = counter.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1271                        if count.is_multiple_of(100) {
1272                            progress
1273                                .completed
1274                                .store(count, std::sync::atomic::Ordering::Relaxed);
1275                        }
1276
1277                        let (id_a, track_a) = sport_tracks[i];
1278                        let (id_b, track_b) = sport_tracks[j];
1279                        if !bounds_overlap_tracks(
1280                            track_a,
1281                            track_b,
1282                            scale_config.proximity_threshold,
1283                        ) {
1284                            return None;
1285                        }
1286                        find_full_track_overlap(
1287                            id_a,
1288                            track_a,
1289                            id_b,
1290                            track_b,
1291                            &rtrees[j],
1292                            &scale_config,
1293                        )
1294                    })
1295                    .collect()
1296            };
1297
1298            #[cfg(not(feature = "parallel"))]
1299            let overlaps: Vec<FullTrackOverlap> = pairs
1300                .into_iter()
1301                .enumerate()
1302                .filter_map(|(idx, (i, j))| {
1303                    if idx % 50 == 0 {
1304                        progress
1305                            .completed
1306                            .store(idx as u32, std::sync::atomic::Ordering::Relaxed);
1307                    }
1308
1309                    let (id_a, track_a) = sport_tracks[i];
1310                    let (id_b, track_b) = sport_tracks[j];
1311                    if !bounds_overlap_tracks(track_a, track_b, scale_config.proximity_threshold) {
1312                        return None;
1313                    }
1314                    find_full_track_overlap(id_a, track_a, id_b, track_b, &rtrees[j], &scale_config)
1315                })
1316                .collect();
1317
1318            stats.overlaps_found += overlaps.len() as u32;
1319
1320            // Cluster overlaps
1321            progress.set_phase("clustering", overlaps.len() as u32);
1322            let clusters = cluster_overlaps(overlaps, &scale_config);
1323
1324            // Separate into confirmed sections and potential sections
1325            let (significant, potential): (Vec<_>, Vec<_>) = clusters
1326                .into_iter()
1327                .partition(|c| c.activity_ids.len() >= preset.min_activities as usize);
1328
1329            // Process confirmed sections
1330            let cluster_count = significant.len() as u32;
1331            progress.set_phase("building_sections", cluster_count);
1332
1333            #[cfg(feature = "parallel")]
1334            let sport_sections_vec: Vec<FrequentSection> = {
1335                use std::sync::atomic::AtomicU32;
1336                let counter = AtomicU32::new(0);
1337
1338                significant
1339                    .into_par_iter()
1340                    .enumerate()
1341                    .filter_map(|(idx, cluster)| {
1342                        let count = counter.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1343                        progress
1344                            .completed
1345                            .store(count, std::sync::atomic::Ordering::Relaxed);
1346
1347                        process_cluster(
1348                            idx,
1349                            cluster,
1350                            sport_type,
1351                            &track_map,
1352                            &activity_to_route,
1353                            &scale_config,
1354                            Some(&preset.name),
1355                        )
1356                    })
1357                    .collect()
1358            };
1359
1360            #[cfg(not(feature = "parallel"))]
1361            let sport_sections_vec: Vec<FrequentSection> = significant
1362                .into_iter()
1363                .enumerate()
1364                .filter_map(|(idx, cluster)| {
1365                    progress
1366                        .completed
1367                        .store(idx as u32, std::sync::atomic::Ordering::Relaxed);
1368                    process_cluster(
1369                        idx,
1370                        cluster,
1371                        sport_type,
1372                        &track_map,
1373                        &activity_to_route,
1374                        &scale_config,
1375                        Some(&preset.name),
1376                    )
1377                })
1378                .collect();
1379
1380            scale_sections += sport_sections_vec.len() as u32;
1381
1382            // Re-number sections
1383            for (i, mut section) in sport_sections_vec.into_iter().enumerate() {
1384                section.id = format!(
1385                    "sec_{}_{}_{}_{}",
1386                    sport_type.to_lowercase(),
1387                    preset.name,
1388                    section_counter,
1389                    i
1390                );
1391                all_sections.push(section);
1392            }
1393            section_counter += 1;
1394
1395            // Process potential sections if enabled
1396            if config.include_potentials {
1397                for (idx, cluster) in potential.into_iter().enumerate() {
1398                    // Only include clusters with 1-2 activities
1399                    let activity_count = cluster.activity_ids.len();
1400                    if activity_count >= 1
1401                        && activity_count < preset.min_activities as usize
1402                        && let Some((_rep_id, rep_polyline)) = Some(select_medoid(&cluster))
1403                        && !rep_polyline.is_empty()
1404                    {
1405                        let distance = calculate_route_distance(&rep_polyline);
1406                        if distance >= preset.min_length && distance <= preset.max_length {
1407                            all_potentials.push(PotentialSection {
1408                                id: format!(
1409                                    "pot_{}_{}_{}",
1410                                    preset.name,
1411                                    sport_type.to_lowercase(),
1412                                    idx
1413                                ),
1414                                sport_type: sport_type.to_string(),
1415                                polyline: rep_polyline,
1416                                activity_ids: cluster.activity_ids.into_iter().collect(),
1417                                visit_count: activity_count as u32,
1418                                distance_meters: distance,
1419                                confidence: 0.3 + (activity_count as f64 * 0.2),
1420                                scale: preset.name.clone(),
1421                            });
1422                            scale_potentials += 1;
1423                        }
1424                    }
1425                }
1426            }
1427        }
1428
1429        stats
1430            .sections_by_scale
1431            .insert(preset.name.clone(), scale_sections);
1432        stats
1433            .potentials_by_scale
1434            .insert(preset.name.clone(), scale_potentials);
1435    }
1436
1437    // Post-processing
1438    progress.set_phase("postprocessing", 4);
1439
1440    let fold_start = std::time::Instant::now();
1441    let split_sections = split_folding_sections(all_sections, config);
1442    progress.increment();
1443    info!(
1444        "[MultiScale] After fold splitting: {} sections in {}ms",
1445        split_sections.len(),
1446        fold_start.elapsed().as_millis()
1447    );
1448
1449    let merge_start = std::time::Instant::now();
1450    let merged_sections = merge_nearby_sections(split_sections, config);
1451    progress.increment();
1452    info!(
1453        "[MultiScale] After nearby merge: {} sections in {}ms",
1454        merged_sections.len(),
1455        merge_start.elapsed().as_millis()
1456    );
1457
1458    let dedup_start = std::time::Instant::now();
1459    let deduped_sections = remove_overlapping_sections_hierarchical(merged_sections, config);
1460    progress.increment();
1461    info!(
1462        "[MultiScale] After hierarchical dedup: {} sections in {}ms",
1463        deduped_sections.len(),
1464        dedup_start.elapsed().as_millis()
1465    );
1466
1467    let split_start = std::time::Instant::now();
1468    let final_sections = split_high_variance_sections(deduped_sections, &track_map, config);
1469    progress.increment();
1470    info!(
1471        "[MultiScale] After density splitting: {} sections in {}ms",
1472        final_sections.len(),
1473        split_start.elapsed().as_millis()
1474    );
1475
1476    // Sort sections by visit count
1477    let mut sorted_sections = final_sections;
1478    sorted_sections.sort_by(|a, b| b.visit_count.cmp(&a.visit_count));
1479
1480    // Sort potentials by confidence
1481    let mut sorted_potentials = all_potentials;
1482    sorted_potentials.sort_by(|a, b| {
1483        b.confidence
1484            .partial_cmp(&a.confidence)
1485            .unwrap_or(std::cmp::Ordering::Equal)
1486    });
1487
1488    info!(
1489        "[MultiScale] Final: {} sections, {} potentials",
1490        sorted_sections.len(),
1491        sorted_potentials.len()
1492    );
1493
1494    progress.set_phase("complete", 0);
1495
1496    MultiScaleSectionResult {
1497        sections: sorted_sections,
1498        potentials: sorted_potentials,
1499        stats,
1500    }
1501}
1502
1503/// Deduplication that preserves hierarchical sections.
1504/// Short sections inside longer ones are kept if they're at different scales.
1505fn remove_overlapping_sections_hierarchical(
1506    mut sections: Vec<FrequentSection>,
1507    config: &SectionConfig,
1508) -> Vec<FrequentSection> {
1509    if sections.len() <= 1 {
1510        return sections;
1511    }
1512
1513    // Sort by length descending
1514    sections.sort_by(|a, b| {
1515        b.distance_meters
1516            .partial_cmp(&a.distance_meters)
1517            .unwrap_or(std::cmp::Ordering::Equal)
1518    });
1519
1520    // PRE-COMPUTE all R-trees once upfront (O(k) builds instead of O(k²))
1521    #[cfg(feature = "parallel")]
1522    let rtrees: Vec<rstar::RTree<IndexedPoint>> = {
1523        use rayon::prelude::*;
1524        sections
1525            .par_iter()
1526            .map(|s| build_rtree(&s.polyline))
1527            .collect()
1528    };
1529
1530    #[cfg(not(feature = "parallel"))]
1531    let rtrees: Vec<rstar::RTree<IndexedPoint>> =
1532        sections.iter().map(|s| build_rtree(&s.polyline)).collect();
1533
1534    let mut keep = vec![true; sections.len()];
1535
1536    for i in 0..sections.len() {
1537        if !keep[i] {
1538            continue;
1539        }
1540
1541        let tree_i = &rtrees[i]; // Use pre-computed R-tree (O(1) lookup)
1542
1543        for j in (i + 1)..sections.len() {
1544            if !keep[j] {
1545                continue;
1546            }
1547
1548            // Check if shorter section (j) is contained in longer section (i)
1549            let containment = compute_polyline_containment_with_rtree(
1550                &sections[j].polyline,
1551                tree_i,
1552                config.proximity_threshold,
1553            );
1554
1555            // Length ratio
1556            let length_ratio = sections[j].distance_meters / sections[i].distance_meters;
1557
1558            // Only remove if:
1559            // 1. >90% contained
1560            // 2. Same length class (ratio > 0.7) - meaning it's a true duplicate, not hierarchical
1561            // 3. Same scale OR no scale info
1562            let same_scale = match (&sections[i].scale, &sections[j].scale) {
1563                (Some(a), Some(b)) => a == b,
1564                _ => true, // If either has no scale, treat as same
1565            };
1566
1567            if containment > 0.9 && length_ratio > 0.7 && same_scale {
1568                keep[j] = false;
1569            }
1570        }
1571    }
1572
1573    sections
1574        .into_iter()
1575        .zip(keep)
1576        .filter_map(|(s, k)| if k { Some(s) } else { None })
1577        .collect()
1578}
1579
1580/// Compute what fraction of polyline A is contained within proximity of polyline B (using R-tree).
1581/// O(a * log b) instead of O(a * b).
1582fn compute_polyline_containment_with_rtree(
1583    polyline_a: &[GpsPoint],
1584    tree_b: &rstar::RTree<IndexedPoint>,
1585    proximity_threshold: f64,
1586) -> f64 {
1587    use rstar::PointDistance;
1588
1589    if polyline_a.is_empty() {
1590        return 0.0;
1591    }
1592
1593    let threshold_deg = proximity_threshold / 111_000.0;
1594    let threshold_deg_sq = threshold_deg * threshold_deg;
1595
1596    let mut contained_count = 0;
1597    for point_a in polyline_a {
1598        let query = [point_a.latitude, point_a.longitude];
1599        if let Some(nearest) = tree_b.nearest_neighbor(&query)
1600            && nearest.distance_2(&query) <= threshold_deg_sq
1601        {
1602            contained_count += 1;
1603        }
1604    }
1605
1606    contained_count as f64 / polyline_a.len() as f64
1607}
1608
1609/// Compute what fraction of polyline A is contained within proximity of polyline B
1610/// (Brute-force version - kept for backward compatibility)
1611#[allow(dead_code)]
1612fn compute_polyline_containment(
1613    polyline_a: &[GpsPoint],
1614    polyline_b: &[GpsPoint],
1615    proximity_threshold: f64,
1616) -> f64 {
1617    use crate::geo_utils::haversine_distance;
1618
1619    if polyline_a.is_empty() || polyline_b.is_empty() {
1620        return 0.0;
1621    }
1622
1623    let mut contained_count = 0;
1624    for point_a in polyline_a {
1625        let min_dist = polyline_b
1626            .iter()
1627            .map(|point_b| haversine_distance(point_a, point_b))
1628            .fold(f64::MAX, |a, b| a.min(b));
1629
1630        if min_dist <= proximity_threshold {
1631            contained_count += 1;
1632        }
1633    }
1634
1635    contained_count as f64 / polyline_a.len() as f64
1636}