cortex_runtime/navigation/
cluster.rs1use crate::map::types::{PageType, SiteMap, FEATURE_DIM};
4
5pub fn compute_clusters(map: &mut SiteMap) {
10 let n = map.nodes.len();
11 if n == 0 {
12 return;
13 }
14
15 let k = if n < 30 {
16 1.max(n / 3)
17 } else {
18 3.max(((n as f64 / 10.0).sqrt()) as usize)
19 };
20 let k = k.min(n);
21
22 let mut centroids: Vec<[f32; FEATURE_DIM]> = Vec::with_capacity(k);
24 for i in 0..k {
25 let idx = i * n / k;
26 centroids.push(map.features[idx]);
27 }
28
29 let mut assignments = vec![0u16; n];
30
31 for _ in 0..20 {
33 let mut changed = false;
34
35 for (i, feat) in map.features.iter().enumerate() {
37 let mut best_cluster = 0u16;
38 let mut best_dist = f32::MAX;
39 for (c, centroid) in centroids.iter().enumerate() {
40 let dist: f32 = feat
41 .iter()
42 .zip(centroid.iter())
43 .map(|(a, b)| (a - b) * (a - b))
44 .sum();
45 if dist < best_dist {
46 best_dist = dist;
47 best_cluster = c as u16;
48 }
49 }
50 if assignments[i] != best_cluster {
51 assignments[i] = best_cluster;
52 changed = true;
53 }
54 }
55
56 if !changed {
57 break;
58 }
59
60 let mut sums = vec![[0.0f32; FEATURE_DIM]; k];
62 let mut counts = vec![0u32; k];
63 for (i, feat) in map.features.iter().enumerate() {
64 let c = assignments[i] as usize;
65 counts[c] += 1;
66 for (d, &val) in feat.iter().enumerate() {
67 sums[c][d] += val;
68 }
69 }
70 for c in 0..k {
71 if counts[c] > 0 {
72 for (d, sum_val) in sums[c].iter().enumerate() {
73 centroids[c][d] = sum_val / counts[c] as f32;
74 }
75 }
76 }
77 }
78
79 map.cluster_assignments = assignments;
80 map.cluster_centroids = centroids;
81 map.header.cluster_count = k as u16;
82}
83
84pub fn cluster_type(map: &SiteMap, cluster_id: u16) -> PageType {
86 let mut type_counts = std::collections::HashMap::new();
87
88 for (i, &assignment) in map.cluster_assignments.iter().enumerate() {
89 if assignment == cluster_id {
90 *type_counts.entry(map.nodes[i].page_type).or_insert(0u32) += 1;
91 }
92 }
93
94 type_counts
95 .into_iter()
96 .max_by_key(|&(_, count)| count)
97 .map(|(pt, _)| pt)
98 .unwrap_or(PageType::Unknown)
99}
100
101pub fn cluster_members(map: &SiteMap, cluster_id: u16) -> Vec<u32> {
103 map.cluster_assignments
104 .iter()
105 .enumerate()
106 .filter(|(_, &a)| a == cluster_id)
107 .map(|(i, _)| i as u32)
108 .collect()
109}
110
111#[cfg(test)]
112mod tests {
113 use super::*;
114 use crate::map::builder::SiteMapBuilder;
115 use crate::map::types::*;
116
117 #[test]
118 fn test_compute_clusters() {
119 let mut builder = SiteMapBuilder::new("test.com");
120
121 for i in 0..10 {
123 let mut feats = [0.0f32; FEATURE_DIM];
124 feats[0] = 10.0 + (i as f32 * 0.1);
125 feats[5] = 0.0;
126 builder.add_node(
127 &format!("https://test.com/a/{i}"),
128 PageType::Article,
129 feats,
130 200,
131 );
132 }
133
134 for i in 0..10 {
136 let mut feats = [0.0f32; FEATURE_DIM];
137 feats[0] = 0.0;
138 feats[5] = 10.0 + (i as f32 * 0.1);
139 builder.add_node(
140 &format!("https://test.com/p/{i}"),
141 PageType::ProductDetail,
142 feats,
143 200,
144 );
145 }
146
147 let mut map = builder.build();
148 compute_clusters(&mut map);
149
150 assert!(!map.cluster_centroids.is_empty());
151 assert_eq!(map.cluster_assignments.len(), 20);
152
153 let group_a_clusters: std::collections::HashSet<u16> =
156 (0..10).map(|i| map.cluster_assignments[i]).collect();
157 let group_b_clusters: std::collections::HashSet<u16> =
158 (10..20).map(|i| map.cluster_assignments[i]).collect();
159
160 assert!(
162 group_a_clusters.is_disjoint(&group_b_clusters),
163 "Group A clusters {:?} should not overlap with Group B clusters {:?}",
164 group_a_clusters,
165 group_b_clusters,
166 );
167 }
168
169 #[test]
170 fn test_cluster_type() {
171 let mut builder = SiteMapBuilder::new("test.com");
172 let feats = [0.0f32; FEATURE_DIM];
173
174 builder.add_node("https://test.com/a1", PageType::Article, feats, 200);
175 builder.add_node("https://test.com/a2", PageType::Article, feats, 200);
176 builder.add_node("https://test.com/p1", PageType::ProductDetail, feats, 200);
177
178 let mut map = builder.build();
179 map.cluster_assignments = vec![0, 0, 0];
181
182 let dominant = cluster_type(&map, 0);
183 assert_eq!(dominant, PageType::Article); }
185
186 #[test]
187 fn test_cluster_members() {
188 let mut builder = SiteMapBuilder::new("test.com");
189 let feats = [0.0f32; FEATURE_DIM];
190
191 builder.add_node("https://test.com/a", PageType::Article, feats, 200);
192 builder.add_node("https://test.com/b", PageType::Article, feats, 200);
193 builder.add_node("https://test.com/c", PageType::Article, feats, 200);
194
195 let mut map = builder.build();
196 map.cluster_assignments = vec![0, 1, 0];
197
198 let members = cluster_members(&map, 0);
199 assert_eq!(members, vec![0, 2]);
200
201 let members = cluster_members(&map, 1);
202 assert_eq!(members, vec![1]);
203 }
204}