tokensave 4.2.0

Code intelligence tool that builds a semantic knowledge graph from Rust, Go, Java, Scala, TypeScript, Python, C, C++, Kotlin, C#, Swift, and many more codebases
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
//! Structural health analysis algorithms.
//!
//! Provides file-level DAG construction, Gini coefficient computation,
//! Tarjan's SCC-based acyclicity scoring, dependency depth analysis,
//! modularity estimation, and composite health scoring.

use std::collections::{HashMap, HashSet, VecDeque};
use std::hash::BuildHasher;

// ---------------------------------------------------------------------------
// Task 2: Gini Coefficient
// ---------------------------------------------------------------------------

/// Computes the Gini coefficient for a slice of non-negative values.
/// Returns 0.0 for empty slices, single-element slices, or all-zero slices.
/// Result is in \[0.0, 1.0\] where 0.0 = perfect equality.
pub fn gini_coefficient(values: &[f64]) -> f64 {
    if values.len() <= 1 {
        return 0.0;
    }

    let sum: f64 = values.iter().sum();
    if sum == 0.0 {
        return 0.0;
    }

    let n = values.len() as f64;
    let mut sorted = values.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    // G = (2 * Σ(i * x_i)) / (n * Σ(x_i)) - (n + 1) / n  (i is 1-indexed)
    let weighted_sum: f64 = sorted
        .iter()
        .enumerate()
        .map(|(idx, &x)| (idx as f64 + 1.0) * x)
        .sum();

    (2.0 * weighted_sum) / (n * sum) - (n + 1.0) / n
}

/// Returns a human-readable label for a Gini coefficient value.
/// - <0.20  → "low inequality (healthy)"
/// - <0.40  → "moderate inequality"
/// - <0.60  → "high inequality"
/// - >=0.60 → "extreme inequality (god files likely)"
pub fn gini_label(gini: f64) -> &'static str {
    if gini < 0.20 {
        "low inequality (healthy)"
    } else if gini < 0.40 {
        "moderate inequality"
    } else if gini < 0.60 {
        "high inequality"
    } else {
        "extreme inequality (god files likely)"
    }
}

// ---------------------------------------------------------------------------
// Task 3: Tarjan's SCC / Acyclicity Score
// ---------------------------------------------------------------------------

struct TarjanState<'a> {
    adj: &'a HashMap<String, HashSet<String>>,
    index_counter: usize,
    stack: Vec<String>,
    on_stack: HashSet<String>,
    index: HashMap<String, usize>,
    lowlink: HashMap<String, usize>,
    sccs: Vec<Vec<String>>,
}

impl<'a> TarjanState<'a> {
    fn new(adj: &'a HashMap<String, HashSet<String>>) -> Self {
        TarjanState {
            adj,
            index_counter: 0,
            stack: Vec::new(),
            on_stack: HashSet::new(),
            index: HashMap::new(),
            lowlink: HashMap::new(),
            sccs: Vec::new(),
        }
    }

    fn strongconnect(&mut self, v: &str) {
        let v = v.to_string();
        self.index.insert(v.clone(), self.index_counter);
        self.lowlink.insert(v.clone(), self.index_counter);
        self.index_counter += 1;
        self.stack.push(v.clone());
        self.on_stack.insert(v.clone());

        // Collect neighbors first to avoid borrow conflicts
        let neighbors: Vec<String> = self
            .adj
            .get(&v)
            .map(|s| s.iter().cloned().collect())
            .unwrap_or_default();

        for w in neighbors {
            if !self.index.contains_key(&w) {
                self.strongconnect(&w.clone());
                let w_low = self.lowlink[&w];
                let v_low = self.lowlink[&v];
                self.lowlink.insert(v.clone(), v_low.min(w_low));
            } else if self.on_stack.contains(&w) {
                let w_idx = self.index[&w];
                let v_low = self.lowlink[&v];
                self.lowlink.insert(v.clone(), v_low.min(w_idx));
            }
        }

        // If v is a root node, pop the stack and generate an SCC
        if self.lowlink[&v] == self.index[&v] {
            let mut scc = Vec::new();
            while let Some(w) = self.stack.pop() {
                self.on_stack.remove(&w);
                let is_v = w == v;
                scc.push(w);
                if is_v {
                    break;
                }
            }
            self.sccs.push(scc);
        }
    }
}

/// Runs Tarjan's SCC algorithm on the adjacency map.
/// Returns a list of strongly connected components (each is a list of node IDs).
fn tarjan_scc(adj: &HashMap<String, HashSet<String>>) -> Vec<Vec<String>> {
    let mut state = TarjanState::new(adj);

    // Collect all nodes (both keys and targets) so isolated nodes are included
    let mut all_nodes: HashSet<String> = adj.keys().cloned().collect();
    for targets in adj.values() {
        all_nodes.extend(targets.iter().cloned());
    }

    for node in all_nodes {
        if !state.index.contains_key(&node) {
            state.strongconnect(&node);
        }
    }

    state.sccs
}

/// Computes the acyclicity score for a directed graph.
/// Uses Tarjan's SCC algorithm. Score = 1.0 - (`edges_in_nontrivial_SCCs` / `total_edges`).
/// Returns (score, `number_of_edges_in_cycles`).
pub fn acyclicity_score<S1: BuildHasher, S2: BuildHasher>(
    adj: &HashMap<String, HashSet<String, S2>, S1>,
) -> (f64, usize) {
    let total_edges: usize = adj.values().map(HashSet::len).sum();

    if total_edges == 0 {
        return (1.0, 0);
    }

    // Build a plain HashMap for tarjan_scc (which uses default hasher)
    let plain_adj: HashMap<String, HashSet<String>> = adj
        .iter()
        .map(|(k, v)| (k.clone(), v.iter().cloned().collect()))
        .collect();

    let sccs = tarjan_scc(&plain_adj);

    // Build a set of nodes in nontrivial SCCs (size > 1)
    let mut in_cycle: HashSet<&str> = HashSet::new();
    for scc in &sccs {
        if scc.len() > 1 {
            for node in scc {
                in_cycle.insert(node.as_str());
            }
        }
    }

    // Count edges where both endpoints are in nontrivial SCCs
    let edges_in_cycles: usize = adj
        .iter()
        .filter(|(src, _)| in_cycle.contains(src.as_str()))
        .map(|(_src, targets)| {
            targets
                .iter()
                .filter(|tgt| in_cycle.contains(tgt.as_str()))
                .count()
        })
        .sum();

    let score = 1.0 - (edges_in_cycles as f64 / total_edges as f64);
    (score, edges_in_cycles)
}

// ---------------------------------------------------------------------------
// Task 4: Dependency Depth
// ---------------------------------------------------------------------------

/// A chain entry representing a file and the longest dependency chain reaching it.
pub struct DepthChain {
    pub file: String,
    pub depth: usize,
    pub chain: Vec<String>,
}

/// Result of the dependency depth analysis.
pub struct DepthResult {
    pub max_depth: usize,
    /// `ceil(log2(file_count))`
    pub ideal_depth: usize,
    pub chains: Vec<DepthChain>,
}

/// Computes longest dependency chains. Breaks cycles via Tarjan's SCC
/// (collapses each SCC to a single node), then runs topo sort + DP.
pub fn dependency_depth<S1: BuildHasher, S2: BuildHasher>(
    adj: &HashMap<String, HashSet<String, S2>, S1>,
    limit: usize,
) -> DepthResult {
    // Collect all nodes
    let mut all_nodes: HashSet<String> = adj.keys().cloned().collect();
    for targets in adj.values() {
        all_nodes.extend(targets.iter().cloned());
    }
    let file_count = all_nodes.len();

    if file_count == 0 {
        return DepthResult {
            max_depth: 0,
            ideal_depth: 0,
            chains: Vec::new(),
        };
    }

    let ideal_depth = if file_count <= 1 {
        0
    } else {
        (file_count as f64).log2().ceil() as usize
    };

    // Build a plain HashMap for tarjan_scc
    let plain_adj: HashMap<String, HashSet<String>> = adj
        .iter()
        .map(|(k, v)| (k.clone(), v.iter().cloned().collect()))
        .collect();

    // Step 1: Run Tarjan's SCC, map each node to its SCC index
    let sccs = tarjan_scc(&plain_adj);
    let mut node_to_scc: HashMap<String, usize> = HashMap::new();
    for (idx, scc) in sccs.iter().enumerate() {
        for node in scc {
            node_to_scc.insert(node.clone(), idx);
        }
    }

    // Step 2: Build DAG over SCC indices
    let scc_count = sccs.len();
    let mut scc_adj: HashMap<usize, HashSet<usize>> = HashMap::new();
    for (src, targets) in adj {
        let src_scc = node_to_scc[src];
        for tgt in targets {
            let tgt_scc = node_to_scc[tgt];
            if src_scc != tgt_scc {
                scc_adj.entry(src_scc).or_default().insert(tgt_scc);
            }
        }
    }

    // Step 3: Kahn's algorithm for topological sort
    let mut in_degree = vec![0usize; scc_count];
    for targets in scc_adj.values() {
        for &tgt in targets {
            in_degree[tgt] += 1;
        }
    }

    let mut queue: VecDeque<usize> = (0..scc_count).filter(|&i| in_degree[i] == 0).collect();

    let mut topo_order: Vec<usize> = Vec::new();
    while let Some(node) = queue.pop_front() {
        topo_order.push(node);
        if let Some(neighbors) = scc_adj.get(&node) {
            for &nb in neighbors {
                in_degree[nb] -= 1;
                if in_degree[nb] == 0 {
                    queue.push_back(nb);
                }
            }
        }
    }

    // Step 4: DP for longest path with predecessor tracking
    let mut dist = vec![0usize; scc_count];
    let mut pred = vec![usize::MAX; scc_count];

    for &u in &topo_order {
        if let Some(neighbors) = scc_adj.get(&u) {
            for &v in neighbors {
                if dist[u] + 1 > dist[v] {
                    dist[v] = dist[u] + 1;
                    pred[v] = u;
                }
            }
        }
    }

    // Step 5: Reconstruct chains (use first node of each SCC as representative)
    let mut max_depth = 0;
    let mut results: Vec<DepthChain> = Vec::new();

    for scc_idx in 0..scc_count {
        let depth = dist[scc_idx];
        if depth > max_depth {
            max_depth = depth;
        }

        if results.len() < limit {
            // Reconstruct the chain by walking predecessors
            let mut chain_sccs: Vec<usize> = Vec::new();
            let mut cur = scc_idx;
            loop {
                chain_sccs.push(cur);
                let p = pred[cur];
                if p == usize::MAX {
                    break;
                }
                cur = p;
            }
            chain_sccs.reverse();

            // Map SCC indices back to representative file names
            let chain: Vec<String> = chain_sccs.iter().map(|&si| sccs[si][0].clone()).collect();

            let representative = sccs[scc_idx][0].clone();
            results.push(DepthChain {
                file: representative,
                depth,
                chain,
            });
        }
    }

    // Sort by depth descending for convenience
    results.sort_by_key(|ch| std::cmp::Reverse(ch.depth));

    DepthResult {
        max_depth,
        ideal_depth,
        chains: results,
    }
}

/// Score = min(1.0, ideal\_depth / max\_depth). Shallower is better.
/// Returns 1.0 when `max_depth == 0`.
pub fn depth_score(max_depth: usize, ideal_depth: usize) -> f64 {
    if max_depth == 0 {
        return 1.0;
    }
    (ideal_depth as f64 / max_depth as f64).min(1.0)
}

// ---------------------------------------------------------------------------
// Task 5: Modularity Score
// ---------------------------------------------------------------------------

/// Estimates modularity by removing hub nodes and counting connected components.
/// Hub nodes = files with (fan\_in + fan\_out) > mean + 2\*stddev.
/// Score = 1.0 - (1.0 / component\_count), clamped to \[0, 1\].
/// Returns (score, component\_count\_after\_hub\_removal).
pub fn modularity_score<S1: BuildHasher, S2: BuildHasher>(
    adj: &HashMap<String, HashSet<String, S2>, S1>,
) -> (f64, usize) {
    if adj.is_empty() {
        return (1.0, 0);
    }

    // Collect all nodes
    let mut all_nodes: HashSet<String> = adj.keys().cloned().collect();
    for targets in adj.values() {
        all_nodes.extend(targets.iter().cloned());
    }

    if all_nodes.is_empty() {
        return (1.0, 0);
    }

    // Build undirected connectivity count per node (fan_in + fan_out)
    let mut connectivity: HashMap<&str, usize> = HashMap::new();
    for node in &all_nodes {
        connectivity.insert(node.as_str(), 0);
    }
    for (src, targets) in adj {
        *connectivity.entry(src.as_str()).or_insert(0) += targets.len();
        for tgt in targets {
            *connectivity.entry(tgt.as_str()).or_insert(0) += 1;
        }
    }

    // Compute mean and stddev
    let n = connectivity.len() as f64;
    let values: Vec<f64> = connectivity.values().map(|&v| v as f64).collect();
    let mean = values.iter().sum::<f64>() / n;
    let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
    let stddev = variance.sqrt();
    let threshold = mean + 2.0 * stddev;

    // Identify hub nodes
    let hubs: HashSet<&str> = connectivity
        .iter()
        .filter(|(_, &v)| v as f64 > threshold)
        .map(|(&k, _)| k)
        .collect();

    // Build undirected graph without hubs
    let non_hub_nodes: Vec<&str> = all_nodes
        .iter()
        .map(String::as_str)
        .filter(|n| !hubs.contains(n))
        .collect();

    if non_hub_nodes.is_empty() {
        return (1.0, 0);
    }

    let mut undirected: HashMap<&str, HashSet<&str>> = HashMap::new();
    for &node in &non_hub_nodes {
        undirected.entry(node).or_default();
    }
    for (src, targets) in adj {
        if hubs.contains(src.as_str()) {
            continue;
        }
        for tgt in targets {
            if hubs.contains(tgt.as_str()) {
                continue;
            }
            undirected
                .entry(src.as_str())
                .or_default()
                .insert(tgt.as_str());
            undirected
                .entry(tgt.as_str())
                .or_default()
                .insert(src.as_str());
        }
    }

    // Count connected components via BFS
    let mut visited: HashSet<&str> = HashSet::new();
    let mut components = 0;

    for &start in &non_hub_nodes {
        if visited.contains(start) {
            continue;
        }
        components += 1;
        let mut queue = VecDeque::new();
        queue.push_back(start);
        visited.insert(start);
        while let Some(curr) = queue.pop_front() {
            if let Some(neighbors) = undirected.get(curr) {
                for &nb in neighbors {
                    if !visited.contains(nb) {
                        visited.insert(nb);
                        queue.push_back(nb);
                    }
                }
            }
        }
    }

    let score = (1.0 - 1.0 / components as f64).clamp(0.0, 1.0);
    (score, components)
}

// ---------------------------------------------------------------------------
// Task 6: Composite Health Score
// ---------------------------------------------------------------------------

/// All five health dimensions, each in \[0.0, 1.0\].
#[derive(Debug, Clone)]
pub struct HealthDimensions {
    pub acyclicity: f64,
    pub depth: f64,
    pub equality: f64,
    pub redundancy: f64,
    pub modularity: f64,
}

/// Computes quality signal (0–10000) from geometric mean of all five dimensions.
/// Formula: `(product of all 5).powf(1.0/5.0) * 10000.0`, rounded.
/// Zero in any dimension → 0.
pub fn compute_composite_health(dims: &HealthDimensions) -> u32 {
    let product = dims.acyclicity * dims.depth * dims.equality * dims.redundancy * dims.modularity;

    if product <= 0.0 {
        return 0;
    }

    (product.powf(1.0 / 5.0) * 10_000.0).round() as u32
}