ruvector-mincut 2.0.6

World's first subpolynomial dynamic min-cut: self-healing networks, AI optimization, real-time graph analysis
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
//! Parallel distribution for 256-core agentic chip
//!
//! Distributes minimum cut computation across WASM cores.

// Internal optimization module - docs on public API in lib.rs
#![allow(missing_docs)]

use crate::compact::{
    BitSet256, CompactCoreState, CompactEdge, CompactVertexId, CompactWitness, CoreResult,
    MAX_EDGES_PER_CORE,
};
use core::sync::atomic::{AtomicU16, AtomicU8, Ordering};

// SIMD functions (inlined for non-wasm, uses wasm::simd when available)
#[cfg(feature = "wasm")]
use crate::wasm::simd::{simd_boundary_size, simd_popcount};

#[cfg(not(feature = "wasm"))]
#[inline]
fn simd_popcount(bits: &[u64; 4]) -> u32 {
    bits.iter().map(|b| b.count_ones()).sum()
}

#[cfg(not(feature = "wasm"))]
#[inline]
fn simd_boundary_size(set_a: &BitSet256, edges: &[(CompactVertexId, CompactVertexId)]) -> u16 {
    let mut count = 0u16;
    for &(src, tgt) in edges {
        let src_in = set_a.contains(src);
        let tgt_in = set_a.contains(tgt);
        if src_in != tgt_in {
            count += 1;
        }
    }
    count
}

/// Number of WASM cores
pub const NUM_CORES: usize = 256;

/// Number of geometric ranges per core
pub const RANGES_PER_CORE: usize = 1;

/// Total ranges = NUM_CORES × RANGES_PER_CORE
pub const TOTAL_RANGES: usize = NUM_CORES * RANGES_PER_CORE;

/// Range factor (1.2 from paper)
pub const RANGE_FACTOR: f32 = 1.2;

/// Core assignment strategy
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum CoreStrategy {
    /// Each core handles one geometric range [1.2^i, 1.2^(i+1)]
    GeometricRanges = 0,
    /// Cores handle graph partitions (for very large graphs)
    GraphPartition = 1,
    /// Work stealing with dynamic assignment
    WorkStealing = 2,
}

/// Message types for inter-core communication (4 bytes)
#[derive(Clone, Copy)]
#[repr(C)]
pub struct CoreMessage {
    pub msg_type: u8,
    pub src_core: u8,
    pub payload: u16,
}

impl CoreMessage {
    pub const TYPE_IDLE: u8 = 0;
    pub const TYPE_WORK_REQUEST: u8 = 1;
    pub const TYPE_WORK_AVAILABLE: u8 = 2;
    pub const TYPE_RESULT: u8 = 3;
    pub const TYPE_SYNC: u8 = 4;
    pub const TYPE_STEAL_REQUEST: u8 = 5;
    pub const TYPE_STEAL_RESPONSE: u8 = 6;
}

/// Lock-free work queue entry
#[derive(Clone, Copy, Default)]
#[repr(C)]
pub struct WorkItem {
    /// Range index to process
    pub range_idx: u16,
    /// Priority (lower = higher priority)
    pub priority: u8,
    /// Status
    pub status: u8,
}

impl WorkItem {
    pub const STATUS_PENDING: u8 = 0;
    pub const STATUS_IN_PROGRESS: u8 = 1;
    pub const STATUS_COMPLETE: u8 = 2;
}

/// Shared state for coordination (fits in shared memory)
#[repr(C, align(64))]
pub struct SharedCoordinator {
    /// Global minimum cut found so far
    pub global_min_cut: AtomicU16,
    /// Number of cores that have completed (u16 to support NUM_CORES=256)
    pub completed_cores: AtomicU16,
    /// Current phase
    pub phase: AtomicU8,
    /// Work queue head (for work stealing)
    pub queue_head: AtomicU16,
    /// Work queue tail
    pub queue_tail: AtomicU16,
    /// Best result core ID
    pub best_core: AtomicU8,
    /// Padding for alignment
    _pad: [u8; 52],
}

impl SharedCoordinator {
    pub const PHASE_INIT: u8 = 0;
    pub const PHASE_DISTRIBUTE: u8 = 1;
    pub const PHASE_COMPUTE: u8 = 2;
    pub const PHASE_COLLECT: u8 = 3;
    pub const PHASE_DONE: u8 = 4;

    pub fn new() -> Self {
        Self {
            global_min_cut: AtomicU16::new(u16::MAX),
            completed_cores: AtomicU16::new(0),
            phase: AtomicU8::new(Self::PHASE_INIT),
            queue_head: AtomicU16::new(0),
            queue_tail: AtomicU16::new(0),
            best_core: AtomicU8::new(0),
            _pad: [0; 52],
        }
    }

    /// Try to update global minimum (atomic compare-and-swap)
    pub fn try_update_min(&self, new_min: u16, core_id: u8) -> bool {
        let mut current = self.global_min_cut.load(Ordering::Acquire);
        loop {
            if new_min >= current {
                return false;
            }
            match self.global_min_cut.compare_exchange_weak(
                current,
                new_min,
                Ordering::AcqRel,
                Ordering::Acquire,
            ) {
                Ok(_) => {
                    self.best_core.store(core_id, Ordering::Release);
                    return true;
                }
                Err(c) => current = c,
            }
        }
    }

    /// Mark core as completed
    pub fn mark_completed(&self) -> u16 {
        self.completed_cores.fetch_add(1, Ordering::AcqRel) + 1
    }

    /// Check if all cores completed
    pub fn all_completed(&self) -> bool {
        self.completed_cores.load(Ordering::Acquire) >= NUM_CORES as u16
    }
}

/// Compute range bounds for a core
#[inline]
pub fn compute_core_range(core_id: u8) -> (u16, u16) {
    let i = core_id as u32;
    let lambda_min = (RANGE_FACTOR.powi(i as i32)).floor() as u16;
    let lambda_max = (RANGE_FACTOR.powi((i + 1) as i32)).floor() as u16;
    (lambda_min.max(1), lambda_max.max(1))
}

/// Distribute graph across cores based on strategy
pub struct CoreDistributor {
    pub strategy: CoreStrategy,
    pub num_vertices: u16,
    pub num_edges: u16,
}

impl CoreDistributor {
    pub fn new(strategy: CoreStrategy, num_vertices: u16, num_edges: u16) -> Self {
        Self {
            strategy,
            num_vertices,
            num_edges,
        }
    }

    /// Determine which core should handle a vertex
    #[inline]
    pub fn vertex_to_core(&self, v: CompactVertexId) -> u8 {
        match self.strategy {
            CoreStrategy::GeometricRanges => {
                // All vertices go to all cores (replicated)
                0
            }
            CoreStrategy::GraphPartition => {
                // Partition by vertex ID
                ((v as u32 * NUM_CORES as u32) / self.num_vertices as u32) as u8
            }
            CoreStrategy::WorkStealing => {
                // Dynamic assignment
                0
            }
        }
    }

    /// Get the range of vertices for a core
    pub fn core_vertex_range(&self, core_id: u8) -> (CompactVertexId, CompactVertexId) {
        match self.strategy {
            CoreStrategy::GeometricRanges => (0, self.num_vertices),
            CoreStrategy::GraphPartition => {
                let n = self.num_vertices as u32;
                let start = (core_id as u32 * n) / NUM_CORES as u32;
                let end = ((core_id as u32 + 1) * n) / NUM_CORES as u32;
                (start as u16, end as u16)
            }
            CoreStrategy::WorkStealing => (0, self.num_vertices),
        }
    }
}

/// Per-core execution context
pub struct CoreExecutor<'a> {
    /// Core identifier (0-255)
    pub core_id: u8,
    /// Core state containing graph and witness data
    pub state: CompactCoreState,
    /// Reference to shared coordinator for cross-core synchronization
    pub coordinator: Option<&'a SharedCoordinator>,
}

impl<'a> CoreExecutor<'a> {
    /// Initialize core with its assigned range
    pub fn init(core_id: u8, coordinator: Option<&'a SharedCoordinator>) -> Self {
        let (lambda_min, lambda_max) = compute_core_range(core_id);

        let state = CompactCoreState {
            adjacency: Default::default(),
            edges: [CompactEdge::default(); MAX_EDGES_PER_CORE],
            num_vertices: 0,
            num_edges: 0,
            min_cut: u16::MAX,
            best_witness: CompactWitness::default(),
            lambda_min,
            lambda_max,
            core_id,
            status: CompactCoreState::STATUS_IDLE,
        };

        Self {
            core_id,
            state,
            coordinator,
        }
    }

    /// Add edge to this core's local graph
    pub fn add_edge(&mut self, src: CompactVertexId, tgt: CompactVertexId, weight: u16) {
        if self.state.num_edges as usize >= 512 {
            return; // Full
        }

        let idx = self.state.num_edges as usize;
        self.state.edges[idx] = CompactEdge {
            source: src,
            target: tgt,
            weight,
            flags: CompactEdge::FLAG_ACTIVE,
        };
        self.state.num_edges += 1;

        // Track vertices
        self.state.num_vertices = self.state.num_vertices.max(src + 1).max(tgt + 1);
    }

    /// Process this core's assigned range
    pub fn process(&mut self) -> CoreResult {
        self.state.status = CompactCoreState::STATUS_PROCESSING;

        // Simple minimum cut via minimum degree heuristic
        // (Full algorithm would use LocalKCut here)
        let mut min_degree = u16::MAX;
        let mut min_vertex = 0u16;

        for v in 0..self.state.num_vertices {
            let degree = self.compute_degree(v);
            if degree > 0 && degree < min_degree {
                min_degree = degree;
                min_vertex = v;
            }
        }

        // Check if in our range
        if min_degree >= self.state.lambda_min && min_degree <= self.state.lambda_max {
            self.state.min_cut = min_degree;

            // Create witness
            let mut membership = BitSet256::new();
            membership.insert(min_vertex);
            self.state.best_witness = CompactWitness::new(min_vertex, membership, min_degree);

            // Try to update global minimum
            if let Some(coord) = self.coordinator {
                coord.try_update_min(min_degree, self.core_id);
            }
        }

        self.state.status = CompactCoreState::STATUS_DONE;

        // Report result
        if let Some(coord) = self.coordinator {
            coord.mark_completed();
        }

        CoreResult {
            core_id: self.core_id,
            status: self.state.status,
            min_cut: self.state.min_cut,
            witness_hash: self.state.best_witness.hash,
            witness_seed: self.state.best_witness.seed,
            witness_cardinality: self.state.best_witness.cardinality,
            witness_boundary: self.state.best_witness.boundary_size,
            padding: [0; 4],
        }
    }

    /// Compute degree of a vertex
    fn compute_degree(&self, v: CompactVertexId) -> u16 {
        let mut degree = 0u16;
        for i in 0..self.state.num_edges as usize {
            let edge = &self.state.edges[i];
            if edge.is_active() && (edge.source == v || edge.target == v) {
                // Sum weights for weighted min-cut (not edge count)
                degree = degree.saturating_add(edge.weight);
            }
        }
        degree
    }

    /// SIMD-accelerated boundary computation for a vertex set
    ///
    /// Uses WASM SIMD128 when available for parallel edge checking
    #[inline]
    pub fn compute_boundary_simd(&self, set: &BitSet256) -> u16 {
        // Collect active edges as (source, target) pairs
        let edges: Vec<(CompactVertexId, CompactVertexId)> = self.state.edges
            [..self.state.num_edges as usize]
            .iter()
            .filter(|e| e.is_active())
            .map(|e| (e.source, e.target))
            .collect();

        // Use SIMD-accelerated boundary computation
        simd_boundary_size(set, &edges)
    }

    /// SIMD-accelerated population count for membership sets
    #[inline]
    pub fn membership_count_simd(&self, set: &BitSet256) -> u32 {
        simd_popcount(&set.bits)
    }
}

/// Result aggregator for collecting results from all cores
pub struct ResultAggregator {
    /// Results from each core
    pub results: [CoreResult; NUM_CORES],
    /// Index of the best result
    pub best_idx: usize,
    /// Global minimum cut value found
    pub global_min: u16,
}

impl ResultAggregator {
    /// Create a new result aggregator
    pub fn new() -> Self {
        Self {
            results: [CoreResult::default(); NUM_CORES],
            best_idx: 0,
            global_min: u16::MAX,
        }
    }

    /// Add a result from a core and update the best if needed
    pub fn add_result(&mut self, result: CoreResult) {
        let idx = result.core_id as usize;
        self.results[idx] = result;

        if result.min_cut < self.global_min {
            self.global_min = result.min_cut;
            self.best_idx = idx;
        }
    }

    /// Get the best result (lowest minimum cut)
    pub fn best_result(&self) -> &CoreResult {
        &self.results[self.best_idx]
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_compute_core_range() {
        let (min0, max0) = compute_core_range(0);
        assert_eq!(min0, 1);
        assert_eq!(max0, 1);

        let (min10, max10) = compute_core_range(10);
        assert_eq!(min10, 6);
        assert_eq!(max10, 7);
    }

    #[test]
    fn test_shared_coordinator() {
        let coord = SharedCoordinator::new();

        assert!(coord.try_update_min(100, 0));
        assert_eq!(coord.global_min_cut.load(Ordering::Acquire), 100);

        assert!(coord.try_update_min(50, 1));
        assert_eq!(coord.global_min_cut.load(Ordering::Acquire), 50);

        assert!(!coord.try_update_min(60, 2)); // 60 > 50
        assert_eq!(coord.global_min_cut.load(Ordering::Acquire), 50);
    }

    #[test]
    fn test_core_executor() {
        let coord = SharedCoordinator::new();
        let mut exec = CoreExecutor::init(0, Some(&coord));

        exec.add_edge(0, 1, 1);
        exec.add_edge(1, 2, 1);

        let result = exec.process();
        assert_eq!(result.core_id, 0);
    }

    #[test]
    fn test_result_aggregator() {
        let mut agg = ResultAggregator::new();

        agg.add_result(CoreResult {
            core_id: 0,
            min_cut: 100,
            ..Default::default()
        });

        agg.add_result(CoreResult {
            core_id: 1,
            min_cut: 50,
            ..Default::default()
        });

        assert_eq!(agg.global_min, 50);
        assert_eq!(agg.best_idx, 1);
    }
}