kovan-map 0.1.15

Lock-free concurrent hash maps using kovan memory reclamation
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
//! High-Performance Lock-Free Concurrent Hash Map (FoldHash + resizable table).
//!
//! # Strategy
//!
//! 1. **FoldHash**: `foldhash::fast::FixedState` for fast, quality hashing.
//! 2. **Resizable bucket table**: the bucket array lives in a single
//!    allocation (header + inline buckets) swapped atomically on resize and
//!    reclaimed through kovan. The map grows when the load factor exceeds
//!    3/4 and shrinks below 1/4 (never under its initial capacity),
//!    mirroring `HopscotchMap`'s resize protocol.
//! 3. **Optimized Node Layout**: after the leading kovan `RetiredNode`
//!    header, fields are ordered `hash -> key -> value -> next` to optimize
//!    cache line usage during checks.
//!
//! # Architecture
//! - **Table**: kovan-retired object holding the bucket array (atomic head
//!   pointers). Readers snapshot the table under a guard and never block.
//! - **Nodes**: singly linked chains, CAS-based lock-free insert/remove.
//! - **Resize**: a single resizer (CAS on `resizing`) clones all entries
//!   into a new table, swaps the table pointer, and retires the old table;
//!   the old table's destructor frees its remaining chains exactly once at
//!   reclamation time. Writers wait out an active resize and re-validate
//!   after success so no update is lost to a concurrent migration.

extern crate alloc;

#[cfg(feature = "std")]
extern crate std;

use alloc::boxed::Box;
use core::borrow::Borrow;
use core::hash::{BuildHasher, Hash};
use core::sync::atomic::{AtomicBool, AtomicIsize, Ordering, fence};
use foldhash::fast::FixedState;
use kovan::{Atomic, RetiredNode, Shared, pin, retire};

// vertexia: `resizing` gates `try_resize` (CAS to claim the resize) and is
// spun on by every other writer via `wait_for_resize`/`clear`'s CAS-retry
// while a resize is in flight. That spin has no *other* yield point in it,
// which is a problem under shuttle two levels deep:
//
// 1. Without any instrumented op in the loop, shuttle can't preempt out of
//    it at all -- a genuine hang once a writer observes `resizing == true`.
// 2. Instrumenting the field itself (an earlier version of this fix swapped
//    `AtomicBool` for shuttle's) fixes (1) but isn't enough for *fairness*:
//    PCT keeps a thread's priority fixed except at a handful of preselected
//    "change points" or an explicit yield, so a plain instrumented `.load()`
//    in a spin loop can still be rescheduled indefinitely if it happens to
//    hold the higher priority, starving the resizer and running out
//    shuttle's step budget ("exceeded max_steps bound", an unfair schedule,
//    not a real bug).
//
// The fix needs a *yield*, not an instrumented load: `resize_spin_hint`
// (below) calls `shuttle::hint::spin_loop`, which also calls
// `shuttle::thread::yield_now`, which PCT treats as an explicit change
// point, demoting the spinner's priority so the resizer is guaranteed a
// turn -- independent of whether the *condition* it's spinning on is
// instrumented. So `resizing` itself stays a plain `AtomicBool` under every
// build, shuttle included: swapping its type is unnecessary for either
// correctness or fairness here, and empirically, doing so anyway
// introduced its own unrelated shuttle-only heap corruption in this crate's
// shuttle test (reproduced independent of any resize ever triggering,
// isolated by bisection, still unexplained -- plausibly a layout hazard
// from shuttle's `AtomicBool` being a much larger `RefCell`-based type
// instead of a 1-byte one; not chased further since the type swap was
// never actually required).
/// Spin-wait hint for loops that wait out an in-flight resize (shuttle
/// builds): goes through shuttle's `spin_loop` so the spin contains a
/// scheduler-visible yield, as explained in the comment above.
#[cfg(feature = "shuttle")]
#[inline(always)]
fn resize_spin_hint() {
    shuttle::hint::spin_loop();
}

/// Spin-wait hint for loops that wait out an in-flight resize (regular
/// builds): the plain CPU spin-loop hint.
#[cfg(not(feature = "shuttle"))]
#[inline(always)]
fn resize_spin_hint() {
    core::hint::spin_loop();
}

/// Default number of buckets for `new()` (524_288 = 2^19). Matches the
/// previous fixed-table sizing (zero collisions for ~100k items, fits in
/// L3); maps created with `new()` keep exactly the old memory/performance
/// profile and additionally grow past it on demand. Use
/// [`HashMap::with_capacity`] for small elastic maps.
const DEFAULT_CAPACITY: usize = 524_288;

/// Minimum number of buckets; the map never shrinks below this.
/// `TableRef::alloc` raises every requested capacity to at least this value.
const MIN_CAPACITY: usize = 64;

// Load-factor thresholds (implemented as integer comparisons on the hot
// paths): grow when count/capacity > 3/4, shrink when count/capacity < 1/4
// (never below the map's floor capacity). Same thresholds as HopscotchMap.

/// Exponential spin backoff for easing contention between retries.
///
/// Every [`Backoff::spin`] call issues `2^step` spin-loop hints (the
/// exponent is capped at 6, i.e. at most 64 hints) and then lengthens the
/// next wait.
struct Backoff {
    /// Number of completed `spin` calls, saturating at 7.
    step: u32,
}

impl Backoff {
    /// Starts a new backoff sequence at the shortest delay.
    #[inline(always)]
    fn new() -> Self {
        Backoff { step: 0 }
    }

    /// Busy-waits for the current delay, then doubles the next one up to
    /// the cap.
    #[inline(always)]
    fn spin(&mut self) {
        let exponent = self.step.min(6);
        let mut remaining = 1u32 << exponent;
        while remaining != 0 {
            core::hint::spin_loop();
            remaining -= 1;
        }
        // `step` is allowed to reach 7; the `min(6)` above is what caps the
        // actual delay.
        if self.step < 7 {
            self.step += 1;
        }
    }
}

/// Node in the lock-free linked list.
///
/// One node holds one key/value entry of a bucket chain. `#[repr(C)]` fixes
/// the field order exactly as declared, so `retired` sits at offset 0.
#[repr(C)]
struct Node<K, V> {
    /// kovan reclamation header. Kept as the first field; the `TableProxy`
    /// notes in this file state that `retire()` requires the `RetiredNode`
    /// at offset 0 (presumably the same requirement applies here — confirm
    /// against kovan's docs).
    retired: RetiredNode,
    /// Hash stored alongside the key (presumably the key's full hash, so
    /// chain walks can compare hashes before keys — confirm at the
    /// insertion site).
    hash: u64,
    /// The entry's key.
    key: K,
    /// The entry's value.
    value: V,
    /// Successor in the chain. The low bit of this pointer is the Harris
    /// deletion tag (see `tagged` / `untag` / `is_tagged`); tags appear on
    /// this field only.
    next: Atomic<Node<K, V>>,
}

// SAFETY (kovan retirement rule): a retired Node's destructor may run on
// any thread, and nodes (with K and V inside) move between threads — hence
// `K: Send, V: Send` for Send. Unlike exclusive-transfer containers,
// lookups DO produce `&K`/`&V` from a shared `&Node` (get() clones V
// through &V under concurrent readers), so Sync additionally requires
// `K: Sync, V: Sync` — the same bounds the map-level Sync impl below has
// always required for sharing the map.
unsafe impl<K: Send, V: Send> Send for Node<K, V> {}
unsafe impl<K: Send + Sync, V: Send + Sync> Sync for Node<K, V> {}

// ---------------------------------------------------------------------------
// Harris-style logical deletion
// ---------------------------------------------------------------------------
//
// Removing (or replacing) a node first TAGS the victim's `next` pointer
// (low bit set) — the logical delete — and only then unlinks it from its
// predecessor. The thread whose tag-CAS succeeded exclusively owns the node
// and is the only one to `retire()` it. This closes two races a plain
// unlink-CAS protocol has:
//
//  * insert-after-removed-tail: a tail insert CASes `tail.next: null -> new`;
//    if the tail was concurrently unlinked and retired, the new node is
//    spliced onto dead memory — the insert is lost and the node leaks.
//    With tagging, the remover first turns the tail's `next` into
//    tagged-null, so the insert's CAS (expecting untagged null) fails.
//
//  * adjacent removes: removing B (A->B->C) and C (B->C->D) concurrently
//    can unlink C from the already-detached B while C is still reachable
//    through A, retiring a reachable node (use-after-free for later
//    readers). With tagging, C's remover owns C via the tag; walkers
//    observe `B.next` tagged and never operate relative to deleted nodes.
//
// Invariants:
//  * tags appear only on `Node.next` fields, never on bucket heads
//    (snipping stores the untagged successor);
//  * a node whose `next` is tagged has been retired by its tag owner —
//    `clear()`, the migration sweep, and `Table::drop` must skip it;
//  * every traversal untags before following a `next` pointer.

/// Returns `p` with the deletion tag (lowest address bit) set.
///
/// Already-tagged pointers come back unchanged.
#[inline(always)]
fn tagged<K, V>(p: *mut Node<K, V>) -> *mut Node<K, V> {
    const TAG_BIT: usize = 1;
    let address = p as usize;
    (address | TAG_BIT) as *mut Node<K, V>
}

/// Returns `p` with the deletion tag (lowest address bit) cleared, i.e. the
/// pointer that can actually be followed.
#[inline(always)]
fn untag<K, V>(p: *mut Node<K, V>) -> *mut Node<K, V> {
    const TAG_BIT: usize = 1;
    let address = p as usize;
    (address & !TAG_BIT) as *mut Node<K, V>
}

/// Reports whether `p` carries the deletion tag (lowest address bit set).
#[inline(always)]
fn is_tagged<K, V>(p: *const Node<K, V>) -> bool {
    let low_bit = (p as usize) & 1;
    low_bit == 1
}

// ---------------------------------------------------------------------------
// Single-allocation table: [TableHeader][Atomic<Node>; capacity]
// ---------------------------------------------------------------------------
//
// The header and the bucket array share one allocation, so the read path is
// `table ptr -> header line (mask, hot in cache) -> bucket line` — the same
// number of cold dereferences as a fixed embedded array. The table pointer
// itself is swapped atomically on resize.
//
// Reclamation goes through a tiny boxed `TableProxy` (RetiredNode at offset
// 0, as `retire()` requires): retiring the proxy defers until every guard
// that could observe the old table has been released; the proxy's destructor
// then frees the table's remaining chains and the allocation itself.
//
// The proxy is built eagerly, in `TableRef::alloc`, at the SAME time as the
// table it guards, not lazily when the table is finally retired. See the
// long comment on `alloc` for why: a proxy built at retire time stamps its
// birth_epoch too late, and kovan can then judge a straggler writer as not
// needing protection for a table it is still actively CASing into.

/// Header at the start of a table allocation; the bucket array follows it
/// in the same allocation (see `TableRef::layout`). Deliberately not
/// generic over `K, V`, which is why `proxy` is stored type-erased.
#[repr(C)]
struct TableHeader {
    /// `capacity - 1`. Capacity is a power of two, so `hash & mask` selects
    /// a bucket (see `TableRef::bucket_index`).
    mask: usize,
    /// Number of buckets in the array that follows (a power of two, at
    /// least `MIN_CAPACITY`; set once in `TableRef::alloc`).
    capacity: usize,
    /// Type-erased `*mut TableProxy<K, V>` for this table's eventual
    /// retirement (see `TableRef::alloc`). Zero means already taken/freed.
    proxy: usize,
}

/// Borrowed view of a table allocation.
///
/// A thin `Copy` handle around the raw header pointer. It does not own the
/// allocation: copying it never duplicates the table, and dropping it frees
/// nothing (freeing is explicit, via `free` / `free_array_only`).
struct TableRef<K: 'static, V: 'static> {
    /// Start of the allocation (the header); the buckets follow it.
    header: *mut TableHeader,
    /// Carries the key/value types that the type-erased header lacks.
    _marker: core::marker::PhantomData<(K, V)>,
}

// Written by hand rather than derived: a derive would add `K: Clone` /
// `K: Copy` (and the same for `V`) bounds, which a pointer-sized handle
// does not need.
impl<K, V> Clone for TableRef<K, V> {
    fn clone(&self) -> Self {
        *self
    }
}
impl<K, V> Copy for TableRef<K, V> {}

impl<K: 'static, V: 'static> TableRef<K, V> {
    /// Wraps a raw header pointer in a typed view. Nothing is allocated and
    /// no ownership is transferred.
    #[inline(always)]
    fn from_raw(header: *mut TableHeader) -> Self {
        Self {
            header,
            _marker: core::marker::PhantomData,
        }
    }

    /// Layout of `[TableHeader][Atomic<Node>; capacity]`, padded to its own
    /// alignment, together with the byte offset of the bucket array.
    ///
    /// # Panics
    /// Panics if the bucket array or the combined layout overflows.
    fn layout(capacity: usize) -> (core::alloc::Layout, usize) {
        let header = core::alloc::Layout::new::<TableHeader>();
        let buckets = core::alloc::Layout::array::<Atomic<Node<K, V>>>(capacity)
            .expect("bucket array layout overflow");
        let (layout, offset) = header.extend(buckets).expect("table layout overflow");
        (layout.pad_to_align(), offset)
    }

    /// Allocate a zero-initialized table (`Atomic` buckets zero == null) and
    /// eagerly build its retirement proxy.
    ///
    /// `capacity` is rounded up to the next power of two and raised to at
    /// least `MIN_CAPACITY`.
    ///
    /// The proxy is built *here*, at the table's own birth, not lazily at
    /// `try_resize` time. `RetiredNode::new()` stamps `birth_epoch` from the
    /// calling thread's cached epoch at construction time (kovan's
    /// contract: "birth_epoch must be set at allocation time, not
    /// retirement time"; see `kovan::RetiredNode::new`). A table can live
    /// through many other threads' operations before it is ever resized
    /// away; if its proxy were only constructed when the resizer finally
    /// retires it, the proxy's birth_epoch would be the *resizer's* current
    /// epoch, which can be arbitrarily newer than the epoch a straggler
    /// writer published the last time it observed this table via
    /// `self.table.load()`. kovan's eligibility check
    /// (`slot.epoch >= min_epoch`) would then wrongly judge that straggler
    /// as not needing protection, and its `TableProxy` could be reclaimed
    /// while the straggler is still mid-CAS on the table's own memory.
    /// Stamping the proxy at the table's own allocation predates every
    /// straggler that could ever see this table, closing the gap: the
    /// same guarantee `HopscotchMap::try_resize` gets for free by retiring
    /// its table struct directly (`RetiredNode` embedded at construction).
    ///
    /// # Panics
    /// Panics if rounding `capacity` up to a power of two overflows, or if
    /// the resulting layout overflows. Allocation failure is reported
    /// through `handle_alloc_error` (which normally aborts).
    fn alloc(capacity: usize) -> Self {
        // `next_power_of_two` panics on overflow only in debug builds; in
        // release it wraps to 0, which the `max` below would silently turn
        // into a MIN_CAPACITY table. Fail the same way in every profile.
        let capacity = capacity
            .checked_next_power_of_two()
            .expect("table capacity overflow")
            .max(MIN_CAPACITY);
        let (layout, _) = Self::layout(capacity);
        // SAFETY: layout is non-zero sized; zeroed AtomicUsize == null bucket.
        let header = unsafe { alloc::alloc::alloc_zeroed(layout) as *mut TableHeader };
        if header.is_null() {
            // The documented out-of-memory path for raw allocations, and the
            // same one `Box::new` below takes on failure.
            alloc::alloc::handle_alloc_error(layout);
        }
        // SAFETY: `header` is non-null and points at a fresh allocation that
        // begins with room for a properly aligned `TableHeader`.
        unsafe {
            (*header).mask = capacity - 1;
            (*header).capacity = capacity;
        }
        let table = Self::from_raw(header);
        // Build the proxy now so its `RetiredNode` is stamped at the table's
        // birth (see the doc comment above).
        let proxy = Box::into_raw(Box::new(TableProxy {
            retired: RetiredNode::new(),
            table,
        }));
        // SAFETY: same allocation as above; the table has not been published
        // yet, so no other thread can observe this write.
        unsafe { (*header).proxy = proxy as usize };
        table
    }

    /// Take this table's pre-built retirement proxy, for a single upcoming
    /// `retire()` call. Must be called at most once per table.
    ///
    /// # Panics
    /// Panics if the proxy was already taken or freed.
    #[inline(always)]
    fn take_proxy(self) -> *mut TableProxy<K, V> {
        // SAFETY: relies on `self.header` pointing at a live table.
        // NOTE(review): `proxy` is a plain (non-atomic) field, so this
        // assumes a single thread touches it — presumably the one holding
        // the resize latch; confirm at the call site.
        let proxy = unsafe { (*self.header).proxy };
        assert_ne!(proxy, 0, "kovan-map: table proxy already taken");
        // Zero marks the proxy as taken, so `drop_unused_proxy` becomes a
        // no-op for this table.
        unsafe { (*self.header).proxy = 0 };
        proxy as *mut TableProxy<K, V>
    }

    /// Free this table's proxy directly (without running its `Drop`, which
    /// would call back into `free`/`free_array_only` on this same table) if
    /// it was never retired. Used on every path where a table dies without
    /// ever being resized away: `HashMap::drop`, `IntoIter`, and a
    /// discarded, never-published resize target. No-op if `take_proxy` was
    /// already called (the normal resize-retirement path).
    ///
    /// # Safety
    /// `self.header` must point at a live table to which the caller has
    /// exclusive access.
    #[inline(always)]
    unsafe fn drop_unused_proxy(self) {
        let proxy = unsafe { (*self.header).proxy };
        if proxy != 0 {
            // SAFETY: a non-zero value is the pointer produced by
            // `Box::into_raw` in `alloc`, so it was allocated with exactly
            // this layout; it is zeroed first so it cannot be freed twice.
            unsafe {
                (*self.header).proxy = 0;
                alloc::alloc::dealloc(
                    proxy as *mut u8,
                    core::alloc::Layout::new::<TableProxy<K, V>>(),
                );
            }
        }
    }

    /// Free only the table allocation, not the chains (caller already drained
    /// the live nodes, e.g. `IntoIter`). Tagged nodes remain kovan-owned.
    ///
    /// Also releases the table's proxy if it was never taken.
    ///
    /// # Safety
    /// Exclusive access; the chains must already be drained.
    unsafe fn free_array_only(self) {
        unsafe { self.drop_unused_proxy() };
        let capacity = unsafe { (*self.header).capacity };
        let (layout, _) = Self::layout(capacity);
        // SAFETY: the layout is recomputed from the stored capacity, so it
        // matches the one `alloc` used for this allocation.
        unsafe { alloc::alloc::dealloc(self.header as *mut u8, layout) };
    }

    /// Free remaining chains (skipping tagged nodes — already retired by
    /// their tag owners) and the allocation itself.
    ///
    /// Also releases the table's proxy if it was never taken.
    ///
    /// # Safety
    /// Caller must have exclusive access (map drop, or proxy reclamation
    /// after guard quiescence).
    unsafe fn free(self) {
        unsafe { self.drop_unused_proxy() };
        let capacity = unsafe { (*self.header).capacity };
        // NOTE(review): the guard looks like it exists only because
        // `Atomic::load` takes one — access is already exclusive here.
        let guard = pin();
        for i in 0..capacity {
            let mut current = self.bucket(i).load(Ordering::Relaxed, &guard).as_raw();
            while !current.is_null() {
                unsafe {
                    // Read the successor before the node may be dropped.
                    let next = (*current).next.load(Ordering::Relaxed, &guard).as_raw();
                    // A tagged `next` means the tag owner already retired
                    // this node; dropping it here would free it twice.
                    if !is_tagged(next) {
                        drop(Box::from_raw(current));
                    }
                    current = untag(next);
                }
            }
        }
        drop(guard);
        let (layout, _) = Self::layout(capacity);
        // SAFETY: the layout is recomputed from the stored capacity, so it
        // matches the one `alloc` used for this allocation.
        unsafe { alloc::alloc::dealloc(self.header as *mut u8, layout) };
    }

    /// Raw pointer to the table header (the start of the allocation).
    #[inline(always)]
    fn as_raw(self) -> *mut TableHeader {
        self.header
    }

    /// Number of buckets in this table.
    #[inline(always)]
    fn capacity(self) -> usize {
        // SAFETY: relies on `self.header` pointing at a live table.
        unsafe { (*self.header).capacity }
    }

    /// Pointer to the first bucket, located `layout_offset()` bytes past
    /// the header.
    #[inline(always)]
    fn buckets(self) -> *const Atomic<Node<K, V>> {
        let (_, offset) = Self::layout_offset();
        // SAFETY: the offset is where `layout` places the bucket array, so
        // the result stays inside the table allocation.
        unsafe { (self.header as *const u8).add(offset) as *const Atomic<Node<K, V>> }
    }

    /// Byte offset of the bucket array from the start of the allocation.
    ///
    /// The offset depends only on the header's size and the bucket type's
    /// alignment, not on the capacity, so it is derived here from a
    /// single-bucket layout. The unit in the returned tuple carries no
    /// information.
    #[inline(always)]
    fn layout_offset() -> ((), usize) {
        let header = core::alloc::Layout::new::<TableHeader>();
        let one = core::alloc::Layout::new::<Atomic<Node<K, V>>>();
        let (_, offset) = header.extend(one).expect("layout");
        ((), offset)
    }

    /// Maps a hash to a bucket index by masking with `capacity - 1`.
    #[inline(always)]
    fn bucket_index(self, hash: u64) -> usize {
        // SAFETY: relies on `self.header` pointing at a live table.
        (hash as usize) & unsafe { (*self.header).mask }
    }

    /// Reference to bucket `idx`. The caller must keep `idx < capacity`.
    #[inline(always)]
    fn bucket(self, idx: usize) -> &'static Atomic<Node<K, V>> {
        // SAFETY: idx is masked or bounded by capacity; the 'static is a
        // lie scoped by the caller's guard (same discipline as Shared).
        unsafe { &*self.buckets().add(idx) }
    }
}

/// Reclamation proxy for a table allocation (RetiredNode at offset 0).
///
/// Built together with its table in `TableRef::alloc` and stored
/// type-erased in the table header. It is either taken with
/// `TableRef::take_proxy` for retirement, or deallocated directly by
/// `TableRef::drop_unused_proxy` without this type's `Drop` running.
#[repr(C)]
struct TableProxy<K: 'static, V: 'static> {
    /// kovan reclamation header; first field, hence offset 0 under
    /// `#[repr(C)]`.
    retired: RetiredNode,
    /// The table this proxy frees when it is dropped.
    table: TableRef<K, V>,
}

// SAFETY (kovan retirement rule): the proxy's destructor (running on any
// thread) frees the table's nodes, hence K, V: Send.
unsafe impl<K: Send, V: Send> Send for TableProxy<K, V> {}
unsafe impl<K: Send + Sync, V: Send + Sync> Sync for TableProxy<K, V> {}

impl<K, V> Drop for TableProxy<K, V> {
    /// Frees the guarded table: its remaining chains and the allocation.
    ///
    /// On the retirement path `take_proxy` has already zeroed the header's
    /// `proxy` field, so the `drop_unused_proxy` call inside `free` does
    /// nothing and this proxy is not deallocated a second time from there.
    fn drop(&mut self) {
        // SAFETY: kovan reclaimed the proxy only after every guard that
        // could observe the old table has been released.
        unsafe { self.table.free() };
    }
}

/// High-Performance Lock-Free Map with automatic grow/shrink.
///
/// Readers (`get`, iteration) work on a table snapshot under a guard; writers
/// wait while a resize is in flight and re-validate after mutating.
pub struct HashMap<K: 'static, V: 'static, S = FixedState> {
    /// Pointer to the current table's header; swapped by `try_resize`.
    table: Atomic<TableHeader>,
    /// Approximate live-entry count driving the resize thresholds.
    /// Signed: a transient negative under racing removes is harmless and
    /// avoids a CAS loop (fetch_update) on the hot remove path.
    count: AtomicIsize,
    /// Single-resizer latch; writers wait while a resize is in flight.
    /// `clear` also holds it while it detaches the chains.
    resizing: AtomicBool,
    /// Shrink floor: the initial capacity. The map never shrinks below the
    /// size it was created with, preserving the caller's sizing intent (and
    /// the historical fixed-table behavior for `new()`).
    floor: usize,
    /// Hash builder used to hash every key.
    hasher: S,
    /// Ties the map to its key/value types without storing them inline.
    _marker: core::marker::PhantomData<(K, V)>,
}

#[cfg(feature = "std")]
impl<K, V> HashMap<K, V, FixedState>
where
    K: Hash + Eq + Clone + 'static,
    V: Clone + 'static,
{
    /// Builds an empty map with the default capacity and the default
    /// FoldHash `FixedState` hasher.
    pub fn new() -> Self {
        Self::with_capacity(DEFAULT_CAPACITY)
    }

    /// Builds an empty map with at least `capacity` buckets and the default
    /// FoldHash `FixedState` hasher.
    pub fn with_capacity(capacity: usize) -> Self {
        let hasher = FixedState::default();
        Self::with_capacity_and_hasher(capacity, hasher)
    }
}

impl<K, V, S> HashMap<K, V, S>
where
    K: Hash + Eq + Clone + 'static,
    V: Clone + 'static,
    S: BuildHasher,
{
    /// Creates a new hash map with a custom hasher and `DEFAULT_CAPACITY`
    /// as the requested bucket count.
    pub fn with_hasher(hasher: S) -> Self {
        Self::with_capacity_and_hasher(DEFAULT_CAPACITY, hasher)
    }

    /// Builds an empty map with at least `capacity` buckets and the given
    /// hasher.
    ///
    /// The map grows once its load factor exceeds 0.75 and shrinks once it
    /// drops below 0.25. The capacity of the initial table is recorded as the
    /// shrink floor, so the map never shrinks below its starting size.
    pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self {
        let initial = TableRef::<K, V>::alloc(capacity);
        Self {
            floor: initial.capacity(),
            table: Atomic::new(initial.as_raw()),
            count: AtomicIsize::new(0),
            resizing: AtomicBool::new(false),
            hasher,
            _marker: core::marker::PhantomData,
        }
    }

    /// Number of buckets in the table that is current at the time of the call.
    pub fn capacity(&self) -> usize {
        let guard = pin();
        let raw = self.table.load(Ordering::Acquire, &guard).as_raw();
        TableRef::<K, V>::from_raw(raw).capacity()
    }

    /// Busy-waits until the `resizing` latch is observed clear.
    #[inline]
    fn wait_for_resize(&self) {
        loop {
            if !self.resizing.load(Ordering::Acquire) {
                return;
            }
            resize_spin_hint();
        }
    }

    /// Looks up `key` and returns a clone of its value, if present.
    ///
    /// Never waits on the resize latch: the lookup runs against whichever
    /// table is current when the pointer is loaded, protected by a guard,
    /// even if a resize is in progress.
    pub fn get<Q>(&self, key: &Q) -> Option<V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        let hash = self.hasher.hash_one(key);
        let guard = pin();
        let raw_table = self.table.load(Ordering::Acquire, &guard).as_raw();
        let table = TableRef::<K, V>::from_raw(raw_table);
        let slot = table.bucket(table.bucket_index(hash));

        let mut cursor = slot.load(Ordering::Acquire, &guard).as_raw();
        loop {
            if cursor.is_null() {
                return None;
            }
            // SAFETY: `cursor` is a non-null node pointer read from the chain
            // while `guard` is held.
            let node = unsafe { &*cursor };
            // Cheap integer comparison first, key comparison second. A node
            // that is already logically deleted may still match: that read
            // linearizes before the delete, so the tag is not inspected here.
            if node.hash == hash && node.key.borrow() == key {
                return Some(node.value.clone());
            }
            // The successor pointer may carry the deletion tag; strip it.
            cursor = untag(node.next.load(Ordering::Acquire, &guard).as_raw());
        }
    }

    /// Checks if the key exists.
    ///
    /// Implemented on top of [`get`](Self::get), so a present value is cloned
    /// and immediately dropped.
    pub fn contains_key<Q>(&self, key: &Q) -> bool
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        self.get(key).is_some()
    }

    /// Insert a key-value pair, replacing any existing entry for `key`.
    ///
    /// Returns `Some(previous_value)` when an existing entry was replaced and
    /// `None` when a new entry was appended. The reported value comes from
    /// the first successful attempt; if a concurrent resize forces the
    /// operation to be redone on the new table, later attempts do not change
    /// the result.
    ///
    /// Spins while a resize is in flight, and may itself trigger a grow after
    /// appending a new entry when the load factor exceeds 3/4.
    pub fn insert(&self, key: K, value: V) -> Option<V> {
        let hash = self.hasher.hash_one(&key);
        let mut backoff = Backoff::new();
        // Count a new key exactly once across re-validation retries
        // (mirrors HopscotchMap: prevents both under-count, which causes
        // cascading resizes, and double-count).
        let mut counted = false;
        // The first successful op's previous value is the linearized result;
        // re-validation retries may replace a migrated clone of it.
        let mut result: Option<Option<V>> = None;

        'outer: loop {
            self.wait_for_resize();

            let guard = pin();
            let table_raw = self.table.load(Ordering::Acquire, &guard).as_raw();
            let table = TableRef::<K, V>::from_raw(table_raw);
            // A resize may have started between the wait and the table load;
            // start over rather than mutate a table that is being migrated.
            if self.resizing.load(Ordering::Acquire) {
                continue;
            }

            let bucket = table.bucket(table.bucket_index(hash));

            // 1. Search for existing key to update (snip-walk: physically
            //    unlink logically-deleted nodes as we pass them).
            let mut prev_link = bucket;
            let mut current = prev_link.load(Ordering::Acquire, &guard).as_raw();

            while !current.is_null() {
                unsafe {
                    let node = &*current;
                    let next = node.next.load(Ordering::Acquire, &guard).as_raw();

                    if is_tagged(next) {
                        // Logically deleted: snip it out (its tag owner has
                        // already retired it). On contention restart the scan.
                        if prev_link
                            .compare_exchange(
                                Shared::from_raw(current),
                                Shared::from_raw(untag(next)),
                                Ordering::AcqRel,
                                Ordering::Relaxed,
                                &guard,
                            )
                            .is_err()
                        {
                            backoff.spin();
                            continue 'outer;
                        }
                        current = untag(next);
                        continue;
                    }

                    if node.hash == hash && node.key == key {
                        // Replace: logically delete the old node (tag-CAS
                        // makes us its exclusive owner), then swing the
                        // predecessor to the replacement in one step.
                        let old_value = node.value.clone();
                        if node
                            .next
                            .compare_exchange(
                                Shared::from_raw(next),
                                Shared::from_raw(tagged(next)),
                                Ordering::AcqRel,
                                Ordering::Relaxed,
                                &guard,
                            )
                            .is_err()
                        {
                            // Someone else deleted/replaced it first.
                            backoff.spin();
                            continue 'outer;
                        }
                        // We own the old node now — we retire it, exactly once.
                        let new_node = Box::into_raw(Box::new(Node {
                            retired: RetiredNode::new(),
                            hash,
                            key: key.clone(),
                            value: value.clone(),
                            next: Atomic::new(next),
                        }));
                        let swapped = prev_link
                            .compare_exchange(
                                Shared::from_raw(current),
                                Shared::from_raw(new_node),
                                Ordering::AcqRel,
                                Ordering::Relaxed,
                                &guard,
                            )
                            .is_ok();
                        // SAFETY: tag ownership; Node is #[repr(C)] with
                        // RetiredNode at offset 0.
                        retire(current);
                        if result.is_none() {
                            result = Some(Some(old_value));
                        }
                        if !swapped {
                            // A helper snipped the old node before our swing;
                            // the replacement is not installed — retry the
                            // whole op (the removal already linearized).
                            drop(Box::from_raw(new_node));
                            backoff.spin();
                            continue 'outer;
                        }
                        // Dekker/SB fence: pairs with the matching fence in
                        // `try_resize`, placed right after it claims
                        // `resizing`. Without both fences, the swing-CAS
                        // above (a store) and the resizing/table loads just
                        // below are this thread's store-then-load half of a
                        // race against the resizer's own store-then-load
                        // half (claim `resizing`, then read this bucket
                        // during the sweep): plain AcqRel/Acquire lets both
                        // sides observe the pre-update value of the other's
                        // write (the classic store-buffering litmus test),
                        // so the sweep could miss this mutation while we
                        // simultaneously miss that a resize is in flight,
                        // silently orphaning the update in the table being
                        // retired. The fence forces this CAS and the
                        // resizer's `resizing` claim into the same SeqCst
                        // total order, so at least one side is guaranteed to
                        // observe the other. x86 TSO hides the gap (every
                        // CAS there is already a full fence); ARM's AcqRel
                        // is not.
                        fence(Ordering::SeqCst);
                        // Re-validate: if a resize started (or completed)
                        // since we loaded the table, the migration may have
                        // cloned the entry before our update — redo the op
                        // on the new table so the update is not lost.
                        if self.resizing.load(Ordering::SeqCst)
                            || self.table.load(Ordering::SeqCst, &guard).as_raw() != table_raw
                        {
                            continue 'outer;
                        }
                        // `result` was set just above, so the unwrap cannot fail.
                        return result.unwrap();
                    }

                    prev_link = &node.next;
                    current = next;
                }
            }

            // 2. Key not found. Insert at TAIL (prev_link). The CAS expects
            //    an untagged null, so it fails if the tail node was
            //    concurrently logically deleted.
            let new_node_ptr = Box::into_raw(Box::new(Node {
                retired: RetiredNode::new(),
                hash,
                key: key.clone(),
                value: value.clone(),
                next: Atomic::null(),
            }));

            match prev_link.compare_exchange(
                unsafe { Shared::from_raw(core::ptr::null_mut()) },
                unsafe { Shared::from_raw(new_node_ptr) },
                Ordering::Release,
                Ordering::Relaxed,
                &guard,
            ) {
                Ok(_) => {
                    if !counted {
                        counted = true;
                        self.count.fetch_add(1, Ordering::Relaxed);
                    }
                    if result.is_none() {
                        result = Some(None);
                    }
                    // Dekker/SB fence pairing with try_resize's claim-side
                    // fence (full justification on the replace path above):
                    // the tail-append CAS above is this thread's store-side
                    // of the same store-load race.
                    fence(Ordering::SeqCst);
                    // Re-validate against a concurrent migration (see above).
                    if self.resizing.load(Ordering::SeqCst)
                        || self.table.load(Ordering::SeqCst, &guard).as_raw() != table_raw
                    {
                        continue 'outer;
                    }

                    // Grow check (only when we actually added an entry).
                    let new_count = self.count.load(Ordering::Relaxed).max(0) as usize;
                    let capacity = table.capacity();
                    // Integer load-factor check: count/cap > 3/4.
                    if 4 * new_count > 3 * capacity {
                        // Release our pin before resizing.
                        drop(guard);
                        self.try_resize(capacity * 2);
                    }
                    // `result` was set just above, so the unwrap cannot fail.
                    return result.unwrap();
                }
                Err(_) => {
                    // Contention at the tail — retry the search/append loop.
                    // The node was never published, so it can be freed directly.
                    unsafe {
                        drop(Box::from_raw(new_node_ptr));
                    }
                    backoff.spin();
                    continue 'outer;
                }
            }
        }
    }

    /// Insert a key-value pair only if the key does not exist.
    /// Returns `None` if inserted, `Some(existing_value)` if the key already exists.
    ///
    /// Under a concurrent resize the operation may be redone on the new
    /// table; in that case a caller whose insert succeeded can still receive
    /// `Some(value)` (a clone of the surviving entry), so the returned value
    /// should be treated as the canonical one.
    ///
    /// Spins while a resize is in flight, and may itself trigger a grow after
    /// appending a new entry when the load factor exceeds 3/4.
    pub fn insert_if_absent(&self, key: K, value: V) -> Option<V> {
        let hash = self.hasher.hash_one(&key);
        let mut backoff = Backoff::new();
        // Ensures the entry count is bumped at most once across retries.
        let mut counted = false;

        'outer: loop {
            self.wait_for_resize();

            let guard = pin();
            let table_raw = self.table.load(Ordering::Acquire, &guard).as_raw();
            let table = TableRef::<K, V>::from_raw(table_raw);
            // A resize may have started between the wait and the table load.
            if self.resizing.load(Ordering::Acquire) {
                continue;
            }

            let bucket = table.bucket(table.bucket_index(hash));

            // 1. Search for existing key (snip-walk).
            let mut prev_link = bucket;
            let mut current = prev_link.load(Ordering::Acquire, &guard).as_raw();

            while !current.is_null() {
                unsafe {
                    let node = &*current;
                    let next = node.next.load(Ordering::Acquire, &guard).as_raw();

                    if is_tagged(next) {
                        // Logically deleted node: unlink it and keep walking;
                        // restart the scan if the unlink loses a race.
                        if prev_link
                            .compare_exchange(
                                Shared::from_raw(current),
                                Shared::from_raw(untag(next)),
                                Ordering::AcqRel,
                                Ordering::Relaxed,
                                &guard,
                            )
                            .is_err()
                        {
                            backoff.spin();
                            continue 'outer;
                        }
                        current = untag(next);
                        continue;
                    }

                    if node.hash == hash && node.key == key {
                        // Found on a retry. This may be our own migrated
                        // clone OR another caller's entry that landed while
                        // the table swapped - the two are indistinguishable
                        // here, and reporting None for someone else's entry
                        // would admit a second winner. Return the canonical
                        // value either way (for our own clone that is a
                        // clone of the value we just inserted), matching
                        // HopscotchMap's retry semantics: under a concurrent
                        // resize a successful insert may report
                        // Some(its own value); callers must treat the
                        // returned value as canonical.
                        return Some(node.value.clone());
                    }
                    prev_link = &node.next;
                    current = next;
                }
            }

            // 2. Key not found (or our pre-migration insert was not carried
            //    over) — insert at TAIL. Untagged-null expectation makes the
            //    CAS fail if the tail was concurrently logically deleted.
            let new_node_ptr = Box::into_raw(Box::new(Node {
                retired: RetiredNode::new(),
                hash,
                key: key.clone(),
                value: value.clone(),
                next: Atomic::null(),
            }));

            match prev_link.compare_exchange(
                unsafe { Shared::from_raw(core::ptr::null_mut()) },
                unsafe { Shared::from_raw(new_node_ptr) },
                Ordering::Release,
                Ordering::Relaxed,
                &guard,
            ) {
                Ok(_) => {
                    if !counted {
                        counted = true;
                        self.count.fetch_add(1, Ordering::Relaxed);
                    }
                    // Dekker/SB fence pairing with try_resize's claim-side
                    // fence (full justification in HashMap::insert's replace
                    // path): the tail-append CAS above is this thread's
                    // store-side of the same store-load race.
                    fence(Ordering::SeqCst);
                    // Re-validate against a concurrent migration.
                    if self.resizing.load(Ordering::SeqCst)
                        || self.table.load(Ordering::SeqCst, &guard).as_raw() != table_raw
                    {
                        continue 'outer;
                    }

                    let new_count = self.count.load(Ordering::Relaxed).max(0) as usize;
                    let capacity = table.capacity();
                    // Integer load-factor check: count/cap > 3/4.
                    if 4 * new_count > 3 * capacity {
                        // Release our pin before resizing.
                        drop(guard);
                        self.try_resize(capacity * 2);
                    }
                    return None;
                }
                Err(actual_val) => {
                    // Contention at the tail.
                    unsafe {
                        let appended_ptr = actual_val.as_raw();
                        // Our node was never published; free it directly.
                        drop(Box::from_raw(new_node_ptr));
                        if !is_tagged(appended_ptr) && !appended_ptr.is_null() {
                            let appended = &*appended_ptr;
                            if appended.hash == hash && appended.key == key {
                                // Race lost, key exists now. Same canonical-
                                // value rule as the retry-found path above:
                                // even if a pre-migration attempt of ours
                                // inserted, the surviving entry is what
                                // every caller must converge on.
                                return Some(appended.value.clone());
                            }
                        }
                    }
                    backoff.spin();
                    continue 'outer;
                }
            }
        }
    }

    /// Returns the value stored for `key`, inserting `value` first if the key
    /// is absent.
    ///
    /// Concurrent callers for the same key converge on a single value: only
    /// one CAS at the list tail can succeed, and every other caller observes
    /// that node and returns its value.
    pub fn get_or_insert(&self, key: K, value: V) -> V {
        // `None` from `insert_if_absent` means our value went in; otherwise
        // the entry that was already there wins.
        self.insert_if_absent(key, value.clone()).unwrap_or(value)
    }

    /// Remove a key-value pair.
    ///
    /// Returns a clone of the removed value, or `None` if no entry for `key`
    /// was found. The reported value comes from the first successful removal;
    /// if a concurrent resize forces the operation to be redone on the new
    /// table, later attempts only evict migrated copies.
    ///
    /// Spins while a resize is in flight, and may itself trigger a shrink
    /// when the load factor drops below 1/4 and the table is above its floor.
    pub fn remove<Q>(&self, key: &Q) -> Option<V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        let hash = self.hasher.hash_one(key);
        let mut backoff = Backoff::new();
        // First successful removal's value is the linearized result;
        // re-validation retries only evict migrated clones.
        let mut result: Option<V> = None;

        'outer: loop {
            self.wait_for_resize();

            let guard = pin();
            let table_raw = self.table.load(Ordering::Acquire, &guard).as_raw();
            let table = TableRef::<K, V>::from_raw(table_raw);
            // A resize may have started between the wait and the table load.
            if self.resizing.load(Ordering::Acquire) {
                continue;
            }

            let bucket = table.bucket(table.bucket_index(hash));

            let mut prev_link = bucket;
            let mut current = prev_link.load(Ordering::Acquire, &guard).as_raw();

            while !current.is_null() {
                unsafe {
                    let node = &*current;
                    let next = node.next.load(Ordering::Acquire, &guard).as_raw();

                    if is_tagged(next) {
                        // Logically deleted by someone else: snip and move on.
                        if prev_link
                            .compare_exchange(
                                Shared::from_raw(current),
                                Shared::from_raw(untag(next)),
                                Ordering::AcqRel,
                                Ordering::Relaxed,
                                &guard,
                            )
                            .is_err()
                        {
                            backoff.spin();
                            continue 'outer;
                        }
                        current = untag(next);
                        continue;
                    }

                    if node.hash == hash && node.key.borrow() == key {
                        let old_value = node.value.clone();

                        // Logical delete: tag the victim's next. The tag
                        // owner — and only the tag owner — retires the node,
                        // and the tag makes concurrent tail-inserts onto
                        // this node fail.
                        if node
                            .next
                            .compare_exchange(
                                Shared::from_raw(next),
                                Shared::from_raw(tagged(next)),
                                Ordering::AcqRel,
                                Ordering::Relaxed,
                                &guard,
                            )
                            .is_err()
                        {
                            backoff.spin();
                            continue 'outer;
                        }

                        // Physical unlink (best effort — if it fails, a
                        // later walker snips it).
                        let _ = prev_link.compare_exchange(
                            Shared::from_raw(current),
                            Shared::from_raw(next),
                            Ordering::AcqRel,
                            Ordering::Relaxed,
                            &guard,
                        );

                        // SAFETY: tag ownership; Node is #[repr(C)] with
                        // RetiredNode at offset 0.
                        retire(current);
                        if result.is_none() {
                            result = Some(old_value);
                        }

                        // Single atomic decrement (signed counter — cannot
                        // wrap; a transient negative just clamps to 0 below).
                        let new_count =
                            (self.count.fetch_sub(1, Ordering::Relaxed) - 1).max(0) as usize;
                        // Integer load-factor check: count/cap < 1/4.
                        let shrink_to = (4 * new_count < table.capacity()
                            && table.capacity() > self.floor)
                            .then_some(table.capacity() / 2);

                        // Dekker/SB fence pairing with try_resize's
                        // claim-side fence (full justification in
                        // HashMap::insert's replace path): the tag-CAS above
                        // is this thread's store-side of the same
                        // store-load race, so the same "sweep misses the
                        // mutation and we miss the resize" window applies
                        // here, and the resurrection this re-validation
                        // exists to catch would otherwise go undetected.
                        fence(Ordering::SeqCst);
                        // Re-validate: a concurrent migration may have cloned
                        // this entry into the new table before we deleted it
                        // here — redo the removal on the current table so the
                        // key does not resurrect.
                        if self.resizing.load(Ordering::SeqCst)
                            || self.table.load(Ordering::SeqCst, &guard).as_raw() != table_raw
                        {
                            continue 'outer;
                        }

                        if let Some(cap) = shrink_to {
                            // Release our pin before resizing.
                            drop(guard);
                            self.try_resize(cap);
                        }
                        return result;
                    }

                    prev_link = &node.next;
                    current = next;
                }
            }

            // Key not present in the current table. `result` is still `Some`
            // if an earlier attempt removed the entry before a retry.
            return result;
        }
    }

    /// Evicts **every** node matching `key` and returns the value of the
    /// first one removed (the live, most recent version), or `None` if the
    /// key was not present at all.
    ///
    /// [`remove`](Self::remove) only unlinks the first matching entry.
    /// Insert/remove races can transiently leave several entries
    /// ("versions") for one key, so an older version could become visible
    /// again after a plain `remove()`. This method calls `remove()`
    /// repeatedly until a full scan finds no match, which guarantees the key
    /// is absent at the linearization point of that final scan.
    ///
    /// Prefer `remove()` for single-version semantics and `force_remove()`
    /// when the key must be gone entirely.
    ///
    /// Note: as with any removal under contention, a concurrent `insert` of
    /// the same key can still land after the final scan.
    pub fn force_remove<Q>(&self, key: &Q) -> Option<V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        // The first removal takes the first match in scan order — the live
        // (most recent) version. Nothing to do if the key is absent.
        let newest = self.remove(key)?;
        // Drain any remaining versions; their values are discarded.
        while self.remove(key).is_some() {}
        Some(newest)
    }

    /// Clear the map.
    pub fn clear(&self) {
        // Take the resize latch so the table cannot be swapped (and no
        // writer is mid-migration) while we unlink the chains.
        while self
            .resizing
            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
            .is_err()
        {
            resize_spin_hint();
        }

        let guard = pin();
        let table = TableRef::<K, V>::from_raw(self.table.load(Ordering::Acquire, &guard).as_raw());

        for i in 0..table.capacity() {
            let bucket = table.bucket(i);
            loop {
                let head = bucket.load(Ordering::Acquire, &guard);
                if head.is_null() {
                    break;
                }

                // Try to unlink the whole chain at once
                match bucket.compare_exchange(
                    head,
                    unsafe { Shared::from_raw(core::ptr::null_mut()) },
                    Ordering::Release,
                    Ordering::Relaxed,
                    &guard,
                ) {
                    Ok(_) => {
                        // Retire the chain's live nodes. Tagged nodes were
                        // already retired by their tag owners — skip them.
                        unsafe {
                            let mut current = head.as_raw();
                            while !current.is_null() {
                                let next = (*current).next.load(Ordering::Relaxed, &guard).as_raw();
                                if !is_tagged(next) {
                                    // SAFETY: allocated via Box::into_raw;
                                    // Node is #[repr(C)], RetiredNode first.
                                    retire(current);
                                }
                                current = untag(next);
                            }
                        }
                        break;
                    }
                    Err(_) => {
                        // Contention, retry
                        continue;
                    }
                }
            }
        }

        self.count.store(0, Ordering::Release);
        self.resizing.store(false, Ordering::Release);
    }

    /// Returns true if the map is empty.
    ///
    /// Based on [`len`](Self::len), so the answer is approximate while
    /// concurrent updates are in flight.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Number of entries currently tracked by the map.
    ///
    /// O(1): reads the counter maintained by insert/remove. The value is
    /// approximate while concurrent updates are in flight (exact in
    /// quiescence), like `HopscotchMap`. A transiently negative counter is
    /// reported as zero.
    pub fn len(&self) -> usize {
        let tracked = self.count.load(Ordering::Relaxed);
        if tracked > 0 {
            tracked as usize
        } else {
            0
        }
    }

    /// Resize the table to `new_capacity` buckets (single resizer wins).
    ///
    /// Clones every entry into a new table, swaps the table pointer, then
    /// retires the old table. The old table's destructor frees whatever
    /// nodes remain in its chains at reclamation time — entries removed or
    /// replaced in the meantime were unlinked and retired individually, so
    /// nothing is freed twice and nothing leaks.
    fn try_resize(&self, new_capacity: usize) {
        if self
            .resizing
            .compare_exchange(false, true, Ordering::SeqCst, Ordering::Relaxed)
            .is_err()
        {
            return;
        }

        // Dekker/SB fence: pairs with the matching fence every writer
        // (insert/insert_if_absent/remove) executes right before its
        // resizing/table re-validation check. This claim-CAS and the
        // migration sweep's bucket reads below are this thread's
        // store-then-load half of the same store-load race a writer's
        // data-mutating CAS and its own re-validation load form the other
        // half of; without a fence on both sides, AcqRel/Acquire permits
        // both this sweep and that writer's check to observe the pre-update
        // value of the other's write (the classic store-buffering litmus
        // test), silently losing the writer's update to the table being
        // retired. x86 TSO hides the gap (every CAS there is already a full
        // fence); ARM's AcqRel is not.
        fence(Ordering::SeqCst);

        let new_capacity = new_capacity
            .next_power_of_two()
            .max(MIN_CAPACITY)
            .max(self.floor);
        let guard = pin();
        let old_raw = self.table.load(Ordering::Acquire, &guard).as_raw();
        let old_table = TableRef::<K, V>::from_raw(old_raw);

        if old_table.capacity() == new_capacity {
            self.resizing.store(false, Ordering::Release);
            return;
        }

        let new_table = TableRef::<K, V>::alloc(new_capacity);

        // Migrate: clone every live entry (logically-deleted nodes — tagged
        // next — are skipped). We are the only writer of the new table (it
        // is unpublished), so plain stores are sufficient.
        for i in 0..old_table.capacity() {
            let bucket = old_table.bucket(i);
            let mut current = bucket.load(Ordering::Acquire, &guard).as_raw();
            while !current.is_null() {
                let node = unsafe { &*current };
                let next = node.next.load(Ordering::Acquire, &guard).as_raw();
                if !is_tagged(next) {
                    let dst = new_table.bucket(new_table.bucket_index(node.hash));
                    let head = dst.load(Ordering::Relaxed, &guard);
                    let clone = Box::into_raw(Box::new(Node {
                        retired: RetiredNode::new(),
                        hash: node.hash,
                        key: node.key.clone(),
                        value: node.value.clone(),
                        next: Atomic::new(head.as_raw()),
                    }));
                    dst.store(unsafe { Shared::from_raw(clone) }, Ordering::Relaxed);
                }
                current = untag(next);
            }
        }

        match self.table.compare_exchange(
            unsafe { Shared::from_raw(old_raw) },
            unsafe { Shared::from_raw(new_table.as_raw()) },
            Ordering::Release,
            Ordering::Relaxed,
            &guard,
        ) {
            Ok(_) => {
                // Retire the old table through its proxy (built eagerly
                // back when this table was allocated, see `TableRef::alloc`,
                // so its birth_epoch predates every straggler that could
                // have observed this table): reclamation (which frees the
                // remaining chains and the allocation) is deferred until
                // every guard that could observe it is gone.
                let proxy = old_table.take_proxy();
                // SAFETY: TableProxy is #[repr(C)] with RetiredNode at
                // offset 0, allocated via Box::into_raw.
                unsafe { retire(proxy) };
            }
            Err(_) => {
                // Table changed under us (cannot normally happen — we hold
                // the resize latch). Discard the unpublished new table.
                unsafe { new_table.free() };
            }
        }

        self.resizing.store(false, Ordering::Release);
    }

    /// Creates an iterator over the entries of the map.
    ///
    /// The iterator walks the table that is current when `iter` is called and
    /// yields `(K, V)` clones. It carries its own guard for that snapshot.
    pub fn iter(&self) -> Iter<'_, K, V, S> {
        let guard = pin();
        let snapshot = self.table.load(Ordering::Acquire, &guard).as_raw();
        Iter {
            _map: self,
            table: TableRef::<K, V>::from_raw(snapshot),
            bucket_idx: 0,
            current: core::ptr::null(),
            guard,
        }
    }

    /// Creates an iterator over the keys of the map.
    /// Each item is a clone of a stored `K`.
    pub fn keys(&self) -> Keys<'_, K, V, S> {
        let iter = self.iter();
        Keys { iter }
    }

    /// Creates an iterator over the values of the map; each item is a clone
    /// of a stored `V`.
    pub fn values(&self) -> Values<'_, K, V, S> {
        let iter = self.iter();
        Values { iter }
    }

    /// Inserts every `(K, V)` pair produced by `iter`, in order, through
    /// [`insert`](Self::insert). Takes `&self` because the map is concurrent;
    /// previous values returned by `insert` are discarded.
    pub fn extend<I: IntoIterator<Item = (K, V)>>(&self, iter: I) {
        iter.into_iter().for_each(|(k, v)| {
            self.insert(k, v);
        });
    }

    /// Returns a reference to the hash builder this map was created with.
    pub fn hasher(&self) -> &S {
        &self.hasher
    }
}

/// Iterator over HashMap entries.
///
/// Created by `HashMap::iter`; yields `(K, V)` clones while walking the table
/// snapshot captured at creation, bucket by bucket.
///
/// Field ordering matters for drop safety.
/// Rust drops struct fields in declaration order.
/// The `guard` must be dropped *after* `current`/`table` so that the epoch
/// pin covering the snapshot is not released before we're done with the raw
/// pointers. Do not reorder the fields.
pub struct Iter<'a, K: 'static, V: 'static, S> {
    /// Borrow of the map being iterated; ties the iterator to lifetime `'a`
    /// so it cannot outlive the map.
    _map: &'a HashMap<K, V, S>,
    /// Table snapshot loaded once when the iterator was created.
    table: TableRef<K, V>,
    /// Index of the next bucket whose chain has not been loaded yet.
    bucket_idx: usize,
    /// Next node to examine in the current bucket chain; null when the chain
    /// is exhausted (and before the first bucket has been loaded).
    current: *const Node<K, V>,
    /// Pin guard taken at creation. Declared last so it is dropped last.
    guard: kovan::Guard,
}

impl<'a, K, V, S> Iterator for Iter<'a, K, V, S>
where
    K: Clone,
    V: Clone,
{
    type Item = (K, V);

    /// Returns a clone of the next live entry of the snapshot, or `None` once
    /// every bucket has been visited.
    ///
    /// Nodes whose `next` link carries the deletion tag are stepped over
    /// without being yielded.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.current.is_null() {
                // Current chain is exhausted: load the next bucket head, or
                // stop when there are no buckets left.
                if self.bucket_idx >= self.table.capacity() {
                    return None;
                }
                let head = self.table.bucket(self.bucket_idx);
                self.bucket_idx += 1;
                self.current = head.load(Ordering::Acquire, &self.guard).as_raw();
                continue;
            }

            // SAFETY: `current` is non-null (checked just above) and was read
            // from a bucket head or a predecessor's `next` link of the
            // snapshot table. The iterator still holds `self.guard`, the pin
            // that (per the struct documentation) covers these raw pointers.
            let (node, link) = unsafe {
                let node = &*self.current;
                (node, node.next.load(Ordering::Acquire, &self.guard).as_raw())
            };

            // Always step forward first; the link may carry a deletion tag,
            // so strip it before storing the successor.
            self.current = untag(link);

            // A tagged link marks `node` as logically deleted: skip it.
            if !is_tagged(link) {
                return Some((node.key.clone(), node.value.clone()));
            }
        }
    }
}

/// Iterator over the keys of a HashMap.
///
/// A thin adapter around [`Iter`]: each step obtains a cloned `(K, V)` entry
/// from the inner iterator and keeps only the key.
pub struct Keys<'a, K: 'static, V: 'static, S> {
    iter: Iter<'a, K, V, S>,
}

impl<'a, K, V, S> Iterator for Keys<'a, K, V, S>
where
    K: Clone,
    V: Clone,
{
    type Item = K;

    /// Returns the key of the next entry, or `None` when the inner iterator
    /// is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let (key, _value) = self.iter.next()?;
        Some(key)
    }
}

impl<'a, K, V, S> IntoIterator for &'a HashMap<K, V, S>
where
    K: Hash + Eq + Clone + 'static,
    V: Clone + 'static,
    S: BuildHasher,
{
    type Item = (K, V);
    type IntoIter = Iter<'a, K, V, S>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

/// Iterator over the values of a HashMap (yields clones of `V`).
///
/// A thin adapter around [`Iter`]: each step obtains a cloned `(K, V)` entry
/// from the inner iterator and keeps only the value.
pub struct Values<'a, K: 'static, V: 'static, S> {
    iter: Iter<'a, K, V, S>,
}

impl<'a, K, V, S> Iterator for Values<'a, K, V, S>
where
    K: Clone,
    V: Clone,
{
    type Item = V;

    /// Returns the value of the next entry, or `None` when the inner iterator
    /// is exhausted.
    #[inline]
    fn next(&mut self) -> Option<V> {
        let (_key, value) = self.iter.next()?;
        Some(value)
    }
}

/// Owned iterator yielding `(K, V)` by value — moves out of the nodes, no
/// clone. Consuming the map gives exclusive access, so no guard protection of
/// the yielded values is needed.
///
/// A guard is still stored because the atomic loads performed while walking
/// the table take a guard reference as an argument.
///
/// Each yielded node has its allocation freed as it is consumed; dropping the
/// iterator drains whatever is left and then releases the bucket array.
pub struct IntoIter<K: 'static, V: 'static> {
    /// Table taken over from the consumed map.
    table: TableRef<K, V>,
    /// Index of the next bucket whose chain has not been loaded yet.
    bucket_idx: usize,
    /// Next node to consume in the current bucket chain; null when the chain
    /// is exhausted (and before the first bucket has been loaded).
    current: *mut Node<K, V>,
    /// Pin guard passed to the atomic loads; as the last field it is dropped
    /// after the others.
    guard: kovan::Guard,
}

impl<K, V> Iterator for IntoIter<K, V> {
    type Item = (K, V);

    /// Moves the next live entry out of the table and frees its node, or
    /// returns `None` once every bucket has been visited.
    ///
    /// Statement order inside the loop is significant: the successor link is
    /// read and stored *before* the node's fields are moved out and its
    /// memory is released, so the node is never touched after being freed.
    fn next(&mut self) -> Option<(K, V)> {
        loop {
            if !self.current.is_null() {
                let node = self.current;
                // SAFETY: `node` is non-null (checked above) and was read from
                // a bucket head or a predecessor's `next` link of the table
                // this iterator took over from the consumed map.
                let next = unsafe { (*node).next.load(Ordering::Acquire, &self.guard).as_raw() };
                // Advance first; the link may carry a deletion tag, so strip it.
                self.current = untag(next);
                if is_tagged(next) {
                    continue; // logically deleted, owned by kovan
                }
                // Move K and V out, then free the shell without running drop.
                // SAFETY: the node is live (untagged) and each field is read
                // exactly once; the node is never dropped or read again
                // because `current` has already moved past it.
                let k = unsafe { core::ptr::read(&(*node).key) };
                let v = unsafe { core::ptr::read(&(*node).value) };
                // SAFETY: releases only the raw allocation, so the moved-out
                // key and value are not dropped a second time.
                // NOTE(review): assumes nodes are allocated from the global
                // allocator with exactly `Layout::new::<Node<K, V>>()` —
                // confirm against the node allocation site.
                unsafe {
                    alloc::alloc::dealloc(
                        node as *mut u8,
                        core::alloc::Layout::new::<Node<K, V>>(),
                    );
                }
                return Some((k, v));
            }
            // Chain exhausted: stop if no buckets remain, otherwise load the
            // head of the next bucket and loop again.
            if self.bucket_idx >= self.table.capacity() {
                return None;
            }
            let bucket = self.table.bucket(self.bucket_idx);
            self.bucket_idx += 1;
            self.current = bucket.load(Ordering::Acquire, &self.guard).as_raw();
        }
    }
}

impl<K, V> Drop for IntoIter<K, V> {
    /// Drains the entries that were never yielded, then frees the bucket
    /// array.
    ///
    /// NOTE(review): unlike `HashMap::drop`, this path never calls
    /// `kovan::flush()` — confirm that is intended.
    fn drop(&mut self) {
        // Every remaining `next()` call moves a live (K, V) out and frees its
        // node shell; dropping the pair here runs the K/V destructors.
        self.by_ref().for_each(drop);
        // SAFETY: the iterator owns the table, and the drain above has
        // consumed every live node, so only the array itself is left to free.
        unsafe { self.table.free_array_only() };
    }
}

impl<K, V, S> IntoIterator for HashMap<K, V, S>
where
    K: 'static,
    V: 'static,
{
    type Item = (K, V);
    type IntoIter = IntoIter<K, V>;

    /// Consumes the map and returns an iterator that moves each `(K, V)` out
    /// of the table by value.
    ///
    /// Ownership of the table passes to the returned [`IntoIter`], which
    /// frees it when dropped.
    fn into_iter(self) -> IntoIter<K, V> {
        // `ManuallyDrop` keeps `HashMap::drop` from running: the table must
        // not be freed here, because the iterator takes it over.
        let mut map = core::mem::ManuallyDrop::new(self);
        let guard = pin();
        let raw_table = map.table.load(Ordering::Relaxed, &guard).as_raw();
        let table = TableRef::<K, V>::from_raw(raw_table);
        // With the map's destructor suppressed, the hasher has to be dropped
        // by hand; the remaining fields are atomics / usize / ZST marker and
        // need no destructor.
        // SAFETY: `map` is never used again after this point, so the hasher
        // is dropped exactly once.
        unsafe { core::ptr::drop_in_place(&mut map.hasher) };
        IntoIter {
            table,
            bucket_idx: 0,
            current: core::ptr::null_mut(),
            guard,
        }
    }
}

impl<K, V, S> core::iter::FromIterator<(K, V)> for HashMap<K, V, S>
where
    K: Hash + Eq + Clone + Send + 'static,
    V: Clone + Send + 'static,
    S: BuildHasher + Default,
{
    /// Builds a map from an iterator of `(K, V)` pairs.
    ///
    /// The map starts at `MIN_CAPACITY` with a default-constructed hasher,
    /// and the pairs are inserted one by one in iteration order.
    fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
        let collected = Self::with_capacity_and_hasher(MIN_CAPACITY, S::default());
        iter.into_iter().for_each(|(key, value)| {
            collected.insert(key, value);
        });
        collected
    }
}

#[cfg(feature = "std")]
impl<K, V> Default for HashMap<K, V, FixedState>
where
    K: Hash + Eq + Clone + 'static,
    V: Clone + 'static,
{
    /// Returns the same map that `HashMap::new()` produces.
    fn default() -> Self {
        HashMap::<K, V, FixedState>::new()
    }
}

// SAFETY (Send): sending the map to another thread moves ownership of its
// keys, values and hasher there, so `K`, `V` and `S` must each be `Send`.
unsafe impl<K, V, S> Send for HashMap<K, V, S>
where
    K: Send,
    V: Send,
    S: Send,
{
}

// SAFETY (Sync): sharing `&HashMap` between threads needs the stronger
// `Send + Sync` bound on `K`, `V` and `S`. Concurrent `get()` calls clone `V`
// through a `&V` reference across threads; if `V` were `Send` but not `Sync`,
// sharing the map could hand a non-`Sync` `V` reference to another thread via
// `clone()`, violating thread-safety.
unsafe impl<K, V, S> Sync for HashMap<K, V, S>
where
    K: Send + Sync,
    V: Send + Sync,
    S: Send + Sync,
{
}

impl<K: 'static, V: 'static, S> Drop for HashMap<K, V, S> {
    /// Frees the current table together with its remaining chains, then asks
    /// kovan to flush what earlier operations had retired.
    fn drop(&mut self) {
        // `&mut self` means exclusive ownership, so no concurrent reader can
        // exist: `Iter<'a, …>` borrows `&'a HashMap` and therefore cannot
        // outlive the map.
        let table = {
            // The guard is only needed to satisfy the `load` signature; it is
            // released at the end of this block, before anything is freed.
            let guard = pin();
            let raw_table = self.table.load(Ordering::Relaxed, &guard).as_raw();
            TableRef::<K, V>::from_raw(raw_table)
        };

        // SAFETY: exclusive access (see above); frees the remaining chains
        // and the table allocation itself.
        unsafe { table.free() };

        // Flush nodes/tables previously retired by concurrent operations.
        kovan::flush();
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A first insert reports no previous value; `get` only finds keys that
    /// were inserted.
    #[test]
    fn test_insert_and_get() {
        let map = HashMap::new();
        let previous = map.insert(1, 100);
        assert_eq!(previous, None);
        let hit = map.get(&1);
        assert_eq!(hit, Some(100));
        let miss = map.get(&2);
        assert_eq!(miss, None);
    }

    /// Re-inserting a key returns the old value and stores the new one.
    #[test]
    fn test_insert_replace() {
        let map = HashMap::new();
        let first = map.insert(1, 100);
        assert_eq!(first, None);
        let second = map.insert(1, 200);
        assert_eq!(second, Some(100));
        let stored = map.get(&1);
        assert_eq!(stored, Some(200));
    }

    /// Inserting far more entries than the initial capacity grows the table
    /// without losing any entry.
    #[test]
    fn test_grow() {
        const ENTRIES: u64 = 1000;

        let map = HashMap::with_capacity(64);
        assert_eq!(map.capacity(), 64);

        for key in 0..ENTRIES {
            map.insert(key, key * 2);
        }
        let grown = map.capacity();
        assert!(grown > 64, "map should have grown");

        for key in 0..ENTRIES {
            let found = map.get(&key);
            assert_eq!(found, Some(key * 2));
        }
        assert_eq!(map.len(), 1000);
    }

    /// Removing everything shrinks the table again, but never below the
    /// capacity the map was created with.
    #[test]
    fn test_shrink() {
        const ENTRIES: u64 = 1000;

        let map = HashMap::with_capacity(64);
        for key in 0..ENTRIES {
            map.insert(key, key);
        }
        let grown = map.capacity();
        assert!(grown > 64);

        for key in 0..ENTRIES {
            map.remove(&key);
        }
        let shrunk = map.capacity();
        assert!(
            shrunk < grown,
            "map should have shrunk (capacity {} -> {})",
            grown,
            shrunk
        );
        assert!(shrunk >= 64, "never below the initial capacity");
        assert_eq!(map.len(), 0);
    }

    /// A map created with a large capacity keeps it even after a small
    /// insert/remove workload.
    #[test]
    fn test_no_shrink_below_floor() {
        let map = HashMap::with_capacity(4096);
        let keys = 0..100u64;
        for key in keys.clone() {
            map.insert(key, key);
        }
        for key in keys {
            map.remove(&key);
        }
        let capacity = map.capacity();
        assert_eq!(capacity, 4096, "floor preserves sizing intent");
    }

    /// Four threads insert disjoint key ranges; every entry must be visible
    /// once all of them have finished.
    #[test]
    fn test_concurrent_inserts() {
        use alloc::sync::Arc;
        extern crate std;
        use std::thread;

        let map = Arc::new(HashMap::new());

        // Start every writer before joining any of them.
        let workers: alloc::vec::Vec<_> = (0..4)
            .map(|thread_id| {
                let shared = Arc::clone(&map);
                thread::spawn(move || {
                    for offset in 0..1000 {
                        let key = thread_id * 1000 + offset;
                        shared.insert(key, key * 2);
                    }
                })
            })
            .collect();

        for worker in workers {
            worker.join().unwrap();
        }

        for thread_id in 0..4 {
            for offset in 0..1000 {
                let key = thread_id * 1000 + offset;
                let found = map.get(&key);
                assert_eq!(found, Some(key * 2));
            }
        }
    }

    /// Eight threads push a small map through repeated growth; no key may be
    /// lost along the way.
    #[test]
    fn test_concurrent_grow() {
        use alloc::sync::Arc;
        extern crate std;
        use std::thread;

        const THREADS: u64 = 8;
        const PER_THREAD: u64 = 2000;
        const STRIDE: u64 = 10_000;

        let map = Arc::new(HashMap::with_capacity(64));

        // Start every writer before joining any of them.
        let workers: alloc::vec::Vec<_> = (0..THREADS)
            .map(|thread_id| {
                let shared = Arc::clone(&map);
                thread::spawn(move || {
                    for offset in 0..PER_THREAD {
                        let key = thread_id * STRIDE + offset;
                        shared.insert(key, key);
                    }
                })
            })
            .collect();

        for worker in workers {
            worker.join().unwrap();
        }

        for thread_id in 0..THREADS {
            for offset in 0..PER_THREAD {
                let key = thread_id * STRIDE + offset;
                assert_eq!(map.get(&key), Some(key), "lost key {key} during growth");
            }
        }
        assert!(map.capacity() >= 16_000);
    }
}