1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
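//! Memory handling for the LC-3 simulator.
//!
//! This module consists of:
//! - [`Word`]: A mutable memory location.
//! - [`WordFiller`]: A source of data for uninitialized words.
//! - [`MachineInitStrategy`]: The strategy used to pick the initial contents of memory and registers.
//! - [`MemArray`]: The memory array.
//! - [`RegFile`]: The register file.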

use rand::rngs::StdRng;
use rand::Rng;

use crate::ast::Reg;

/// A memory location that can be read and written to.
///
/// # Reading
///
/// A word's value can be read with:
/// - [`Word::get`] to directly access the value, ignoring any initialization state
/// - [`Word::get_if_init`] to directly access the value after verifying initialization state
///
/// See the respective functions for more details.
///
/// Both functions return the unsigned representation of the word.
/// If needed, this can be converted to a signed integer with typical `as` casting (`data as i16`).
///
/// # Writing
///
/// A word can be written into with a value or with another word:
/// - [`Word::set`] to write a value into this word
/// - [`Word::set_if_init`] to write another word into this word
///
/// [`Word::set_if_init`] may be more useful in situations where initialization state needs to be preserved
/// or when it needs to be verified.
///
/// See the respective functions for more details.
///
/// Words can also be written to by applying assign operations (e.g., add, sub, and, etc.).
/// All arithmetic operations that can be applied to words are assumed to be wrapping.
/// See those implementations for more details.
///
/// # Initialization
///
/// Internally, each memory location keeps track of two fields:
/// 1. its data (i.e., the value stored at this location)
/// 2. which bits of its data are truly "initialized" (as in the program knows what values are present there)
///
/// This second field is not used except for when the simulator is set to strict mode.
/// Then, this second field is leveraged to detect if uninitialized memory is being
/// written to places it shouldn't be (e.g., PC, addresses, registers and memory).
///
/// When a `Word` is created for memory/register files (i.e., via [`Word::new_uninit`]),
/// it is created with the initialization bits set to fully uninitialized.
/// The data associated with this `Word` is decided by the creation strategy
/// (see [`super::MachineInitStrategy`] for details).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Word {
    /// The value stored at this location.
    data: u16,
    /// Bitmask of which bits of `data` are considered initialized (a set bit means initialized).
    init: u16
}

/// Initialization mask with no bits set (the word is fully uninitialized).
const NO_BITS: u16 = u16::MIN;
/// Initialization mask with every bit set (the word is fully initialized).
const ALL_BITS: u16 = u16::MAX;

impl Word {
    /// Creates a new word that is considered uninitialized.
    pub fn new_uninit<F: WordFiller + ?Sized>(fill: &mut F) -> Self {
        Self {
            data: fill.generate(),
            init: NO_BITS,
        }
    }
    /// Creates a new word that is initialized with a given data value.
    pub fn new_init(data: u16) -> Self {
        Self {
            data,
            init: ALL_BITS,
        }
    }

    /// Reads the word, returning its unsigned representation.
    /// 
    /// The data is returned without checking for initialization state.
    /// If the initialization state should be checked before trying to query the data,
    /// then [`Word::get_if_init`] should be used instead.
    pub fn get(&self) -> u16 {
        self.data
    }
    /// Reads the word if it is properly initialized under strictness requirements, returning its unsigned representation.
    /// 
    /// This function is more cognizant of word initialization than [`Word::get`].
    /// - In non-strict mode (`strict == false`), this function unconditionally allows access to the data regardless of initialization state.
    /// - In strict mode (`strict == true`), this function verifies `self` is fully initialized, raising the provided error if not.
    pub fn get_if_init<E>(&self, strict: bool, err: E) -> Result<u16, E> {
        match !strict || self.is_init() {
            true  => Ok(self.data),
            false => Err(err)
        }
    }

    /// Writes to the word.
    /// 
    /// This sets the word to the `data` value assuming it is **fully** initialized
    /// and correspondingly sets the initialization state to be fully initialized.
    /// 
    /// If the initialization state of the `data` value should be checked before
    /// trying to write to the word, then [`Word::set_if_init`] should be used instead.
    pub fn set(&mut self, data: u16) {
        self.data = data;
        self.init = ALL_BITS;
    }
    /// Writes to the word while verifying the data stored is properly initialized under strictness requirements.
    /// 
    /// This function is more cognizant of word initialization than [`Word::set`].
    /// - In non-strict mode, this function preserves the initialization data of the `data` argument.
    /// - In strict mode, this function verifies `data` is fully initialized, raising the provided error if not.
    pub fn set_if_init<E>(&mut self, data: Word, strict: bool, err: E) -> Result<(), E> {
        match !strict || data.is_init() {
            true => {
                *self = data;
                Ok(())
            },
            false => Err(err)
        }
    }

    /// Checks that a word is fully initialized
    pub fn is_init(&self) -> bool {
        self.init == ALL_BITS
    }
    /// Clears initialization of this word.
    pub fn clear_init(&mut self) {
        self.init = NO_BITS;
    }
}
impl From<u16> for Word {
    /// Creates a fully initialized word.
    fn from(value: u16) -> Self {
        Word::new_init(value)
    }
}
impl From<i16> for Word {
    /// Creates a fully initialized word.
    fn from(value: i16) -> Self {
        Word::new_init(value as u16)
    }
}

impl std::ops::Not for Word {
    type Output = Word;

    /// Inverts the data on this word, preserving any initialization state.
    fn not(self) -> Self::Output {
        // Initialization state should stay the same after this.
        let Self { data, init } = self;
        Self { data: !data, init }
    }
}


impl std::ops::Add for Word {
    type Output = Word;

    /// Adds two words together (wrapping if overflow occurs).
    /// 
    /// If the two words are fully initialized, 
    /// the resulting word will also be fully initialized.
    /// Otherwise, the resulting word is fully uninitialized.
    fn add(self, rhs: Self) -> Self::Output {
        let Self { data: ldata, init: linit } = self;
        let Self { data: rdata, init: rinit } = rhs;

        if rdata == 0 && rinit == ALL_BITS { return self; }
        if ldata == 0 && linit == ALL_BITS { return rhs; }

        let data = ldata.wrapping_add(rdata);

        // Close enough calculation:
        // If both are fully init, consider this word fully init.
        // Otherwise, consider it fully uninit.
        let init = match linit == ALL_BITS && rinit == ALL_BITS {
            true  => ALL_BITS,
            false => NO_BITS,
        };

        Self { data, init }
    }
}
impl std::ops::AddAssign for Word {
    /// Replaces this word with the result of `*self + rhs`.
    fn add_assign(&mut self, rhs: Self) {
        *self = std::ops::Add::add(*self, rhs);
    }
}
impl std::ops::AddAssign<u16> for Word {
    /// Increments the word by the provided value.
    ///
    /// `rhs` is treated as a fully initialized word and added with the
    /// word-to-word addition, which decides the resulting initialization state.
    fn add_assign(&mut self, rhs: u16) {
        let addend = Word::new_init(rhs);
        *self = std::ops::Add::add(*self, addend);
    }
}
impl std::ops::AddAssign<i16> for Word {
    /// Increments the word by the provided signed value.
    ///
    /// `rhs` is reinterpreted as its unsigned 16-bit representation, treated as a
    /// fully initialized word, and added with the word-to-word addition,
    /// which decides the resulting initialization state.
    fn add_assign(&mut self, rhs: i16) {
        let addend = Word::new_init(rhs as u16);
        *self = std::ops::Add::add(*self, addend);
    }
}


impl std::ops::Sub for Word {
    type Output = Word;

    /// Subtracts two words together (wrapping if overflow occurs).
    /// 
    /// If the two words are fully initialized, 
    /// the resulting word will also be fully initialized.
    /// Otherwise, the resulting word is fully uninitialized.
    fn sub(self, rhs: Self) -> Self::Output {
        let Self { data: ldata, init: linit } = self;
        let Self { data: rdata, init: rinit } = rhs;

        // This is (self - 0) == self.
        if rdata == 0 && rinit == ALL_BITS { return self; }

        let data = ldata.wrapping_sub(rdata);
        // Very lazy initialization scheme.
        // If both are fully init, consider this word fully init.
        // Otherwise, consider it fully uninit.
        let init = match linit == ALL_BITS && rinit == ALL_BITS {
            true  => ALL_BITS,
            false => NO_BITS,
        };

        Self { data, init }
    }
}
impl std::ops::SubAssign for Word {
    /// Replaces this word with the result of `*self - rhs`.
    fn sub_assign(&mut self, rhs: Self) {
        *self = std::ops::Sub::sub(*self, rhs);
    }
}
impl std::ops::SubAssign<u16> for Word {
    /// Decrements the word by the provided value.
    ///
    /// `rhs` is treated as a fully initialized word and subtracted with the
    /// word-to-word subtraction, which decides the resulting initialization state.
    fn sub_assign(&mut self, rhs: u16) {
        let subtrahend = Word::new_init(rhs);
        *self = std::ops::Sub::sub(*self, subtrahend);
    }
}
impl std::ops::SubAssign<i16> for Word {
    /// Decrements the word by the provided signed value.
    ///
    /// `rhs` is reinterpreted as its unsigned 16-bit representation, treated as a
    /// fully initialized word, and subtracted with the word-to-word subtraction,
    /// which decides the resulting initialization state.
    fn sub_assign(&mut self, rhs: i16) {
        let subtrahend = Word::new_init(rhs as u16);
        *self = std::ops::Sub::sub(*self, subtrahend);
    }
}


impl std::ops::BitAnd for Word {
    type Output = Word;

    /// Applies a bitwise AND across two words.
    /// 
    /// This will also compute the correct initialization
    /// for the resulting word, taking into account bit clearing.
    fn bitand(self, rhs: Self) -> Self::Output {
        let Self { data: ldata, init: linit } = self;
        let Self { data: rdata, init: rinit } = rhs;

        let data = ldata & rdata;
        // A given bit of the result is init if:
        // - both the lhs and rhs bits are init
        // - either of the bits are data: 0, init: 1
        let init = (linit & rinit) | (!ldata & linit) | (!rdata & rinit);

        Self { data, init }
    }
}
impl std::ops::BitAndAssign for Word {
    /// Replaces this word with the result of `*self & rhs`.
    fn bitand_assign(&mut self, rhs: Self) {
        *self = std::ops::BitAnd::bitand(*self, rhs);
    }
}

/// A source of data values for uninitialized [`Word`]s.
///
/// Implementors are passed to [`Word::new_uninit`], which asks them for the
/// data to place in each freshly created word.
pub trait WordFiller {
    /// Produces the data for one word.
    fn generate(&mut self) -> u16;

    /// Produces `N` uninitialized [`Word`]s as an inline array.
    ///
    /// The words are created in index order, one [`Word::new_uninit`] call per element.
    fn generate_array<const N: usize>(&mut self) -> [Word; N] {
        [(); N].map(|()| Word::new_uninit(self))
    }
    /// Produces `N` uninitialized [`Word`]s as a heap-allocated array.
    ///
    /// The words are created in index order, one [`Word::new_uninit`] call per element,
    /// and are collected on the heap before being converted to a fixed-size array.
    fn generate_boxed_array<const N: usize>(&mut self) -> Box<[Word; N]> {
        let words: Box<[Word]> = (0..N).map(|_| Word::new_uninit(self)).collect();
        let sized: Result<Box<[Word; N]>, _> = words.try_into();

        match sized {
            Ok(array) => array,
            // Exactly N words were collected above, so the length always matches.
            Err(_) => unreachable!("iterator should have had {N} elements"),
        }
    }
}
impl WordFiller for () {
    /// Produces unseeded, non-deterministic values via `rand::random`.
    fn generate(&mut self) -> u16 {
        rand::random::<u16>()
    }
}
impl WordFiller for u16 {
    /// Produces the same value (the filler itself) for every word.
    fn generate(&mut self) -> u16 {
        let constant = *self;
        constant
    }
}
impl WordFiller for StdRng {
    /// This creates values from the standard random number generator.
    /// 
    /// This can be used to create deterministic, seeded values.
    fn generate(&mut self) -> u16 {
        self.gen()
    }
}
/// Strategy used to initialize the `reg_file` and `mem` of the [`Simulator`].
///
/// These are used to set the initial state of the memory and registers,
/// which will be treated as uninitialized until they are properly initialized
/// by program code.
///
/// [`Simulator`]: super::Simulator
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
pub enum MachineInitStrategy {
    /// Initializes each word randomly and non-deterministically.
    ///
    /// This is the default strategy.
    #[default]
    Unseeded,

    /// Initializes each word randomly and deterministically.
    Seeded {
        /// The seed the RNG is initialized with.
        seed: u64
    },

    /// Initializes each word to a known value.
    Known {
        /// The value to initialize each word to.
        value: u16
    }
}

impl MachineInitStrategy {
    /// Builds the [`WordFiller`] that implements this strategy.
    ///
    /// - `Unseeded` yields a filler that draws unseeded random values.
    /// - `Seeded` yields a filler backed by a `StdRng` seeded with `seed`.
    /// - `Known` yields a filler that always produces `value`.
    pub(super) fn generator(&self) -> impl WordFiller {
        use rand::SeedableRng;

        match *self {
            Self::Unseeded => WCGenerator::Unseeded,
            Self::Known { value } => WCGenerator::Known(value),
            Self::Seeded { seed } => {
                let rng = StdRng::seed_from_u64(seed);
                WCGenerator::Seeded(Box::new(rng))
            }
        }
    }
}

/// The concrete [`WordFiller`] handed out by [`MachineInitStrategy::generator`].
///
/// Each variant mirrors one variant of [`MachineInitStrategy`].
enum WCGenerator {
    /// Draws unseeded random values.
    Unseeded,
    /// Draws values from the held random number generator (boxed).
    Seeded(Box<rand::rngs::StdRng>),
    /// Always produces the held value.
    Known(u16)
}
impl WordFiller for WCGenerator {
    fn generate(&mut self) -> u16 {
        match self {
            WCGenerator::Unseeded  => ().generate(),
            WCGenerator::Seeded(r) => r.generate(),
            WCGenerator::Known(k)  => k.generate(),
        }
    }
}

/// Memory array.
///
/// This can be addressed with any `u16` (16-bit address).
///
/// This memory array *does* expose memory locations 0xFE00-0xFFFF,
/// however they are not accessible through normal Simulator operation
/// (i.e., via [`Simulator::read_mem`] and [`Simulator::write_mem`]).
///
/// They can be read and edited via the typical Index traits.
/// If you wish to see the handling of memory-mapped IO, see the above
/// [`Simulator`] methods.
///
/// [`Simulator`]: super::Simulator
/// [`Simulator::read_mem`]: super::Simulator::read_mem
/// [`Simulator::write_mem`]: super::Simulator::write_mem
/// [`Simulator::default_mem_ctx`]: super::Simulator::default_mem_ctx
#[derive(Debug)]
pub struct MemArray(Box<[Word; 1 << 16]>);
impl MemArray {
    /// Creates a new memory with a provided word creation strategy.
    ///
    /// All `1 << 16` words are requested from `filler` through
    /// [`WordFiller::generate_boxed_array`].
    pub fn new(filler: &mut impl WordFiller) -> Self {
        Self(filler.generate_boxed_array())
    }

    /// Copies an object file block into this memory.
    ///
    /// `data[i]` is applied to address `start + i`, with the address wrapping
    /// around from `0xFFFF` to `0x0000`:
    /// - `Some(value)` overwrites the word with a fully initialized `value`.
    /// - `None` keeps the word's current data but marks it as uninitialized.
    ///
    /// Entries are applied in order, so if `data` is long enough to wrap onto
    /// addresses it has already written, the later entries win.
    pub(super) fn copy_obj_block(&mut self, mut start: u16, data: &[Option<u16>]) {
        for datum in data {
            // Every u16 is a valid index into the 1 << 16 element array.
            let word = &mut self.0[usize::from(start)];

            match *datum {
                Some(value) => *word = Word::new_init(value),
                None => word.clear_init(),
            }

            // Addresses are 16-bit, so stepping past 0xFFFF wraps to 0x0000.
            start = start.wrapping_add(1);
        }
    }

    /// Exposes the whole memory as a mutable slice of words.
    pub(super) fn as_slice_mut(&mut self) -> &mut [Word] {
        &mut *self.0
    }
}
impl std::ops::Index<u16> for MemArray {
    type Output = Word;

    fn index(&self, index: u16) -> &Self::Output {
        &self.0[index as usize]
    }
}
impl std::ops::IndexMut<u16> for MemArray {
    fn index_mut(&mut self, index: u16) -> &mut Self::Output {
        &mut self.0[index as usize]
    }
}

/// The register file.
///
/// This holds 8 [`Word`]s and can be indexed with a [`Reg`].
///
/// # Example
///
/// ```
/// use lc3_ensemble::sim::mem::RegFile;
/// use lc3_ensemble::ast::Reg::R0;
/// 
/// let mut reg = RegFile::new(&mut ()); // never should have to initialize a reg file
/// reg[R0].set(11);
/// assert_eq!(reg[R0].get(), 11);
/// ```
#[derive(Debug, Clone)]
pub struct RegFile([Word; 8]);
impl RegFile {
    /// Creates a register file with uninitialized data.
    pub fn new(filler: &mut impl WordFiller) -> Self {
        Self(filler.generate_array())
    }
}
impl std::ops::Index<Reg> for RegFile {
    type Output = Word;

    /// Borrows the word held by the given register.
    fn index(&self, index: Reg) -> &Self::Output {
        let slot: usize = index.into();
        &self.0[slot]
    }
}
impl std::ops::IndexMut<Reg> for RegFile {
    /// Mutably borrows the word held by the given register.
    fn index_mut(&mut self, index: Reg) -> &mut Self::Output {
        let slot: usize = index.into();
        &mut self.0[slot]
    }
}