aprender-core 0.31.2

Next-generation machine learning library in pure Rust
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
//! Hex dump and data flow visualization for tensor inspection (GH-122).
//!
// Allow format_push_string: cleaner code for string building without I/O concerns
#![allow(clippy::format_push_string)]
//!
//! Implements Toyota Way Principle 12 (Genchi Genbutsu): Go and see the actual
//! tensor values, not abstractions.
//!
//! # Features
//!
//! - Hex dump with ASCII sidebar
//! - Data flow visualization
//! - Model hierarchy tree view
//! - Tensor statistics summary
//!
//! # Example
//!
//! ```rust
//! use aprender::format::hexdump::{hex_dump, HexDumpConfig};
//!
//! let data = [0x41, 0x50, 0x52, 0x31]; // "APR1"
//! let dump = hex_dump(&data, &HexDumpConfig::default());
//! assert!(dump.contains("41 50 52 31"));
//! assert!(dump.contains("APR1"));
//! ```
//!
//! # PMAT Compliance
//!
//! - Zero `unwrap()` calls
//! - All string formatting is safe

// ============================================================================
// Configuration
// ============================================================================

/// Configuration for hex dump display.
///
/// Used by [`hex_dump`] and [`tensor_hex_dump`] to control layout,
/// grouping, and truncation.
#[derive(Debug, Clone)]
pub struct HexDumpConfig {
    /// Bytes per line (default: 16); 0 is clamped to 1 by `hex_dump`
    pub bytes_per_line: usize,
    /// Show ASCII sidebar (default: true)
    pub show_ascii: bool,
    /// Show offset column (default: true)
    pub show_offset: bool,
    /// Group bytes (e.g., 2 = pairs, 4 = quads); values <= 1 disable grouping
    pub group_size: usize,
    /// Maximum bytes to dump (0 = unlimited); excess is reported as truncated
    pub max_bytes: usize,
}

impl Default for HexDumpConfig {
    /// Standard layout: 16 ungrouped bytes per line, offset and ASCII
    /// columns enabled, no truncation.
    fn default() -> Self {
        Self {
            max_bytes: 0,
            group_size: 1,
            bytes_per_line: 16,
            show_offset: true,
            show_ascii: true,
        }
    }
}

impl HexDumpConfig {
    /// Compact hex dump: 8 bytes per line, no offset column, output
    /// capped at 256 bytes.
    #[must_use]
    pub fn compact() -> Self {
        Self {
            bytes_per_line: 8,
            show_offset: false,
            max_bytes: 256,
            ..Self::default()
        }
    }

    /// Wide hex dump: 32 bytes per line in 4-byte groups, unlimited.
    #[must_use]
    pub fn wide() -> Self {
        Self {
            bytes_per_line: 32,
            group_size: 4,
            ..Self::default()
        }
    }
}

// ============================================================================
// Hex Dump
// ============================================================================

/// Generate hex dump of byte slice.
///
/// Produces one line per `bytes_per_line` chunk, with an optional
/// 8-digit hex offset column and an optional ASCII sidebar. When
/// `max_bytes` is non-zero, output stops there and a footer reports
/// how many bytes were omitted.
#[must_use]
pub fn hex_dump(data: &[u8], config: &HexDumpConfig) -> String {
    // Guard against a zero line width, which would make `chunks` panic.
    let width = config.bytes_per_line.max(1);
    // A `max_bytes` of 0 means "no limit".
    let limit = match config.max_bytes {
        0 => data.len(),
        cap => cap.min(data.len()),
    };

    let mut out = String::new();
    for (row, chunk) in data[..limit].chunks(width).enumerate() {
        if config.show_offset {
            out.push_str(&format!("{:08x}  ", row * width));
        }
        format_hex_bytes(&mut out, chunk, width, config.group_size);
        format_ascii_sidebar(&mut out, chunk, width, config.show_ascii);
        out.push('\n');
    }

    if limit < data.len() {
        out.push_str(&format!(
            "... ({} more bytes truncated)\n",
            data.len() - limit
        ));
    }

    out
}

/// Format hex bytes with optional grouping and padding for incomplete lines.
///
/// Each byte is rendered as two lowercase hex digits plus a trailing
/// space; when `group_size > 1` an extra space separates each group.
/// Short chunks are padded with blanks (including phantom group
/// separators) so every line's ASCII sidebar starts at the same column.
fn format_hex_bytes(result: &mut String, chunk: &[u8], bytes_per_line: usize, group_size: usize) {
    let grouped = group_size > 1;
    for (i, byte) in chunk.iter().enumerate() {
        if grouped && i > 0 && i % group_size == 0 {
            result.push(' ');
        }
        result.push_str(&format!("{byte:02x} "));
    }
    // Pad the remaining columns. Iterating over absolute column positions
    // (instead of `0..bytes_per_line - chunk.len()`) keeps the separator
    // arithmetic uniform with the loop above and removes the
    // integer-underflow panic the subtraction caused when a caller passed
    // a chunk longer than `bytes_per_line` (the range is simply empty).
    for pos in chunk.len()..bytes_per_line {
        if grouped && pos % group_size == 0 {
            result.push(' ');
        }
        result.push_str("   ");
    }
}

/// Format ASCII sidebar with padding for incomplete lines.
///
/// Printable ASCII (and space) is shown verbatim; everything else is
/// replaced with `.`. Does nothing when `show` is false.
fn format_ascii_sidebar(result: &mut String, chunk: &[u8], bytes_per_line: usize, show: bool) {
    if !show {
        return;
    }
    result.push_str(" |");
    result.extend(chunk.iter().map(|&b| {
        if b.is_ascii_graphic() || b == b' ' {
            b as char
        } else {
            '.'
        }
    }));
    // Blank-fill short lines so the closing bar always lines up.
    result.extend(std::iter::repeat(' ').take(bytes_per_line.saturating_sub(chunk.len())));
    result.push('|');
}

/// Generate hex dump of f32 tensor values.
///
/// Each line shows values in scientific notation followed by their
/// IEEE-754 bit patterns (most-significant byte first).
#[must_use]
pub fn tensor_hex_dump(tensor: &[f32], config: &HexDumpConfig) -> String {
    // Each f32 occupies 4 bytes of the configured line/byte budgets.
    let per_line = (config.bytes_per_line / 4).max(1);
    let limit = match config.max_bytes {
        0 => tensor.len(),
        cap => (cap / 4).min(tensor.len()),
    };

    let mut out = String::new();
    for (row, chunk) in tensor[..limit].chunks(per_line).enumerate() {
        // Offset column, expressed in float indices rather than bytes.
        if config.show_offset {
            out.push_str(&format!("[{:06}]  ", row * per_line));
        }

        // Float interpretation.
        for val in chunk {
            out.push_str(&format!("{val:>12.6e} "));
        }

        // Raw bit pattern: `to_bits` printed as 8 hex digits yields the
        // same most-significant-byte-first order as printing the
        // little-endian bytes in reverse.
        out.push_str("  |");
        for val in chunk {
            out.push_str(&format!(" {:08x}", val.to_bits()));
        }
        out.push_str(" |\n");
    }

    if limit < tensor.len() {
        out.push_str(&format!(
            "... ({} more values truncated)\n",
            tensor.len() - limit
        ));
    }

    out
}

// ============================================================================
// Data Flow Visualization
// ============================================================================

/// Layer information for data flow visualization.
///
/// Consumed by [`data_flow_diagram`], which renders one box per layer.
#[derive(Debug, Clone)]
pub struct LayerInfo {
    /// Layer name
    pub name: String,
    /// Layer type (e.g., "Conv2d", "Linear", "LayerNorm")
    pub layer_type: String,
    /// Input shape (tensor dimensions entering the layer)
    pub input_shape: Vec<usize>,
    /// Output shape (tensor dimensions leaving the layer)
    pub output_shape: Vec<usize>,
    /// Parameter count
    pub params: usize,
}

impl LayerInfo {
    /// Create new layer info.
    #[must_use]
    pub fn new(
        name: impl Into<String>,
        layer_type: impl Into<String>,
        input_shape: Vec<usize>,
        output_shape: Vec<usize>,
        params: usize,
    ) -> Self {
        Self {
            name: name.into(),
            layer_type: layer_type.into(),
            input_shape,
            output_shape,
            params,
        }
    }

    /// Render a shape as a parenthesized, comma-separated tuple,
    /// e.g. `(3, 224, 224)`; an empty shape renders as `()`.
    fn format_shape(shape: &[usize]) -> String {
        if shape.is_empty() {
            return "()".to_string();
        }
        let dims: Vec<String> = shape.iter().map(ToString::to_string).collect();
        format!("({})", dims.join(", "))
    }
}

/// Generate data flow visualization for model layers.
///
/// Returns ASCII art showing each layer as a box (name, type, parameter
/// count) connected by arrows annotated with the shapes flowing between
/// layers, followed by a total-parameter summary.
#[must_use]
pub fn data_flow_diagram(layers: &[LayerInfo]) -> String {
    let mut result = String::new();

    result.push_str("Data Flow Diagram\n");
    result.push_str("=================\n\n");

    if layers.is_empty() {
        result.push_str("(no layers)\n");
        return result;
    }

    // Column widths so every box lines up.
    let max_name_len = layers.iter().map(|l| l.name.len()).max().unwrap_or(10);
    let max_type_len = layers
        .iter()
        .map(|l| l.layer_type.len())
        .max()
        .unwrap_or(10);

    // Interior width of a layer box. The content row between the `|`
    // borders is: space + name + " [" + type + "] " + 8-char params +
    // space = name + type + 14 characters. The previous constant (10)
    // left the +---+ borders four characters shorter than the content.
    let box_width = max_name_len + max_type_len + 14;
    // Build the horizontal border once so top and bottom always match.
    let border = format!("  +{:-<box_width$}+\n", "");

    for (i, layer) in layers.iter().enumerate() {
        let input_str = LayerInfo::format_shape(&layer.input_shape);
        let output_str = LayerInfo::format_shape(&layer.output_shape);

        // Model input feeds the first layer.
        if i == 0 {
            result.push_str(&format!("    Input: {input_str}\n"));
            result.push_str("       |\n");
            result.push_str("       v\n");
        }

        // Layer box: name, [type], right-aligned parameter count.
        result.push_str(&border);
        result.push_str(&format!(
            "  | {:name_width$} [{:type_width$}] {:>8} |\n",
            layer.name,
            layer.layer_type,
            format_params(layer.params),
            name_width = max_name_len,
            type_width = max_type_len
        ));
        result.push_str(&border);

        // Arrow to the next layer, or to the final output.
        result.push_str("       |\n");
        if i == layers.len() - 1 {
            result.push_str("       v\n");
            result.push_str(&format!("    Output: {output_str}\n"));
        } else {
            result.push_str(&format!("       | {output_str}\n"));
            result.push_str("       v\n");
        }
    }

    // Summary across all layers.
    let total_params: usize = layers.iter().map(|l| l.params).sum();
    result.push_str(&format!(
        "\nTotal parameters: {}\n",
        format_params(total_params)
    ));

    result
}

/// Format parameter count with K/M/B suffixes (one decimal place);
/// counts below 1000 are printed verbatim.
fn format_params(count: usize) -> String {
    const SCALES: [(usize, &str); 3] = [
        (1_000_000_000, "B"),
        (1_000_000, "M"),
        (1_000, "K"),
    ];
    for &(scale, suffix) in &SCALES {
        if count >= scale {
            return format!("{:.1}{suffix}", count as f64 / scale as f64);
        }
    }
    count.to_string()
}

// ============================================================================
// Model Tree View
// ============================================================================

/// Node in model hierarchy tree.
///
/// Interior nodes carry `children`; leaf tensor nodes (built via
/// [`TreeNode::tensor`]) carry `shape` and `dtype` instead.
#[derive(Debug, Clone)]
pub struct TreeNode {
    /// Node name
    pub name: String,
    /// Node type ("Tensor" for leaf nodes created by `tensor`)
    pub node_type: String,
    /// Child nodes (empty for leaves)
    pub children: Vec<TreeNode>,
    /// Tensor shape (if leaf node)
    pub shape: Option<Vec<usize>>,
    /// Data type (if leaf node)
    pub dtype: Option<String>,
}

impl TreeNode {
    /// Create new tree node
    #[must_use]
    pub fn new(name: impl Into<String>, node_type: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            node_type: node_type.into(),
            children: Vec::new(),
            shape: None,
            dtype: None,
        }
    }

    /// Create leaf node (tensor)
    #[must_use]
    pub fn tensor(name: impl Into<String>, shape: Vec<usize>, dtype: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            node_type: "Tensor".to_string(),
            children: Vec::new(),
            shape: Some(shape),
            dtype: Some(dtype.into()),
        }
    }

    /// Add child node
    pub fn add_child(&mut self, child: TreeNode) {
        self.children.push(child);
    }

    /// Count total nodes
    #[must_use]
    pub fn count_nodes(&self) -> usize {
        1 + self
            .children
            .iter()
            .map(TreeNode::count_nodes)
            .sum::<usize>()
    }
}

/// Generate tree view of model hierarchy.
///
/// Walks the hierarchy depth-first (via `tree_view_recursive`) and
/// returns the ASCII tree representation.
#[must_use]
pub fn tree_view(root: &TreeNode) -> String {
    let mut rendered = String::new();
    tree_view_recursive(root, "", true, &mut rendered);
    rendered
}

include!("tensor_statistics.rs");