opendeviationbar-core 13.66.2

Core open deviation bar construction algorithm with temporal integrity guarantees
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
//! Real market data loading for integration testing
//!
//! ## Service Level Objectives (SLOs)
//!
//! ### Availability SLO: 100% file accessibility
//! - Propagate filesystem errors (no fallbacks, no defaults)
//! - Fail fast on missing files with clear error messages
//! - Validation: `File::open` failures are returned as `LoaderError::Io` (no separate existence check)
//!
//! ### Correctness SLO: 100% data integrity
//! - Parse all records without data loss or silent failures
//! - Strict schema validation (reject malformed records)
//! - No default values on parse errors (fail-fast)
//! - Validation: Parsed record count must equal the loader's expected count (for files with a header: file line count - 1)
//!
//! ### Observability SLO: 100% error traceability
//! - All errors include the file path; CSV parse and fixed-point conversion errors also include the line number
//! - Use thiserror for structured error messages
//! - No silent failures or swallowed errors
//!
//! ### Maintainability SLO: Off-the-shelf components only
//! - Use csv crate (de facto Rust standard for CSV parsing)
//! - Use serde for deserialization (no custom parsing)
//! - Zero custom string manipulation or parsing logic
//!
//! ## Data Source
//!
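//! - BTCUSDT: `test_data/BTCUSDT/BTCUSDT_aggTrades_20250901.csv` (5,000 trades)
//! - ETHUSDT: `test_data/ETHUSDT/ETHUSDT_aggTrades_20250901.csv` (10,000 trades)
//! - BTCUSDT (headerless, 8 columns): `tests/fixtures/BTCUSDT-aggTrades-sample-10k.csv` (10,001 trades)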
//!
//! ## CSV Format (Binance aggTrades)
//!
//! ```csv
//! a,p,q,f,l,T,m
//! 1,50014.00859087,0.12019569,1,1,1756710002083,False
//! ```
//!
//! Columns:
//! - a: Aggregate trade ID
//! - p: Price (decimal string)
//! - q: Quantity (decimal string)
//! - f: First trade ID
//! - l: Last trade ID
//! - T: Timestamp (milliseconds, integer)
//! - m: Is buyer maker ("True"/"False" string)

use crate::FixedPoint;
use crate::trade::Tick;
use std::path::Path;
use thiserror::Error;

/// Test data loader errors
///
/// Every variant records the `path` of the file being loaded. `CsvParse` and
/// `FixedPoint` additionally record a `line`: the loaders in this module keep it
/// as a counter that starts at 2 for files with a header row (1 for headerless
/// files) and advances by one per record read. It is not taken from the CSV
/// reader's own position tracking.
#[derive(Debug, Error)]
pub enum LoaderError {
    /// File I/O error (propagate without modification)
    ///
    /// Produced when `std::fs::File::open` fails for the requested path.
    #[error("File I/O error for {path}: {source}")]
    Io {
        /// Lossy string form of the path passed to the loader.
        path: String,
        /// The underlying I/O error, stored unchanged.
        #[source]
        source: std::io::Error,
    },

    /// CSV parsing error (include line context)
    ///
    /// Produced when a record cannot be read or deserialized by the `csv` reader.
    #[error("CSV parse error at line {line} in {path}: {source}")]
    CsvParse {
        /// Lossy string form of the path passed to the loader.
        path: String,
        /// Loader-maintained line counter for the failing record.
        line: usize,
        /// The error reported by the `csv` crate.
        #[source]
        source: csv::Error,
    },

    /// Fixed-point conversion error
    ///
    /// Produced when a record deserialized successfully but its conversion to a
    /// `Tick` (`into_tick`) returned an error.
    #[error("Fixed-point conversion error at line {line} in {path}: {source}")]
    FixedPoint {
        /// Lossy string form of the path passed to the loader.
        path: String,
        /// Loader-maintained line counter for the failing record.
        line: usize,
        /// The conversion error returned by `into_tick`.
        #[source]
        source: crate::fixed_point::FixedPointError,
    },

    /// Record count validation error
    ///
    /// Produced after the whole file has been read when the number of loaded
    /// trades differs from the count the caller expected.
    #[error("Record count mismatch in {path}: expected {expected}, got {actual}")]
    CountMismatch {
        /// Lossy string form of the path passed to the loader.
        path: String,
        /// Number of records the caller required.
        expected: usize,
        /// Number of records actually loaded.
        actual: usize,
    },
}

/// Binance aggTrades CSV record (Spot/Futures format)
///
/// Matches CSV columns: a,p,q,f,l,T,m
///
/// This is the row type deserialized by `load_test_data`, which configures the
/// reader with `has_headers(true)`. Price and quantity stay as strings here and
/// are parsed into fixed-point values by `TickRecord::into_tick`.
#[derive(Debug, serde::Deserialize)]
struct TickRecord {
    /// Aggregate trade ID
    a: i64,
    /// Price (decimal string)
    p: String,
    /// Quantity (decimal string)
    q: String,
    /// First trade ID
    f: i64,
    /// Last trade ID
    l: i64,
    /// Timestamp (milliseconds); mapped from the column named `T`
    #[serde(rename = "T")]
    timestamp_ms: i64,
    /// Is buyer maker ("True"/"False" string)
    ///
    /// `into_tick` compares this against the exact string `"True"`; every
    /// other value is treated as `false`.
    m: String,
}

impl TickRecord {
    /// Consume this CSV row and build the corresponding `Tick`.
    ///
    /// The price string is parsed first, then the quantity string, both via
    /// `FixedPoint::from_str`. The first parse failure is returned to the caller
    /// unchanged; no default value is substituted for a bad number.
    ///
    /// NOTE(review): the buyer-maker flag is not validated. Only the exact
    /// string `"True"` yields `true`; any other content (including misspellings
    /// or different casing) yields `false` without an error.
    fn into_tick(self) -> Result<Tick, crate::fixed_point::FixedPointError> {
        let Self {
            a,
            p,
            q,
            f,
            l,
            timestamp_ms,
            m,
        } = self;

        let price = FixedPoint::from_str(&p)?;
        let volume = FixedPoint::from_str(&q)?;

        Ok(Tick {
            ref_id: a,
            price,
            volume,
            first_sub_id: f,
            last_sub_id: l,
            timestamp: timestamp_ms,
            is_buyer_maker: m == "True",
            // This record format carries no best-match column.
            is_best_match: None,
            best_bid: None,
            best_ask: None,
        })
    }
}

/// Load the BTCUSDT aggTrades fixture (5,000 trades from 2025-09-01).
///
/// The file is `BTCUSDT/BTCUSDT_aggTrades_20250901.csv` under the workspace
/// `test_data` directory, located relative to `CARGO_MANIFEST_DIR`.
///
/// # Errors
///
/// Returns whatever `load_test_data` reports, including
/// `LoaderError::CountMismatch` when the file does not contain exactly
/// 5,000 records.
pub fn load_btcusdt_test_data() -> Result<Vec<Tick>, LoaderError> {
    const EXPECTED_TRADES: usize = 5000;

    load_test_data(
        workspace_test_data_path("BTCUSDT/BTCUSDT_aggTrades_20250901.csv"),
        EXPECTED_TRADES,
    )
}

/// Load the ETHUSDT aggTrades fixture (10,000 trades from 2025-09-01).
///
/// The file is `ETHUSDT/ETHUSDT_aggTrades_20250901.csv` under the workspace
/// `test_data` directory, located relative to `CARGO_MANIFEST_DIR`.
///
/// # Errors
///
/// Returns whatever `load_test_data` reports, including
/// `LoaderError::CountMismatch` when the file does not contain exactly
/// 10,000 records.
pub fn load_ethusdt_test_data() -> Result<Vec<Tick>, LoaderError> {
    const EXPECTED_TRADES: usize = 10000;

    load_test_data(
        workspace_test_data_path("ETHUSDT/ETHUSDT_aggTrades_20250901.csv"),
        EXPECTED_TRADES,
    )
}

/// Build the path of a file inside the workspace-level `test_data` directory.
///
/// The workspace root is taken to be two directory levels above the
/// compile-time `CARGO_MANIFEST_DIR`, e.g.
/// `/path/to/opendeviationbar/crates/opendeviationbar-core` ->
/// `/path/to/opendeviationbar`. `relative_path` is appended below
/// `<workspace root>/test_data`.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` has fewer than two parent directories.
fn workspace_test_data_path(relative_path: &str) -> std::path::PathBuf {
    // ancestors() yields the path itself, then its parent, then its grandparent.
    let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(2)
        .unwrap();

    let mut resolved = workspace_root.join("test_data");
    resolved.push(relative_path);
    resolved
}

/// Build the path of a file inside the workspace-level `tests/fixtures` directory.
///
/// Issue #96: gives tests access to real market data fixtures.
///
/// The workspace root is taken to be two directory levels above the
/// compile-time `CARGO_MANIFEST_DIR`; `relative_path` is appended below
/// `<workspace root>/tests/fixtures`.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` has fewer than two parent directories.
fn workspace_fixtures_path(relative_path: &str) -> std::path::PathBuf {
    // ancestors() yields the path itself, then its parent, then its grandparent.
    let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(2)
        .unwrap();

    let mut resolved = workspace_root.join("tests");
    resolved.push("fixtures");
    resolved.push(relative_path);
    resolved
}

/// Binance aggTrades CSV record for headerless fixtures (8 columns)
///
/// Issue #96: Real 10K fixture has no header row + includes is_best_match column
/// Columns: agg_trade_id, price, quantity, first_trade_id, last_trade_id, timestamp, is_buyer_maker, is_best_match
///
/// This is the row type deserialized by `load_headerless_data`, which configures
/// the reader with `has_headers(false)`; the field order below therefore has to
/// follow the column order listed above. Price and quantity stay as strings and
/// are parsed by `TickRecordHeaderless::into_tick`.
#[derive(Debug, serde::Deserialize)]
struct TickRecordHeaderless {
    /// Aggregate trade ID (column 1)
    ref_id: i64,
    /// Price as a decimal string (column 2)
    price: String,
    /// Quantity as a decimal string (column 3)
    quantity: String,
    /// First trade ID (column 4)
    first_sub_id: i64,
    /// Last trade ID (column 5)
    last_sub_id: i64,
    /// Timestamp (column 6), copied unchanged into `Tick::timestamp`
    timestamp: i64,
    /// Buyer-maker flag as text (column 7); `into_tick` treats exactly `"True"` as true
    is_buyer_maker: String,
    /// Best-match flag as text (column 8); `into_tick` treats exactly `"True"` as true
    is_best_match: String,
}

impl TickRecordHeaderless {
    /// Consume this headerless CSV row and build the corresponding `Tick`.
    ///
    /// The price string is parsed first, then the quantity string, both via
    /// `FixedPoint::from_str`; the first parse failure is returned unchanged.
    ///
    /// NOTE(review): neither boolean column is validated. Only the exact string
    /// `"True"` yields `true`; any other content yields `false` without an error.
    fn into_tick(self) -> Result<Tick, crate::fixed_point::FixedPointError> {
        let Self {
            ref_id,
            price,
            quantity,
            first_sub_id,
            last_sub_id,
            timestamp,
            is_buyer_maker,
            is_best_match,
        } = self;

        let price = FixedPoint::from_str(&price)?;
        let volume = FixedPoint::from_str(&quantity)?;
        let buyer_is_maker = is_buyer_maker == "True";
        let best_match = is_best_match == "True";

        Ok(Tick {
            ref_id,
            price,
            volume,
            first_sub_id,
            last_sub_id,
            timestamp,
            is_buyer_maker: buyer_is_maker,
            is_best_match: Some(best_match),
            best_bid: None,
            best_ask: None,
        })
    }
}

/// Load the real BTCUSDT 10K trade fixture (10,001 trades from 2024-01-01).
///
/// Issue #96: real market data with natural statistical properties
/// (timestamp clustering, volume skew, directional flow, aggregation density)
/// for tests that require non-degenerate microstructure.
///
/// Source: tests/fixtures/BTCUSDT-aggTrades-sample-10k.csv (headerless, 8 columns)
///
/// # Errors
///
/// Returns whatever `load_headerless_data` reports, including
/// `LoaderError::CountMismatch` when the file does not contain exactly
/// 10,001 records.
pub fn load_real_btcusdt_10k() -> Result<Vec<Tick>, LoaderError> {
    const EXPECTED_TRADES: usize = 10001;

    load_headerless_data(
        workspace_fixtures_path("BTCUSDT-aggTrades-sample-10k.csv"),
        EXPECTED_TRADES,
    )
}

/// Load a headerless aggTrades CSV file and require an exact record count.
///
/// Issue #96: for fixture files without header rows (positional column mapping).
///
/// Each row is deserialized as a `TickRecordHeaderless` and converted with
/// `into_tick`. Loading stops at the first failure.
///
/// # Errors
///
/// - `LoaderError::Io` if the file cannot be opened.
/// - `LoaderError::CsvParse` if a row cannot be deserialized.
/// - `LoaderError::FixedPoint` if a row's conversion to `Tick` fails.
/// - `LoaderError::CountMismatch` if the number of rows loaded differs from
///   `expected_count`.
///
/// The `line` reported in parse/conversion errors is a record counter starting
/// at 1 (there is no header row), advanced once per record.
fn load_headerless_data<P: AsRef<Path>>(
    path: P,
    expected_count: usize,
) -> Result<Vec<Tick>, LoaderError> {
    let csv_path = path.as_ref();
    let path_str = csv_path.to_string_lossy().into_owned();

    let file = match std::fs::File::open(csv_path) {
        Ok(handle) => handle,
        Err(source) => {
            return Err(LoaderError::Io {
                path: path_str,
                source,
            });
        }
    };

    // Reader buffer: roughly 100 bytes per expected record, kept within 64 KiB..=2 MiB.
    let csv_buffer_size = (expected_count * 100).clamp(64 * 1024, 2 * 1024 * 1024);

    let mut reader = csv::ReaderBuilder::new()
        .has_headers(false)
        .buffer_capacity(csv_buffer_size)
        .from_reader(file);

    let mut trades = Vec::with_capacity(expected_count);

    for (index, row) in reader
        .deserialize::<TickRecordHeaderless>()
        .enumerate()
    {
        // No header row, so the first record is reported as line 1.
        let line = index + 1;

        let record = match row {
            Ok(record) => record,
            Err(source) => {
                return Err(LoaderError::CsvParse {
                    path: path_str,
                    line,
                    source,
                });
            }
        };

        match record.into_tick() {
            Ok(trade) => trades.push(trade),
            Err(source) => {
                return Err(LoaderError::FixedPoint {
                    path: path_str,
                    line,
                    source,
                });
            }
        }
    }

    if trades.len() == expected_count {
        Ok(trades)
    } else {
        Err(LoaderError::CountMismatch {
            path: path_str,
            expected: expected_count,
            actual: trades.len(),
        })
    }
}

/// Load an aggTrades CSV file that has a header row and require an exact record count.
///
/// SLO guarantees, as implemented here:
/// - Availability: a failed open is returned as `LoaderError::Io`; no fallback
///   path is tried.
/// - Correctness: the number of loaded trades must equal `expected_count`.
/// - Observability: every error carries the file path; parse and conversion
///   errors also carry a line number.
/// - Maintainability: parsing is delegated to the `csv` crate.
///
/// Each row is deserialized as a `TickRecord` and converted with `into_tick`.
/// Loading stops at the first failure.
///
/// # Errors
///
/// - `LoaderError::Io` if the file cannot be opened.
/// - `LoaderError::CsvParse` if a row cannot be deserialized.
/// - `LoaderError::FixedPoint` if a row's conversion to `Tick` fails.
/// - `LoaderError::CountMismatch` if the number of rows loaded differs from
///   `expected_count`.
///
/// The `line` reported in parse/conversion errors is a record counter starting
/// at 2 (line 1 being the header), advanced once per record.
fn load_test_data<P: AsRef<Path>>(
    path: P,
    expected_count: usize,
) -> Result<Vec<Tick>, LoaderError> {
    let csv_path = path.as_ref();
    let path_str = csv_path.to_string_lossy().into_owned();

    // Availability: surface the open failure together with the path.
    let file = match std::fs::File::open(csv_path) {
        Ok(handle) => handle,
        Err(source) => {
            return Err(LoaderError::Io {
                path: path_str,
                source,
            });
        }
    };

    // Issue #96 Task #74: size the reader buffer from the expected record count,
    // at roughly 100 bytes per record, kept within 64 KiB..=2 MiB.
    let csv_buffer_size = (expected_count * 100).clamp(64 * 1024, 2 * 1024 * 1024);

    let mut reader = csv::ReaderBuilder::new()
        .has_headers(true)
        .buffer_capacity(csv_buffer_size)
        .from_reader(file);

    let mut trades = Vec::with_capacity(expected_count);

    // Correctness: strict parsing, abort on the first bad row.
    for (index, row) in reader.deserialize::<TickRecord>().enumerate() {
        // Line 1 is the header, so the first record is reported as line 2.
        let line = index + 2;

        let record = match row {
            Ok(record) => record,
            Err(source) => {
                return Err(LoaderError::CsvParse {
                    path: path_str,
                    line,
                    source,
                });
            }
        };

        match record.into_tick() {
            Ok(trade) => trades.push(trade),
            Err(source) => {
                return Err(LoaderError::FixedPoint {
                    path: path_str,
                    line,
                    source,
                });
            }
        }
    }

    // Correctness: a differing count signals truncation or corruption.
    if trades.len() == expected_count {
        Ok(trades)
    } else {
        Err(LoaderError::CountMismatch {
            path: path_str,
            expected: expected_count,
            actual: trades.len(),
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The BTCUSDT fixture loads, has 5000 rows, and its first row matches the
    /// values shown in the module-level CSV example.
    #[test]
    fn test_load_btcusdt_data() {
        let loaded = load_btcusdt_test_data().expect("Failed to load BTCUSDT test data");

        // Record count
        assert_eq!(
            loaded.len(),
            5000,
            "BTCUSDT should have exactly 5000 trades"
        );

        // First-row field values
        let head = &loaded[0];
        assert_eq!(head.ref_id, 1);
        assert_eq!(head.price.to_string(), "50014.00859087");
        assert_eq!(head.volume.to_string(), "0.12019569");
        assert_eq!(head.first_sub_id, 1);
        assert_eq!(head.last_sub_id, 1);
        assert_eq!(head.timestamp, 1756710002083);
        assert!(!head.is_buyer_maker);
    }

    /// The ETHUSDT fixture loads, has 10000 rows, and every row has positive
    /// price, volume and timestamp.
    #[test]
    fn test_load_ethusdt_data() {
        let loaded = load_ethusdt_test_data().expect("Failed to load ETHUSDT test data");

        // Record count
        assert_eq!(
            loaded.len(),
            10000,
            "ETHUSDT should have exactly 10000 trades"
        );

        // Per-row sanity checks
        for row in loaded.iter() {
            assert!(row.price.0 > 0, "Price must be positive");
            assert!(row.volume.0 > 0, "Volume must be positive");
            assert!(row.timestamp > 0, "Timestamp must be positive");
        }
    }

    /// Timestamps in the BTCUSDT fixture never decrease from one trade to the next.
    #[test]
    fn test_temporal_integrity() {
        let loaded = load_btcusdt_test_data().unwrap();

        // Compare each trade with its predecessor; `offset + 1` is the index of
        // the later trade of the pair.
        for (offset, pair) in loaded.windows(2).enumerate() {
            let (earlier, later) = (&pair[0], &pair[1]);
            assert!(
                later.timestamp >= earlier.timestamp,
                "Temporal integrity violation at trade {}: {} < {}",
                offset + 1,
                later.timestamp,
                earlier.timestamp
            );
        }
    }
}