o2_tools/market_data/market_data.rs
1use crate::{
2 market_data::order_book::{
3 Book,
4 BookConfig,
5 },
6 order_book::Side,
7};
8use anyhow::{
9 Result,
10 anyhow,
11};
12use fuels::types::{
13 AssetId,
14 Identity,
15};
16use std::{
17 fs::{
18 self,
19 File,
20 },
21 io::Read,
22 path::PathBuf,
23};
24
/// A single order derived from market trade data.
#[derive(Debug, Clone)]
pub struct OrderData {
    /// Price as an unsigned integer. `MarketData::get_order_data` fills this with the
    /// CSV price multiplied by 1_000_000; other producers may use a different scale.
    pub price: u64,
    /// Quantity as an unsigned integer. `MarketData::get_order_data` fills this with
    /// the CSV quantity multiplied by 1_000_000_000; other producers may use a
    /// different scale.
    pub quantity: u64,
    /// Whether the order buys or sells.
    pub side: Side,
    /// Identity of the trader. The loaders in this file always use
    /// `Identity::default()`.
    pub trader_id: Identity,
}
32
/// Downloads Binance spot trade archives and caches the extracted CSV files on disk.
pub struct MarketData {
    /// Directory in which the extracted `{pair}-trades-{date}.csv` files are stored.
    cache_dir: PathBuf,
}
36
/// An operation that can be replayed against an order book
/// (see `MarketData::generate_orderbook_state`).
#[derive(Debug, Clone)]
pub enum OrderBookAction {
    /// Place the given order; it is forwarded to `Book::create_order`.
    CreateOrder(OrderData),
    /// Cancel an order; the value is forwarded to `Book::cancel` as the order id.
    CancelOrder(u64),
}
42
43pub mod order_book;
44
45impl MarketData {
46 pub fn new() -> Result<Self> {
47 let cache_dir = std::env::temp_dir().join("binance_market_data_cache");
48 fs::create_dir_all(&cache_dir)?;
49 Ok(Self { cache_dir })
50 }
51
52 pub async fn download_market_data(&self, pair: &str, date: &str) -> Result<PathBuf> {
53 let csv_path = self.cache_dir.join(format!("{pair}-trades-{date}.csv"));
54
55 if csv_path.exists() {
56 println!("Data already downloaded");
57 return Ok(csv_path);
58 }
59
60 let url = format!(
61 "https://data.binance.vision/data/spot/daily/trades/{pair}/{pair}-trades-{date}.zip"
62 );
63 let response = reqwest::get(&url).await?;
64 if !response.status().is_success() {
65 return Err(anyhow!("Failed to download: {}", response.status()));
66 }
67
68 let bytes = response.bytes().await?;
69 let reader = std::io::Cursor::new(bytes);
70 let mut archive = zip::ZipArchive::new(reader)?;
71
72 if archive.len() != 1 {
73 return Err(anyhow!("Expected exactly one file in the archive"));
74 }
75
76 let mut zip_file = archive.by_index(0)?;
77 let mut csv_data = Vec::new();
78 zip_file.read_to_end(&mut csv_data)?;
79
80 fs::write(&csv_path, csv_data)?;
81 Ok(csv_path)
82 }
83
84 pub fn get_order_data(
85 &self,
86 pair: &str,
87 date: &str,
88 limit: usize,
89 ) -> Result<Vec<OrderData>> {
90 let csv_path = self.cache_dir.join(format!("{pair}-trades-{date}.csv"));
91
92 if !csv_path.exists() {
93 return Err(anyhow!(
94 "Data not downloaded. Call download_market_data first."
95 ));
96 }
97
98 let file = File::open(csv_path)?;
99 let mut rdr = csv::Reader::from_reader(file);
100
101 let mut orders = Vec::new();
102 for (idx, result) in rdr.records().enumerate() {
103 if idx >= limit {
104 break;
105 }
106
107 let record = result?;
108
109 let price: f64 = record
110 .get(1)
111 .ok_or_else(|| anyhow!("Missing price field"))?
112 .parse()?;
113 let quantity: f64 = record
114 .get(2)
115 .ok_or_else(|| anyhow!("Missing quantity field"))?
116 .parse()?;
117 let is_buyer_maker_str = record
118 .get(5)
119 .ok_or_else(|| anyhow!("Missing is_buyer_maker field"))?;
120 let is_buyer_maker = is_buyer_maker_str.to_lowercase() == "true";
121 let price_u64 = (price * 1_000_000.0) as u64;
122 let quantity_u64 = (quantity * 1_000_000_000.0) as u64;
123
124 orders.push(OrderData {
125 trader_id: Identity::default(),
126 price: price_u64,
127 quantity: quantity_u64,
128 side: if is_buyer_maker {
129 Side::Sell
130 } else {
131 Side::Buy
132 },
133 });
134 }
135 Ok(orders)
136 }
137
138 pub async fn get_ethusdc_orders_data(
139 &self,
140 date: &str,
141 limit: usize,
142 ) -> Result<Vec<OrderData>> {
143 let pair = "ETHUSDC";
144 self.download_market_data(pair, date).await?;
145 self.get_order_data(pair, date, limit)
146 }
147
148 /// Downloads and retrieves summarized ETHUSDC order data by grouping trades.
149 ///
150 /// This method reduces the number of orders by grouping consecutive trades and calculating
151 /// statistical aggregates for each group. This is useful when you need fewer data points
152 /// while preserving the statistical characteristics of real market data.
153 ///
154 /// # How it works
155 ///
156 /// 1. Downloads all trade data for the specified date
157 /// 2. Calculates group size: `total_trades / limit` (rounded up)
158 /// 3. Groups consecutive trades into chunks of the calculated size
159 /// 4. For each group, creates a single summarized order with:
160 /// - **Price**: Average price of all trades in the group
161 /// - **Quantity**: Sum of all quantities in the group
162 /// - **Side**: Majority side (Buy/Sell) based on trade count
163 ///
164 /// # Parameters
165 ///
166 /// * `date` - Optional date string in "YYYY-MM-DD" format. Defaults to "2025-07-15"
167 /// * `limit` - Maximum number of summarized orders to return.
168 ///
169 /// # Returns
170 ///
171 /// Returns a `Result<Vec<OrderData>>` containing up to `limit` summarized orders.
172 /// Each order represents aggregated data from multiple real trades.
173 ///
174 /// # Errors
175 ///
176 /// Returns an error if:
177 /// - Network download fails
178 /// - File I/O operations fail
179 /// - CSV parsing fails
180 /// - Invalid date format
181 pub async fn get_ethusdc_orders_data_summrized(
182 &self,
183 date: &str,
184 limit: usize,
185 ) -> Result<Vec<OrderBookAction>> {
186 let pair = "ETHUSDC";
187 self.download_market_data(pair, date).await?;
188
189 // Get all order data first (without limit)
190 let all_data = self.get_order_data(pair, date, usize::MAX)?;
191
192 if all_data.is_empty() || limit == 0 {
193 return Ok(Vec::new());
194 }
195
196 let group_size = all_data.len() / limit;
197 let mut summarized_orders = Vec::new();
198
199 // Group trades and calculate averages
200 for chunk in all_data.chunks_exact(group_size) {
201 if chunk.is_empty() {
202 continue;
203 }
204
205 // Calculate average price
206 let total_price = chunk.iter().map(|order| order.price).sum::<u64>();
207 let total_quantity = chunk.iter().map(|order| order.quantity).sum::<u64>();
208 let avg_price = total_price / chunk.len() as u64;
209 let avg_quantity = total_quantity / chunk.len() as u64;
210
211 // Sum quantities
212 let buy_count = chunk
213 .iter()
214 .filter(|order| matches!(order.side, Side::Buy))
215 .count();
216 let sell_count = chunk.len() - buy_count;
217 let majority_side = if buy_count > sell_count {
218 Side::Buy
219 } else {
220 Side::Sell
221 };
222
223 let order = OrderBookAction::CreateOrder(OrderData {
224 trader_id: Identity::default(),
225 price: avg_price / 1_000_000,
226 quantity: avg_quantity / 1_000_000,
227 side: majority_side,
228 });
229 summarized_orders.push(order);
230 }
231
232 Ok(summarized_orders)
233 }
234
235 pub fn generate_orderbook_state(
236 base_asset: AssetId,
237 quote_asset: AssetId,
238 actions: &[OrderBookAction],
239 ) -> Book {
240 let mut order_book = Book::new(BookConfig {
241 base_asset,
242 base_decimals: 9,
243 quote_asset,
244 quote_decimals: 6,
245 taker_fee: 0,
246 maker_fee: 0,
247 });
248
249 for action in actions {
250 match action {
251 OrderBookAction::CreateOrder(order) => {
252 order_book.create_order(order.clone());
253 }
254 OrderBookAction::CancelOrder(order_id) => {
255 order_book.cancel(*order_id);
256 }
257 }
258 }
259
260 order_book
261 }
262}
263
264// #[cfg(test)]
265// mod tests {
266// use super::*;
267// use tokio;
268
269// #[tokio::test]
270// async fn test_download_and_cache() -> Result<()> {
271// let manager = MarketData::new()?;
272// let date = "2025-07-15";
273// let pair = "ETHUSDC";
274
275// let csv_path = manager.download_market_data(pair, date).await?;
276// assert!(csv_path.exists());
277
278// let file_metadata1 = fs::metadata(&csv_path)?;
279// let modified1 = file_metadata1.modified()?;
280
281// let csv_path2 = manager.download_market_data(pair, date).await?;
282// assert_eq!(csv_path, csv_path2);
283
284// let file_metadata2 = fs::metadata(&csv_path2)?;
285// let modified2 = file_metadata2.modified()?;
286// assert_eq!(modified1, modified2);
287
288// Ok(())
289// }
290
291// #[tokio::test]
292// async fn get_ethusdc_orders_data() -> Result<()> {
293// let manager = MarketData::new()?;
294// let date = "2025-07-15";
295// let orders = manager.get_ethusdc_orders_data(date, 10).await?;
296// assert_eq!(orders.len(), 10);
297
298// for order in &orders {
299// assert!(order.price > 0);
300// assert!(order.quantity > 0);
301// assert!(matches!(order.side, Side::Buy | Side::Sell));
302// }
303
304// Ok(())
305// }
306
307// #[tokio::test]
308// async fn test_get_ethusdc_orders_data_summrized() -> Result<()> {
309// let manager = MarketData::new()?;
310// let date = "2025-07-15";
311
312// // Test with a limit that should result in grouping
313// let summarized_orders =
314// manager.get_ethusdc_orders_data_summrized(date, 5).await?;
315// println!("summarized_orders: {:?}", summarized_orders.len());
316// assert!(summarized_orders.len() <= 5);
317
318// // Verify all orders have valid data
319// for order in &summarized_orders {
320// match order {
321// OrderBookAction::CreateOrder(order) => {
322// assert!(order.price > 0);
323// assert!(order.quantity > 0);
324// assert!(matches!(order.side, Side::Buy | Side::Sell));
325// }
326// OrderBookAction::CancelOrder(order_id) => {
327// assert!(*order_id > 0);
328// }
329// }
330// }
331// assert_eq!(summarized_orders.len(), 5);
332
333// Ok(())
334// }
335// }