import json
import csv
import random
import argparse
from datetime import datetime, timedelta
from typing import List, Dict, Tuple
from enum import Enum
class Urgency(Enum):
PASSIVE = "PASSIVE"
NORMAL = "NORMAL"
URGENT = "URGENT"
CRITICAL = "CRITICAL"
class VenueResult(Enum):
FILLED = "FILLED"
PARTIAL = "PARTIAL"
FADE = "FADE"
REJECT = "REJECT"
NO_CONN = "NO_CONNECTION"
def generate_final_vwap(
order_size: int = 1000000,
avg_slice_size: int = 500,
detail_level: str = "summary", fade_rate: float = 0.05,
partial_rate: float = 0.10
) -> List[Dict]:
snapshots = []
record_id = 0
num_slices = order_size // avg_slice_size
print(f"Generating VWAP for {order_size:,} shares")
print(f"Expected slices: ~{num_slices:,}")
print(f"Detail level: {detail_level}")
base_time = datetime.now().replace(hour=9, minute=0, second=0, microsecond=0)
current_time = base_time
market_price = 650.00
client_order = {
'order_id': 'CLIENT_001',
'parent_order_id': None,
'client_order_id': 'C20241216_MEGA',
'order_level': 0,
'order_type': 'CLIENT',
'ticker': 'ASML.AS',
'side': 'Buy',
'quantity': order_size,
'filled_quantity': 0,
'remaining_quantity': order_size,
'average_price': 0.0,
'state': 'PENDING',
'snapshot_time': current_time.isoformat(),
'event_type': 'NEW',
'record_id': f'REC_{record_id:08d}'
}
snapshots.append(client_order.copy())
record_id += 1
current_time += timedelta(seconds=1)
algo_parent = {
'order_id': 'ALGO_001',
'parent_order_id': 'CLIENT_001',
'client_order_id': 'C20241216_MEGA',
'order_level': 1,
'order_type': 'ALGO_PARENT',
'ticker': 'ASML.AS',
'side': 'Buy',
'quantity': order_size,
'filled_quantity': 0,
'remaining_quantity': order_size,
'average_price': 0.0,
'state': 'WORKING',
'algo_strategy': 'VWAP',
'snapshot_time': current_time.isoformat(),
'event_type': 'NEW',
'record_id': f'REC_{record_id:08d}'
}
if detail_level != "client_only":
snapshots.append(algo_parent.copy())
record_id += 1
total_filled = 0
total_value = 0.0
slice_counter = 1
sor_counter = 1
stats = {
'total_slices': 0,
'total_routes': 0,
'fade_count': 0,
'partial_count': 0,
'reject_count': 0
}
hours = 7 slices_per_hour = num_slices // hours
for hour in range(hours):
if total_filled >= order_size:
break
expected = (order_size // hours) * (hour + 1)
if total_filled < expected * 0.7:
urgency = Urgency.CRITICAL
elif total_filled < expected * 0.9:
urgency = Urgency.URGENT
else:
urgency = Urgency.NORMAL
hour_slices = slices_per_hour if hour < hours - 1 else (num_slices - slice_counter + 1)
for _ in range(min(hour_slices, 50)): if total_filled >= order_size:
break
current_time += timedelta(seconds=random.randint(30, 120))
if urgency == Urgency.CRITICAL:
slice_size = random.randint(avg_slice_size * 2, avg_slice_size * 4)
elif urgency == Urgency.URGENT:
slice_size = random.randint(avg_slice_size, avg_slice_size * 2)
else:
slice_size = random.randint(avg_slice_size // 2, avg_slice_size)
slice_size = min(slice_size, order_size - total_filled)
slice_order = {
'order_id': f'SLICE_{slice_counter:05d}',
'parent_order_id': 'ALGO_001',
'client_order_id': 'C20241216_MEGA',
'order_level': 2,
'order_type': 'ALGO_SLICE',
'ticker': 'ASML.AS',
'side': 'Buy',
'quantity': slice_size,
'filled_quantity': 0,
'remaining_quantity': slice_size,
'average_price': 0.0,
'state': 'PENDING',
'urgency': urgency.value,
'snapshot_time': current_time.isoformat(),
'event_type': 'NEW',
'record_id': f'REC_{record_id:08d}'
}
if detail_level == "full":
snapshots.append(slice_order.copy())
record_id += 1
stats['total_slices'] += 1
slice_counter += 1
slice_filled = 0
slice_value = 0.0
num_routes = 3 if urgency in [Urgency.CRITICAL, Urgency.URGENT] else 2
for route_num in range(num_routes):
if slice_filled >= slice_size:
break
venue = ['NYSE', 'NASDAQ', 'ARCA', 'BATS', 'DARK'][route_num % 5]
route_size = slice_size // num_routes
outcome_rand = random.random()
if outcome_rand < fade_rate:
result = VenueResult.FADE
filled_qty = 0
stats['fade_count'] += 1
elif outcome_rand < fade_rate + partial_rate:
result = VenueResult.PARTIAL
filled_qty = route_size // 2
stats['partial_count'] += 1
elif outcome_rand < fade_rate + partial_rate + 0.02:
result = VenueResult.NO_CONN if random.random() < 0.5 else VenueResult.REJECT
filled_qty = 0
stats['reject_count'] += 1
else:
result = VenueResult.FILLED
filled_qty = route_size
if filled_qty > 0:
if urgency == Urgency.CRITICAL:
fill_price = market_price + random.uniform(0.02, 0.05)
elif urgency == Urgency.URGENT:
fill_price = market_price + random.uniform(0.01, 0.03)
else:
fill_price = market_price + random.uniform(-0.01, 0.01)
else:
fill_price = 0
if detail_level == "full":
sor_order = {
'order_id': f'SOR_{sor_counter:06d}',
'parent_order_id': slice_order['order_id'],
'client_order_id': 'C20241216_MEGA',
'order_level': 3,
'order_type': 'ROUTE',
'ticker': 'ASML.AS',
'side': 'Buy',
'quantity': route_size,
'filled_quantity': filled_qty,
'remaining_quantity': route_size - filled_qty,
'average_price': fill_price,
'state': result.value,
'venue': venue,
'snapshot_time': (current_time + timedelta(milliseconds=route_num * 50)).isoformat(),
'event_type': f'VENUE_{result.value}',
'record_id': f'REC_{record_id:08d}'
}
if result == VenueResult.NO_CONN:
sor_order['reject_reason'] = f'No connection to {venue}-FIX-01'
elif result == VenueResult.FADE:
sor_order['fade_reason'] = 'Liquidity exhausted'
snapshots.append(sor_order)
record_id += 1
stats['total_routes'] += 1
sor_counter += 1
slice_filled += filled_qty
slice_value += filled_qty * fill_price
total_filled += slice_filled
total_value += slice_value
if detail_level == "summary" and slice_filled > 0:
slice_order['filled_quantity'] = slice_filled
slice_order['remaining_quantity'] = slice_size - slice_filled
slice_order['average_price'] = slice_value / slice_filled if slice_filled > 0 else 0
slice_order['state'] = 'FILLED' if slice_filled >= slice_size else 'PARTIAL'
slice_order['event_type'] = 'SLICE_SUMMARY'
snapshots.append(slice_order.copy())
record_id += 1
current_time += timedelta(seconds=1)
client_order['filled_quantity'] = total_filled
client_order['remaining_quantity'] = order_size - total_filled
client_order['average_price'] = total_value / total_filled if total_filled > 0 else 0
client_order['state'] = 'WORKING' if total_filled < order_size else 'FILLED'
client_order['snapshot_time'] = current_time.isoformat()
client_order['event_type'] = 'CLIENT_UPDATE'
client_order['record_id'] = f'REC_{record_id:08d}'
client_order['hour'] = hour + 1
client_order['urgency'] = urgency.value
snapshots.append(client_order.copy())
record_id += 1
print(f"\n{'='*60}")
print(f"EXECUTION COMPLETE:")
print(f" Filled: {total_filled:,}/{order_size:,} ({total_filled/order_size*100:.1f}%)")
print(f" VWAP: {total_value/total_filled if total_filled > 0 else 0:.2f}")
print(f" Total Slices: {stats['total_slices']:,}")
print(f" Total Routes: {stats['total_routes']:,}")
print(f" Fades: {stats['fade_count']}")
print(f" Partials: {stats['partial_count']}")
print(f" Rejects: {stats['reject_count']}")
print(f" Snapshots: {len(snapshots):,}")
return snapshots
def main():
parser = argparse.ArgumentParser(description='Generate production VWAP data')
parser.add_argument('--size', type=int, default=1000000, help='Order size')
parser.add_argument('--slice-size', type=int, default=500, help='Average slice size')
parser.add_argument('--detail', choices=['full', 'summary', 'client_only'],
default='summary', help='Level of detail')
parser.add_argument('--output', default='final_vwap', help='Output filename base')
args = parser.parse_args()
snapshots = generate_final_vwap(
order_size=args.size,
avg_slice_size=args.slice_size,
detail_level=args.detail
)
with open(f'{args.output}.json', 'w') as f:
json.dump(snapshots, f, indent=2, default=str)
if snapshots:
keys = set()
for s in snapshots:
keys.update(s.keys())
with open(f'{args.output}.csv', 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=sorted(keys))
writer.writeheader()
writer.writerows(snapshots)
print(f"\n✅ Saved to {args.output}.csv and {args.output}.json")
import os
csv_size = os.path.getsize(f'{args.output}.csv') / (1024 * 1024)
print(f"📁 CSV size: {csv_size:.2f} MB")
if len(snapshots) > 100000:
print("⚠️ Warning: Over 100k rows. Consider using --detail summary or --slice-size larger")
if __name__ == "__main__":
main()