# chasm-cli 1.5.4

"""Deep comparison of working vs broken session JSONL structures.
Focuses on the first line (session header) and request/response structure.
"""
import sqlite3, json, os, glob, base64, hashlib

# Root of VS Code's per-workspace storage; hard-coded to one user's Windows profile.
WS_BASE = r"C:\Users\adamm\AppData\Roaming\Code\User\workspaceStorage"
# Folder names (hashes) under WS_BASE of the two workspaces being compared.
WORKING_HASH = "82cdabb21413f2ff42168423e82c8bdf"  # chasm
BROKEN_HASH = "5ec71800c69c79b96b06a37e38537907"  # Agentic

def read_db_key(db_path, key):
    """Read one JSON-encoded value from the ``ItemTable`` of a SQLite database.

    The database is opened through a ``mode=ro`` URI, i.e. read-only.

    Args:
        db_path: Filesystem path of the SQLite database file.
        key: Value of the ``key`` column to look up.

    Returns:
        The decoded JSON value, ``None`` when no row has that key, or a string
        of the form ``"ERROR: <message>"`` when opening, querying or decoding
        failed. Failures are reported as data so a diagnostic run keeps going.
    """
    conn = None
    try:
        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
        row = conn.execute("SELECT value FROM ItemTable WHERE key=?", (key,)).fetchone()
        return json.loads(row[0]) if row else None
    except Exception as e:
        # Deliberate best-effort: the caller inspects the type of the result.
        return f"ERROR: {e}"
    finally:
        # Close on every path; previously a failing query or a value that was
        # not valid JSON skipped close() and leaked the connection.
        if conn is not None:
            conn.close()

def get_session_files(ws_hash):
    """Map session id -> ``.jsonl`` path for one workspace storage folder.

    Two directories under ``WS_BASE/<ws_hash>`` are scanned, in this order:
    ``chatSessions`` and ``GitHub.copilot-chat/chatSessions``. The session id
    is the file name without its extension. When the same id is present in
    both directories, the path from the second one replaces the first.
    """
    workspace_dir = os.path.join(WS_BASE, ws_hash)
    candidate_dirs = (
        os.path.join(workspace_dir, "chatSessions"),
        os.path.join(workspace_dir, "GitHub.copilot-chat", "chatSessions"),
    )

    found = {}
    for session_dir in candidate_dirs:
        if not os.path.exists(session_dir):
            continue
        for jsonl_path in glob.glob(os.path.join(session_dir, "*.jsonl")):
            session_id, _ext = os.path.splitext(os.path.basename(jsonl_path))
            found[session_id] = jsonl_path
    return found

def parse_jsonl(filepath):
    """Read a UTF-8 JSONL file and return one entry per non-blank line.

    A line that decodes contributes its JSON value. A line that does not is
    kept as a placeholder dict holding the decoder message
    (``__PARSE_ERROR__``), the 1-based line number (``__LINE__``) and the
    first 200 characters of the stripped line (``__RAW__``), so bad lines
    stay visible to the caller instead of being dropped.
    """
    parsed = []
    with open(filepath, 'r', encoding='utf-8') as handle:
        for line_no, raw_line in enumerate(handle, start=1):
            text = raw_line.strip()
            if text:
                try:
                    parsed.append(json.loads(text))
                except json.JSONDecodeError as err:
                    parsed.append({
                        "__PARSE_ERROR__": str(err),
                        "__LINE__": line_no,
                        "__RAW__": text[:200],
                    })
    return parsed

def _summarize_value(value, limit):
    """Render a leaf value: strings longer than *limit* as a length, anything else as JSON."""
    if isinstance(value, str) and len(value) > limit:
        return f"str({len(value)} chars)"
    return json.dumps(value)


def _describe_header(header):
    """Print the keys of the header object, descending one level into dict values."""
    print(f"\n  First object (header) keys: {sorted(header.keys())}")
    for key in sorted(header.keys()):
        val = header[key]
        if isinstance(val, dict):
            print(f"    {key}: dict with keys {sorted(val.keys())}")
            for k2, v2 in sorted(val.items()):
                if isinstance(v2, dict):
                    print(f"      {k2}: dict with keys {sorted(v2.keys())}")
                elif isinstance(v2, list):
                    print(f"      {k2}: list[{len(v2)}]")
                else:
                    print(f"      {k2}: {_summarize_value(v2, 100)}")
        elif isinstance(val, list):
            print(f"    {key}: list[{len(val)}]")
            if val and isinstance(val[0], dict):
                print(f"      [0] keys: {sorted(val[0].keys())}")
        else:
            print(f"    {key}: {_summarize_value(val, 100)}")


def _describe_request(request):
    """Print the top-level shape of a request object (no descent into dict values)."""
    print(f"\n  First REQUEST keys: {sorted(request.keys())}")
    for key in sorted(request.keys()):
        val = request[key]
        if isinstance(val, dict):
            print(f"    {key}: dict keys={sorted(val.keys())}")
        elif isinstance(val, list):
            print(f"    {key}: list[{len(val)}]")
            if val and isinstance(val[0], dict):
                print(f"      [0] keys: {sorted(val[0].keys())}")
        else:
            print(f"    {key}: {_summarize_value(val, 200)}")


def _describe_response(response):
    """Print the shape of a response object, descending into dict values and the first list item."""
    print(f"\n  First RESPONSE keys: {sorted(response.keys())}")
    for key in sorted(response.keys()):
        val = response[key]
        if isinstance(val, dict):
            print(f"    {key}: dict keys={sorted(val.keys())}")
            for k2, v2 in sorted(val.items()):
                if isinstance(v2, dict):
                    print(f"      {k2}: dict keys={sorted(v2.keys())}")
                elif isinstance(v2, list):
                    print(f"      {k2}: list[{len(v2)}]")
                    if v2 and isinstance(v2[0], dict):
                        print(f"        [0] keys: {sorted(v2[0].keys())}")
                else:
                    print(f"      {k2}: {_summarize_value(v2, 100)}")
        elif isinstance(val, list):
            print(f"    {key}: list[{len(val)}]")
            if val and isinstance(val[0], dict):
                print(f"      [0] keys: {sorted(val[0].keys())}")
                # Show the structure of the first list item as well.
                for k3, v3 in sorted(val[0].items()):
                    if isinstance(v3, dict):
                        print(f"        {k3}: dict keys={sorted(v3.keys())}")
                    elif isinstance(v3, list):
                        print(f"        {k3}: list[{len(v3)}]")
                    else:
                        print(f"        {k3}: {_summarize_value(v3, 200)}")
        else:
            print(f"    {key}: {_summarize_value(val, 200)}")


def analyze_session_structure(objs, label):
    """Print a structural summary of one session's parsed JSONL objects.

    The first object is treated as the session header and its key layout is
    printed. The remaining objects are counted by their ``"type"`` value
    (``"request"``, ``"response"``, anything else), and the shape of the first
    request and first response is printed.

    Entries that are valid JSON but not objects (lists, numbers, strings,
    ``null``) are tolerated: a non-object header is reported by type, and
    non-object messages are counted as "other". Previously they raised
    ``AttributeError``.

    Args:
        objs: List of decoded JSON values, one per JSONL line.
        label: Heading text identifying the session in the output.

    Returns:
        None. All results are written to stdout.
    """
    print(f"\n  === {label} ===")
    print(f"  Total JSONL objects: {len(objs)}")

    if not objs:
        print("  EMPTY SESSION")
        return

    # First object (session header)
    first = objs[0]
    if isinstance(first, dict):
        _describe_header(first)
    else:
        print(f"\n  First object (header) is not a JSON object: {type(first).__name__}")

    # Categorize remaining objects by type
    req_count = 0
    resp_count = 0
    other_count = 0
    for obj in objs[1:]:
        kind = obj.get("type") if isinstance(obj, dict) else None
        if kind == "request":
            req_count += 1
        elif kind == "response":
            resp_count += 1
        else:
            other_count += 1

    print(f"\n  Message counts: requests={req_count}, responses={resp_count}, other={other_count}")

    # Show structure of first request and first response
    for wanted, describe in (("request", _describe_request), ("response", _describe_response)):
        for obj in objs[1:]:
            if isinstance(obj, dict) and obj.get("type") == wanted:
                describe(obj)
                break

def check_db_integrity(db_path, label):
    """Print SQLite health information for one database file.

    Reports the first row of ``PRAGMA integrity_check``, the journal mode,
    page count/size, the number of rows in ``ItemTable`` and whether a
    ``<db_path>-wal`` file exists. Any failure is printed as ``ERROR: ...``
    instead of being raised.

    The database is opened through a ``mode=ro`` URI. A plain
    ``sqlite3.connect(path)`` would create an empty database file when the
    path does not exist, which would make later existence checks on that path
    succeed; a read-only open fails instead and leaves the disk untouched.

    Args:
        db_path: Filesystem path of the SQLite database file.
        label: Heading text identifying the database in the output.

    Returns:
        None. All results are written to stdout.
    """
    print(f"\n{'='*60}")
    print(f"DB Integrity: {label}")
    print(f"{'='*60}")
    conn = None
    try:
        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
        result = conn.execute("PRAGMA integrity_check").fetchone()
        print(f"  integrity_check: {result[0]}")

        # Check journal mode
        jmode = conn.execute("PRAGMA journal_mode").fetchone()
        print(f"  journal_mode: {jmode[0]}")

        # Check page count and size
        pc = conn.execute("PRAGMA page_count").fetchone()
        ps = conn.execute("PRAGMA page_size").fetchone()
        print(f"  page_count: {pc[0]}, page_size: {ps[0]}, total: {pc[0]*ps[0]} bytes")

        # Count total rows
        count = conn.execute("SELECT COUNT(*) FROM ItemTable").fetchone()
        print(f"  total rows: {count[0]}")

        # Check WAL file
        wal_path = db_path + "-wal"
        if os.path.exists(wal_path):
            wal_size = os.path.getsize(wal_path)
            print(f"  WAL file: {wal_size} bytes")
        else:
            print(f"  WAL file: does not exist")
    except Exception as e:
        # Best-effort diagnostic: report and let the caller continue.
        print(f"  ERROR: {e}")
    finally:
        # Close on every path; the handle used to leak when a query failed.
        if conn is not None:
            conn.close()

def compare_model_cache_structure(working_cache, broken_cache):
    """Print a side-by-side look at the first entry of two model-cache lists.

    If either argument is not a list, only the two Python types are printed.
    Otherwise the entry counts are shown, the first entry of each non-empty
    list is dumped field by field (strings over 100 characters are shortened
    to their length plus an 80-character preview), and, when both lists are
    non-empty, the key sets of the two first entries are compared and any
    common key whose value types differ is flagged.
    """
    print(f"\n{'='*60}")
    print("agentSessions.model.cache structure comparison")
    print(f"{'='*60}")

    both_are_lists = isinstance(working_cache, list) and isinstance(broken_cache, list)
    if not both_are_lists:
        print(f"  Working type: {type(working_cache)}, Broken type: {type(broken_cache)}")
        return

    print(f"  Working entries: {len(working_cache)}")
    print(f"  Broken entries: {len(broken_cache)}")

    # Dump the first entry of each side, working first.
    for tag, cache in (("Working", working_cache), ("Broken", broken_cache)):
        if not cache:
            continue
        head = cache[0]
        print(f"\n  {tag} [0] keys: {sorted(head.keys())}")
        for field, value in sorted(head.items()):
            if isinstance(value, str) and len(value) > 100:
                print(f"    {field}: str({len(value)} chars) = {value[:80]}...")
            else:
                print(f"    {field}: {json.dumps(value)}")

    # Field-by-field comparison needs a first entry on both sides.
    if not (working_cache and broken_cache):
        return

    w_head = working_cache[0]
    b_head = broken_cache[0]
    w_keys = set(w_head.keys())
    b_keys = set(b_head.keys())
    print(f"\n  Keys only in working: {w_keys - b_keys}")
    print(f"  Keys only in broken: {b_keys - w_keys}")
    print(f"  Common keys: {w_keys & b_keys}")

    # Flag common keys whose values have different Python types.
    for shared_key in w_keys & b_keys:
        w_type = type(w_head[shared_key]).__name__
        b_type = type(b_head[shared_key]).__name__
        if w_type != b_type:
            print(f"  TYPE MISMATCH for {shared_key}: working={w_type} broken={b_type}")

def main():
    working_db = os.path.join(WS_BASE, WORKING_HASH, "state.vscdb")
    broken_db = os.path.join(WS_BASE, BROKEN_HASH, "state.vscdb")
    
    # 1. DB integrity check
    check_db_integrity(working_db, "WORKING (chasm)")
    check_db_integrity(broken_db, "BROKEN (Agentic)")
    
    # 2. Compare model.cache structure
    w_cache = read_db_key(working_db, "agentSessions.model.cache")
    b_cache = read_db_key(broken_db, "agentSessions.model.cache")
    compare_model_cache_structure(w_cache, b_cache)
    
    # 3. Get session files
    print(f"\n{'='*60}")
    print("Session file locations")
    print(f"{'='*60}")
    
    w_sessions = get_session_files(WORKING_HASH)
    b_sessions = get_session_files(BROKEN_HASH)
    
    print(f"\n  WORKING session files ({len(w_sessions)}):")
    for sid, path in sorted(w_sessions.items()):
        size = os.path.getsize(path)
        # Check which directory they're in
        rel = os.path.relpath(path, os.path.join(WS_BASE, WORKING_HASH))
        print(f"    {sid}: {size:,} bytes at {rel}")
    
    print(f"\n  BROKEN session files ({len(b_sessions)}):")
    for sid, path in sorted(b_sessions.items()):
        size = os.path.getsize(path)
        rel = os.path.relpath(path, os.path.join(WS_BASE, BROKEN_HASH))
        print(f"    {sid}: {size:,} bytes at {rel}")
    
    # 4. Deep structure comparison
    print(f"\n{'='*60}")
    print("Session JSONL structure deep comparison")
    print(f"{'='*60}")
    
    # Pick the best working session (non-empty, decent size)
    working_pick = None
    for sid, path in w_sessions.items():
        if os.path.getsize(path) > 5000:
            working_pick = (sid, path)
            break
    
    # Pick the best broken session (non-empty)
    broken_pick = None
    for sid, path in b_sessions.items():
        if os.path.getsize(path) > 5000:
            broken_pick = (sid, path)
            break
    
    if working_pick:
        objs = parse_jsonl(working_pick[1])
        analyze_session_structure(objs, f"WORKING session {working_pick[0]}")
    
    if broken_pick:
        objs = parse_jsonl(broken_pick[1])
        analyze_session_structure(objs, f"BROKEN session {broken_pick[0]}")
    
    # 5. Compare first JSONL lines byte-for-byte structure
    print(f"\n{'='*60}")
    print("First JSONL line (header) key comparison")
    print(f"{'='*60}")
    
    if working_pick and broken_pick:
        with open(working_pick[1], 'r', encoding='utf-8') as f:
            w_first = json.loads(f.readline().strip())
        with open(broken_pick[1], 'r', encoding='utf-8') as f:
            b_first = json.loads(f.readline().strip())
        
        def deep_key_diff(obj1, obj2, path=""):
            if isinstance(obj1, dict) and isinstance(obj2, dict):
                k1 = set(obj1.keys())
                k2 = set(obj2.keys())
                only1 = k1 - k2
                only2 = k2 - k1
                if only1:
                    print(f"  WORKING ONLY at {path}: {only1}")
                if only2:
                    print(f"  BROKEN ONLY at {path}: {only2}")
                for k in sorted(k1 & k2):
                    deep_key_diff(obj1[k], obj2[k], f"{path}.{k}")
            elif type(obj1) != type(obj2):
                print(f"  TYPE DIFF at {path}: working={type(obj1).__name__} broken={type(obj2).__name__}")
        
        deep_key_diff(w_first, b_first, "root")
    
    # 6. Check the index closely - re-read current state
    print(f"\n{'='*60}")
    print("Current index state (re-read)")
    print(f"{'='*60}")
    
    w_index = read_db_key(working_db, "chat.ChatSessionStore.index")
    b_index = read_db_key(broken_db, "chat.ChatSessionStore.index")
    
    print(f"\n  WORKING index version: {w_index.get('version') if isinstance(w_index, dict) else 'N/A'}")
    if isinstance(w_index, dict) and 'entries' in w_index:
        entries = w_index['entries']
        if isinstance(entries, dict):
            print(f"  WORKING entries: {len(entries)} (dict format)")
            for sid, meta in list(entries.items())[:2]:
                print(f"    {sid}: keys={sorted(meta.keys())}")
        elif isinstance(entries, list):
            print(f"  WORKING entries: {len(entries)} (LIST format!)")
    
    print(f"\n  BROKEN index version: {b_index.get('version') if isinstance(b_index, dict) else 'N/A'}")
    if isinstance(b_index, dict) and 'entries' in b_index:
        entries = b_index['entries']
        if isinstance(entries, dict):
            print(f"  BROKEN entries: {len(entries)} (dict format)")
            for sid, meta in entries.items():
                print(f"    {sid}: keys={sorted(meta.keys())}")
                # Print full metadata
                for k, v in sorted(meta.items()):
                    print(f"      {k}: {json.dumps(v)}")
        elif isinstance(entries, list):
            print(f"  BROKEN entries: {len(entries)} (LIST format!)")

    # 7. Check for other broken workspaces too
    print(f"\n{'='*60}")
    print("Quick check of ALL broken workspaces")
    print(f"{'='*60}")
    
    broken_hashes = {
        "Agentic": "5ec71800c69c79b96b06a37e38537907",
        "AgenticFortress": "724ab159cbc91cdd8242d9b5aa690c3b",
        "AgentQ": "0e71f7221cc2ffe28e938bc38efeb8c5",
        "AIModelVault": "56cb9246fe0ba2d5debb96e9135e5c95",
        "Cyborg": "cc60bfebb242bac1578d7b49a44033db",
        "Hyperlight": "1c25bd214fe52001bcec2ffa40836c82",
        "Framewerx": "c8f466664b769ee6a567242b576fb955",
        "OverwatchGCS": "c7aca0a33bd40b6a718d04f88c333669",
        "Rodeo": "9190fefe3d0b856449a6bdacdca0c1ef",
        "SentryWall": "6c88aa026ece73be4109fe5008915801",
        "XWERX_B001": "05db3446971342e7f6a633a732955575",
    }
    
    for name, h in sorted(broken_hashes.items()):
        db_path = os.path.join(WS_BASE, h, "state.vscdb")
        if not os.path.exists(db_path):
            print(f"\n  {name}: DB not found!")
            continue
        
        idx = read_db_key(db_path, "chat.ChatSessionStore.index")
        mc = read_db_key(db_path, "agentSessions.model.cache")
        
        # Session files
        sessions = get_session_files(h)
        
        idx_count = 0
        if isinstance(idx, dict) and 'entries' in idx:
            e = idx['entries']
            idx_count = len(e) if isinstance(e, (dict, list)) else 0
        
        mc_count = len(mc) if isinstance(mc, list) else 0
        
        print(f"\n  {name} ({h[:8]}):")
        print(f"    Index entries: {idx_count}, Model cache entries: {mc_count}")
        print(f"    Session files: {len(sessions)}")
        for sid, path in sessions.items():
            size = os.path.getsize(path)
            rel = os.path.relpath(path, os.path.join(WS_BASE, h))
            in_index = False
            if isinstance(idx, dict) and 'entries' in idx:
                e = idx['entries']
                if isinstance(e, dict):
                    in_index = sid in e
            print(f"      {sid}: {size:,}b at {rel} (in_index={in_index})")

    # 8. Check if the session file is readable as the EXACT bytes VS Code expects
    print(f"\n{'='*60}")
    print("Raw file format check")
    print(f"{'='*60}")
    
    if broken_pick:
        with open(broken_pick[1], 'rb') as f:
            raw = f.read(500)
        print(f"\n  Broken session first 500 bytes (hex):")
        print(f"    Starts with BOM: {raw[:3] == b'\\xef\\xbb\\xbf'}")
        print(f"    First byte: {hex(raw[0])}")
        print(f"    Contains \\r\\n: {b'\\r\\n' in raw}")
        print(f"    Contains \\n: {b'\\n' in raw}")
        print(f"    Line ending style: {'CRLF' if b'\\r\\n' in raw else 'LF'}")
        # Find first newline
        nl_pos = raw.find(b'\n')
        print(f"    First newline at byte: {nl_pos}")
    
    if working_pick:
        with open(working_pick[1], 'rb') as f:
            raw = f.read(500)
        print(f"\n  Working session first 500 bytes (hex):")
        print(f"    Starts with BOM: {raw[:3] == b'\\xef\\xbb\\xbf'}")
        print(f"    First byte: {hex(raw[0])}")
        print(f"    Contains \\r\\n: {b'\\r\\n' in raw}")
        print(f"    Contains \\n: {b'\\n' in raw}")
        print(f"    Line ending style: {'CRLF' if b'\\r\\n' in raw else 'LF'}")
        nl_pos = raw.find(b'\n')
        print(f"    First newline at byte: {nl_pos}")

# Run the diagnostics only when executed as a script, not when imported.
if __name__ == "__main__":
    main()