# chasm-cli 1.5.4

"""
Inspect agentSessions.model.cache from both workspaces to find what causes
'Cannot read properties of undefined (reading start)' error.
"""
import sqlite3
import json
import os

WS_STORAGE = r"C:\Users\adamm\AppData\Roaming\Code\User\workspaceStorage"
BROKEN_HASH = "5ec71800c69c79b96b06a37e38537907"
WORKING_HASH = "82cdabb21413f2ff42168423e82c8bdf"

def get_db_value(ws_hash, key):
    """Return the value stored under *key* in a workspace's ``state.vscdb``.

    Args:
        ws_hash: Name of the workspace directory under ``WS_STORAGE``.
        key: Key to look up in the ``ItemTable`` table.

    Returns:
        The ``value`` column of the first matching row, or ``None`` when no
        row has that key.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    db_path = os.path.join(WS_STORAGE, ws_hash, "state.vscdb")
    conn = sqlite3.connect(db_path)
    try:
        # try/finally so the connection is released even when the query
        # raises (for example when ItemTable does not exist).
        row = conn.execute(
            "SELECT value FROM ItemTable WHERE key = ?", (key,)
        ).fetchone()
    finally:
        conn.close()
    return row[0] if row else None

def deep_find_start_issues(obj, path="", depth=0):
    """Recursively collect places where reading ``.start`` would likely fail.

    Walks nested dicts and lists and reports:

    * a ``range`` key whose value is ``None``, or a dict that has none of
      ``start`` / ``startLineNumber`` / ``startColumn``;
    * a dict with at most 5 keys that has ``end`` but neither ``start`` nor
      ``startLineNumber``;
    * a dict with a ``$mid`` or ``scheme`` key whose ``scheme`` string, or the
      text before the first ``:`` in its ``external`` string, is not a
      syntactically valid URI scheme.

    Args:
        obj: Decoded JSON value to inspect.
        path: Human-readable location of *obj*, used as a prefix in messages.
        depth: Current recursion depth; nothing deeper than 30 is inspected.

    Returns:
        A list of issue descriptions (strings), empty when nothing was found.
        Only the first 100 items of any list are inspected.
    """
    # Imported unconditionally at the top of the function: importing inside
    # one branch makes ``re`` a local that is unbound in the other branch.
    import re

    scheme_pattern = r'^[a-zA-Z][a-zA-Z0-9+.\-]*$'

    issues = []
    if depth > 30:
        return issues

    if isinstance(obj, dict):
        # Check: a 'range' key whose value is None, or a dict without a start
        if 'range' in obj:
            rng = obj['range']
            if rng is None:
                issues.append(f"{path}.range = null (accessing .start would fail)")
            elif isinstance(rng, dict):
                if 'start' not in rng and 'startLineNumber' not in rng and 'startColumn' not in rng:
                    issues.append(f"{path}.range has no start/startLineNumber: keys={list(rng.keys())}")

        # Check: has 'end' but no 'start' (broken Range)
        if 'end' in obj and 'start' not in obj and 'startLineNumber' not in obj:
            # Only flag small dicts, which are more likely to be Range-like
            if len(obj) <= 5:
                issues.append(f"{path}: has 'end' but no 'start': keys={list(obj.keys())}")

        # Check: URI scheme issues
        if '$mid' in obj or 'scheme' in obj:
            scheme = obj.get('scheme', '')
            if isinstance(scheme, str) and scheme:
                if not re.match(scheme_pattern, scheme):
                    issues.append(f"{path}.scheme: ILLEGAL CHARS: '{scheme}'")
            external = obj.get('external', '')
            if isinstance(external, str) and ':' in external:
                uri_scheme = external.split(':')[0]
                if not re.match(scheme_pattern, uri_scheme):
                    issues.append(f"{path}.external: bad URI scheme: '{uri_scheme}' in '{external[:100]}'")

        # Recurse exactly once per value; a second, discarded call here would
        # double the work at every nesting level.
        for k, v in obj.items():
            issues.extend(deep_find_start_issues(v, f"{path}.{k}", depth + 1))

    elif isinstance(obj, list):
        # Limit to the first 100 items
        for i, item in enumerate(obj[:100]):
            issues.extend(deep_find_start_issues(item, f"{path}[{i}]", depth + 1))

    return issues

def _requests_summary(container):
    """Describe ``container['requests']``: its length, or a note if unsized."""
    reqs = container.get('requests', [])
    try:
        return str(len(reqs))
    except TypeError:
        # e.g. a JSON null; report it instead of crashing the report.
        return f"not sized ({type(reqs).__name__})"


def _print_issues(issues, indent):
    """Print the issue count and the first 30 issues, or an OK line."""
    if not issues:
        print(f"{indent}Deep search: OK")
        return
    print(f"{indent}ISSUES FOUND ({len(issues)}):")
    for issue in issues[:30]:
        print(f"{indent}  {issue}")


def check_model_cache(ws_hash, label):
    """Print a report on a workspace's ``agentSessions.model.cache`` value.

    For a list value, each dict entry gets its keys, ``sessionId``, request
    counts (top-level and under a ``value`` wrapper dict, when present) and
    the result of ``deep_find_start_issues``. For a dict value, the keys and
    the deep-search result are printed. Non-dict list entries are skipped.

    Args:
        ws_hash: Workspace directory name passed to ``get_db_value``.
        label: Human-readable workspace name used in the output.

    Returns:
        None. All results are printed.
    """
    raw = get_db_value(ws_hash, "agentSessions.model.cache")
    if not raw:
        print(f"  No model.cache for {label}")
        return

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as e:
        # Report corrupt JSON rather than aborting the whole script.
        print(f"  model.cache for {label}: INVALID JSON: {e}")
        return

    print(f"\n{'='*80}")
    print(f"MODEL CACHE: {label}")
    print(f"{'='*80}")

    if isinstance(data, list):
        print(f"  Entries: {len(data)}")
        for i, entry in enumerate(data):
            if not isinstance(entry, dict):
                continue
            print(f"\n  Entry[{i}]: keys={list(entry.keys())}")
            sid = entry.get('sessionId', 'unknown')
            print(f"    sessionId: {sid}")

            # Check if it has requests
            print(f"    requests: {_requests_summary(entry)}")

            # Check for 'value' wrapper
            val = entry.get('value')
            if val and isinstance(val, dict):
                print(f"    value.keys: {list(val.keys())}")
                print(f"    value.requests: {_requests_summary(val)}")

            # Deep search for issues
            _print_issues(deep_find_start_issues(entry, f"cache[{i}]"), "    ")
    elif isinstance(data, dict):
        print(f"  Type: dict, keys: {list(data.keys())}")
        _print_issues(deep_find_start_issues(data, "cache"), "  ")

def check_session_index(ws_hash, label):
    """Print a summary of a workspace's ``chat.ChatSessionStore.index`` value.

    For a list value, prints the entry count and each dict entry's
    ``sessionId`` and ``isActive``. For a dict value, prints its keys.

    Args:
        ws_hash: Workspace directory name passed to ``get_db_value``.
        label: Human-readable workspace name used in the output.

    Returns:
        None. All results are printed.
    """
    raw = get_db_value(ws_hash, "chat.ChatSessionStore.index")
    if not raw:
        print(f"  No session index for {label}")
        return

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as e:
        # Report corrupt JSON rather than aborting the whole script.
        print(f"  Session index for {label}: INVALID JSON: {e}")
        return

    print(f"\n{'='*80}")
    print(f"SESSION INDEX: {label}")
    print(f"{'='*80}")

    if isinstance(data, list):
        print(f"  Entries: {len(data)}")
        for entry in data:
            if isinstance(entry, dict):
                print(f"  - id={entry.get('sessionId', 'unknown')}, isActive={entry.get('isActive')}")
    elif isinstance(data, dict):
        print(f"  Type: dict, keys: {list(data.keys())}")

# Scan each relevant DB key in both workspaces and report what the deep
# search finds (at most 20 issues are listed per key/workspace pair).
_keys_to_scan = (
    "agentSessions.model.cache",
    "chat.ChatSessionStore.index",
    "memento/interactive-session",
    "memento/interactive-session-view-copilot",
    "chat.terminalSessions",
)
_workspaces = (
    (BROKEN_HASH, "Agentic (BROKEN)"),
    (WORKING_HASH, "chasm (WORKING)"),
)
for db_key in _keys_to_scan:
    for workspace_hash, workspace_label in _workspaces:
        stored = get_db_value(workspace_hash, db_key)
        if not stored:
            print(f"\n  {db_key} not found in {workspace_label}")
            continue

        try:
            parsed = json.loads(stored)
        except json.JSONDecodeError as err:
            print(f"\n  {db_key} in {workspace_label}: INVALID JSON: {err}")
            continue

        found = deep_find_start_issues(parsed, db_key)
        if not found:
            print(f"\n  {db_key} in {workspace_label}: OK")
            continue

        print(f"\n  {db_key} in {workspace_label}: {len(found)} ISSUES:")
        for problem in found[:20]:
            print(f"    {problem}")

# Print a banner, then the detailed model-cache report for each workspace.
print("\n\n" + "=" * 80, "DETAILED MODEL CACHE CHECK", "=" * 80, sep="\n")
for workspace in ((BROKEN_HASH, "Agentic (BROKEN)"), (WORKING_HASH, "chasm (WORKING)")):
    check_model_cache(*workspace)

# Check session JSONL files with correct v-field parsing.
# For every *.jsonl file under each workspace's chatSessions directory, decode
# the first JSON value, read its "v" -> "requests" list, and report message
# parts whose range is missing or lacks 'start', plus any deep-search issues.
print("\n\n" + "="*80)
print("SESSION JSONL DEEP CHECK (using v field)")
print("="*80)
for ws_hash, label in [(BROKEN_HASH, "Agentic (BROKEN)"), (WORKING_HASH, "chasm (WORKING)")]:
    sessions_dir = os.path.join(WS_STORAGE, ws_hash, "chatSessions")
    if not os.path.isdir(sessions_dir):
        continue

    print(f"\n{label}:")
    for fname in sorted(os.listdir(sessions_dir)):
        if not fname.endswith('.jsonl'):
            continue
        fpath = os.path.join(sessions_dir, fname)
        with open(fpath, 'r', encoding='utf-8') as f:
            content = f.read()

        # raw_decode parses only the first JSON value and ignores what follows.
        try:
            obj, _ = json.JSONDecoder().raw_decode(content.strip())
        except json.JSONDecodeError as e:
            print(f"  {fname}: JSON PARSE ERROR: {e}")
            continue

        # Guard each level: an unexpected shape is reported, not fatal.
        if not isinstance(obj, dict):
            print(f"  {fname}: first JSON value is {type(obj).__name__}, not an object; skipping")
            continue
        v = obj.get("v", {})
        if not isinstance(v, dict):
            print(f"  {fname}: 'v' is {type(v).__name__}, not an object; skipping")
            continue
        reqs = v.get("requests", [])
        if not isinstance(reqs, list):
            print(f"  {fname}: 'requests' is {type(reqs).__name__}, not a list; skipping")
            continue

        print(f"  {fname}: {len(reqs)} requests, {os.path.getsize(fpath)} bytes")

        for ridx, req in enumerate(reqs):
            if not isinstance(req, dict):
                print(f"    REQ[{ridx}]: is {type(req).__name__}, not an object; skipping")
                continue

            msg = req.get("message")
            if isinstance(msg, dict):
                parts = msg.get("parts", [])
                if isinstance(parts, list):
                    for pidx, part in enumerate(parts):
                        if not isinstance(part, dict):
                            continue
                        # .get() also yields None when the key is absent.
                        rng = part.get("range")
                        if rng is None:
                            print(f"    REQ[{ridx}] msg.parts[{pidx}]: range is None!")
                        elif isinstance(rng, dict) and "start" not in rng:
                            print(f"    REQ[{ridx}] msg.parts[{pidx}]: range has no 'start'! keys={list(rng.keys())}")

            # Deep search this request
            req_issues = deep_find_start_issues(req, f"  req[{ridx}]")
            if req_issues:
                print(f"    REQ[{ridx}] issues:")
                for issue in req_issues[:10]:
                    print(f"      {issue}")