# chasm-cli 1.5.4
#
# Universal chat session manager - harvest, merge, and analyze AI chat history from VS Code, Cursor, and other editors
# Documentation
"""
Inspect agentSessions.model.cache from both workspaces to find what causes
'Cannot read properties of undefined (reading start)' error.
"""
import sqlite3
import json
import os

WS_STORAGE = r"C:\Users\adamm\AppData\Roaming\Code\User\workspaceStorage"
BROKEN_HASH = "5ec71800c69c79b96b06a37e38537907"
WORKING_HASH = "82cdabb21413f2ff42168423e82c8bdf"

def get_db_value(ws_hash, key):
    """Return the value stored under *key* in a workspace's ``state.vscdb``.

    Args:
        ws_hash: Name of the workspace folder under ``WS_STORAGE``.
        key: Key to look up in the ``ItemTable`` table.

    Returns:
        The ``value`` column of the first matching row, or ``None`` when no
        row matches. Any ``sqlite3`` error raised by the query propagates to
        the caller.
    """
    db_path = os.path.join(WS_STORAGE, ws_hash, "state.vscdb")
    conn = sqlite3.connect(db_path)
    try:
        row = conn.execute(
            "SELECT value FROM ItemTable WHERE key = ?", (key,)
        ).fetchone()
    finally:
        # Close the connection even when the query raises, so a failed lookup
        # does not leave the database file handle open.
        conn.close()
    return row[0] if row else None

def deep_find_start_issues(obj, path="", depth=0):
    """Recursively collect places in *obj* where a ``.start`` access would fail.

    Walks a decoded JSON value (dicts, lists, scalars) and reports:

    * a ``range`` key whose value is ``None``, or a dict that has none of
      ``start`` / ``startLineNumber`` / ``startColumn``;
    * a dict with at most 5 keys that has ``end`` but neither ``start`` nor
      ``startLineNumber``;
    * dicts containing ``$mid`` or ``scheme`` whose ``scheme`` value, or the
      text before the first ``:`` in ``external``, does not look like a URI
      scheme.

    Args:
        obj: Decoded JSON value to inspect.
        path: Location of *obj*, used as the prefix of every message.
        depth: Current recursion depth; values nested more than 30 levels
            deep are not inspected.

    Returns:
        A list of human-readable issue strings (empty when nothing is found).
    """
    # Imported unconditionally: a function-level import makes ``re`` a local
    # name, so importing it only inside the ``scheme`` branch left it unbound
    # when just ``external`` needed checking.
    import re

    issues = []
    if depth > 30:
        return issues

    scheme_pattern = r'^[a-zA-Z][a-zA-Z0-9+.\-]*$'

    if isinstance(obj, dict):
        # Check: a 'range' key that is null, or a dict lacking any start field
        if 'range' in obj:
            rng = obj['range']
            if rng is None:
                issues.append(f"{path}.range = null (accessing .start would fail)")
            elif isinstance(rng, dict):
                if 'start' not in rng and 'startLineNumber' not in rng and 'startColumn' not in rng:
                    issues.append(f"{path}.range has no start/startLineNumber: keys={list(rng.keys())}")

        # Check: has 'end' but no 'start' (broken Range)
        if 'end' in obj and 'start' not in obj and 'startLineNumber' not in obj:
            # Only flag small dicts, which are the ones that look Range-like
            if len(obj) <= 5:
                issues.append(f"{path}: has 'end' but no 'start': keys={list(obj.keys())}")

        # Check: URI scheme issues
        if '$mid' in obj or 'scheme' in obj:
            scheme = obj.get('scheme', '')
            if isinstance(scheme, str) and scheme:
                if not re.match(scheme_pattern, scheme):
                    issues.append(f"{path}.scheme: ILLEGAL CHARS: '{scheme}'")
            external = obj.get('external', '')
            if isinstance(external, str) and external:
                if ':' in external:
                    uri_scheme = external.split(':')[0]
                    if not re.match(scheme_pattern, uri_scheme):
                        issues.append(f"{path}.external: bad URI scheme: '{uri_scheme}' in '{external[:100]}'")

        # Recurse exactly once per child; a second, discarded call here would
        # double the work at every dict level.
        for k, v in obj.items():
            issues.extend(deep_find_start_issues(v, f"{path}.{k}", depth + 1))

    elif isinstance(obj, list):
        # Limit to the first 100 items of each list
        for i, item in enumerate(obj[:100]):
            issues.extend(deep_find_start_issues(item, f"{path}[{i}]", depth + 1))

    return issues

def _sized_len(value):
    """Return ``len(value)``, or a short type note when *value* has no length."""
    try:
        return len(value)
    except TypeError:
        return f"n/a ({type(value).__name__})"


def _print_issues(issues, indent, limit=30):
    """Print an ``ISSUES FOUND`` header followed by at most *limit* issues."""
    print(f"{indent}ISSUES FOUND ({len(issues)}):")
    for issue in issues[:limit]:
        print(f"{indent}  {issue}")


def check_model_cache(ws_hash, label):
    """Print a structural summary of a workspace's ``agentSessions.model.cache``.

    Reads the cached value via ``get_db_value``, decodes it as JSON, and for
    each dict entry of a list-shaped cache prints its keys, ``sessionId``,
    request counts and any issues reported by ``deep_find_start_issues``.
    A dict-shaped cache gets its keys and issues printed.

    Args:
        ws_hash: Workspace storage folder name passed to ``get_db_value``.
        label: Human-readable workspace name used in the printed output.

    Returns:
        None. All results are written to stdout.
    """
    raw = get_db_value(ws_hash, "agentSessions.model.cache")
    if not raw:
        print(f"  No model.cache for {label}")
        return

    # A corrupt cache is a plausible finding for this script, so report it
    # instead of aborting the whole run.
    try:
        data = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f"  model.cache for {label}: INVALID JSON: {e}")
        return

    print(f"\n{'='*80}")
    print(f"MODEL CACHE: {label}")
    print(f"{'='*80}")

    if isinstance(data, list):
        print(f"  Entries: {len(data)}")
        for i, entry in enumerate(data):
            if not isinstance(entry, dict):
                continue
            print(f"\n  Entry[{i}]: keys={list(entry.keys())}")
            sid = entry.get('sessionId', 'unknown')
            print(f"    sessionId: {sid}")

            # 'requests' may be stored as null or another unsized value, so
            # the count must not assume a list.
            reqs = entry.get('requests', [])
            print(f"    requests: {_sized_len(reqs)}")

            # Check for 'value' wrapper
            val = entry.get('value')
            if val and isinstance(val, dict):
                print(f"    value.keys: {list(val.keys())}")
                v_reqs = val.get('requests', [])
                print(f"    value.requests: {_sized_len(v_reqs)}")

            # Deep search for issues
            issues = deep_find_start_issues(entry, f"cache[{i}]")
            if issues:
                _print_issues(issues, "    ")
            else:
                print("    Deep search: OK")
    elif isinstance(data, dict):
        print(f"  Type: dict, keys: {list(data.keys())}")
        issues = deep_find_start_issues(data, "cache")
        if issues:
            _print_issues(issues, "  ")

def check_session_index(ws_hash, label):
    """Print a summary of a workspace's ``chat.ChatSessionStore.index`` value.

    For a list-shaped index, prints the entry count and one line per dict
    entry with its ``sessionId`` and ``isActive`` fields. For a dict-shaped
    index, prints only its keys.

    Args:
        ws_hash: Workspace storage folder name passed to ``get_db_value``.
        label: Human-readable workspace name used in the printed output.

    Returns:
        None. All results are written to stdout.
    """
    stored = get_db_value(ws_hash, "chat.ChatSessionStore.index")
    if not stored:
        print(f"  No session index for {label}")
        return

    index = json.loads(stored)
    rule = '=' * 80
    print("\n" + rule)
    print(f"SESSION INDEX: {label}")
    print(rule)

    if isinstance(index, dict):
        print(f"  Type: dict, keys: {list(index.keys())}")
        return
    if not isinstance(index, list):
        # Any other JSON shape gets the banner only
        return

    print(f"  Entries: {len(index)}")
    for item in index:
        if not isinstance(item, dict):
            continue
        session_id = item.get('sessionId', 'unknown')
        active = item.get('isActive')
        print(f"  - id={session_id}, isActive={active}")

# Scan every chat-related state key in both workspaces and report whatever
# deep_find_start_issues finds in each decoded value.
for state_key in (
    "agentSessions.model.cache",
    "chat.ChatSessionStore.index",
    "memento/interactive-session",
    "memento/interactive-session-view-copilot",
    "chat.terminalSessions",
):
    for workspace_hash, workspace_label in (
        (BROKEN_HASH, "Agentic (BROKEN)"),
        (WORKING_HASH, "chasm (WORKING)"),
    ):
        stored = get_db_value(workspace_hash, state_key)
        if stored:
            try:
                decoded = json.loads(stored)
            except json.JSONDecodeError as err:
                print(f"\n  {state_key} in {workspace_label}: INVALID JSON: {err}")
            else:
                found = deep_find_start_issues(decoded, state_key)
                if not found:
                    print(f"\n  {state_key} in {workspace_label}: OK")
                else:
                    print(f"\n  {state_key} in {workspace_label}: {len(found)} ISSUES:")
                    # Cap the listing at 20 lines per key
                    for line in found[:20]:
                        print(f"    {line}")
        else:
            print(f"\n  {state_key} not found in {workspace_label}")

# Per-entry dump of the model cache structure for each workspace
print("\n\n" + "\n".join(("=" * 80, "DETAILED MODEL CACHE CHECK", "=" * 80)))
for workspace_hash, workspace_label in (
    (BROKEN_HASH, "Agentic (BROKEN)"),
    (WORKING_HASH, "chasm (WORKING)"),
):
    check_model_cache(workspace_hash, workspace_label)

# Check session JSONL files with correct v-field parsing.
# Malformed session data is what this script is looking for, so unexpected
# shapes are reported and skipped instead of crashing the run.
print("\n\n" + "="*80)
print("SESSION JSONL DEEP CHECK (using v field)")
print("="*80)
for ws_hash, label in [(BROKEN_HASH, "Agentic (BROKEN)"), (WORKING_HASH, "chasm (WORKING)")]:
    sessions_dir = os.path.join(WS_STORAGE, ws_hash, "chatSessions")
    if not os.path.isdir(sessions_dir):
        continue

    print(f"\n{label}:")
    for fname in sorted(os.listdir(sessions_dir)):
        if not fname.endswith('.jsonl'):
            continue
        fpath = os.path.join(sessions_dir, fname)
        try:
            with open(fpath, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"  {fname}: READ ERROR: {e}")
            continue

        # Only the first JSON value in the file is decoded; anything after it
        # is ignored by raw_decode.
        decoder = json.JSONDecoder()
        try:
            obj, _ = decoder.raw_decode(content.strip())
        except json.JSONDecodeError as e:
            print(f"  {fname}: JSON PARSE ERROR: {e}")
            continue

        if not isinstance(obj, dict):
            print(f"  {fname}: unexpected top-level JSON type: {type(obj).__name__}")
            continue
        v = obj.get("v", {})
        if not isinstance(v, dict):
            print(f"  {fname}: 'v' is {type(v).__name__}, expected an object")
            continue
        reqs = v.get("requests", [])
        if not isinstance(reqs, list):
            print(f"  {fname}: 'v.requests' is {type(reqs).__name__}, expected an array")
            continue

        print(f"  {fname}: {len(reqs)} requests, {os.path.getsize(fpath)} bytes")

        for ridx, req in enumerate(reqs):
            if isinstance(req, dict):
                msg = req.get("message")
            else:
                print(f"    REQ[{ridx}]: expected an object, got {type(req).__name__}")
                msg = None

            if isinstance(msg, dict):
                parts = msg.get("parts", [])
                if not isinstance(parts, list):
                    print(f"    REQ[{ridx}] msg.parts: expected an array, got {type(parts).__name__}")
                    parts = []
                for pidx, part in enumerate(parts):
                    if not isinstance(part, dict):
                        continue
                    # A missing 'range' key is reported the same as a null one
                    rng = part.get("range")
                    if rng is None:
                        print(f"    REQ[{ridx}] msg.parts[{pidx}]: range is None!")
                    elif isinstance(rng, dict):
                        if "start" not in rng:
                            print(f"    REQ[{ridx}] msg.parts[{pidx}]: range has no 'start'! keys={list(rng.keys())}")

            # Deep search this request
            req_issues = deep_find_start_issues(req, f"  req[{ridx}]")
            if req_issues:
                print(f"    REQ[{ridx}] issues:")
                for issue in req_issues[:10]:
                    print(f"      {issue}")