# chasm-cli 1.5.4
#
# Universal chat session manager - harvest, merge, and analyze AI chat history from VS Code, Cursor, and other editors
# Documentation
"""
Deep dump of ALL chat-related DB state for Agentic workspace.
Compare model cache entry format against working workspace.
Check all cross-references for consistency.
"""
import sqlite3, json, os, base64, glob

# NOTE: every path below is hard-coded to one user's Windows profile; edit
# them before running this script on another machine.

# Storage directory of the workspace under investigation (the report labels
# it "BROKEN (Agentic)").
BROKEN_WS = r"C:\Users\adamm\AppData\Roaming\Code\User\workspaceStorage\5ec71800c69c79b96b06a37e38537907"
# Storage directory of the reference workspace the broken one is compared
# against (the report labels it "WORKING (chasm)").
WORKING_WS = r"C:\Users\adamm\AppData\Roaming\Code\User\workspaceStorage\82cdabb21413f2ff42168423e82c8bdf"
# SQLite database of each workspace; opened with sqlite3 and queried through
# its ItemTable table.
BROKEN_DB = os.path.join(BROKEN_WS, "state.vscdb")
WORKING_DB = os.path.join(WORKING_WS, "state.vscdb")
# Directory of each workspace whose entries are listed as session files.
BROKEN_SESSIONS_DIR = os.path.join(BROKEN_WS, "chatSessions")
WORKING_SESSIONS_DIR = os.path.join(WORKING_WS, "chatSessions")

# ItemTable keys that are always dumped first, each under its own heading.
# Other keys whose names match chat/session/copilot/agent are dumped
# afterwards; membership in this list is what excludes a key from that
# second pass.
CHAT_KEYS = [
    "chat.ChatSessionStore.index",
    "agentSessions.model.cache",
    "agentSessions.state.cache",
    "memento/interactive-session-view-copilot",
    "chat.untitledInputState",
    "chat/toolOutputStateCache",
]

def dump_key(conn, key):
    """Fetch the value stored under *key* in ``ItemTable`` and decode it.

    Args:
        conn: Open ``sqlite3`` connection to a database containing an
            ``ItemTable`` table with ``key`` and ``value`` columns.
        key: Exact key to look up.

    Returns:
        ``None`` when no row matches *key*; the parsed JSON value when the
        stored value is valid JSON (so a stored JSON ``null`` also yields
        ``None``); otherwise the stored value unchanged.
    """
    row = conn.execute(
        "SELECT value FROM ItemTable WHERE key = ?", (key,)
    ).fetchone()
    if row is None:
        return None

    raw = row[0]
    try:
        return json.loads(raw)
    except (TypeError, ValueError, RecursionError):
        # TypeError: the value is not str/bytes (NULL, a number, ...).
        # ValueError: malformed JSON (json.JSONDecodeError) or undecodable
        #   bytes (UnicodeDecodeError); both are ValueError subclasses.
        # RecursionError: pathologically deep nesting.
        # Falling back to the raw value keeps the dump best-effort, while
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return raw

def list_sessions(sessions_dir):
    """Return the names of the entries directly inside *sessions_dir*.

    Args:
        sessions_dir: Path of the directory to list.

    Returns:
        Sorted list of base names (no directory component). The list is
        empty when *sessions_dir* is not an existing directory. Names that
        start with a dot are not returned, because the ``*`` glob pattern
        does not match them.
    """
    if not os.path.isdir(sessions_dir):
        return []
    # Escape the directory part so that glob metacharacters in the path
    # itself ("[", "]", "*", "?") are taken literally; only the trailing "*"
    # acts as a wildcard.
    pattern = os.path.join(glob.escape(sessions_dir), "*")
    return sorted(os.path.basename(path) for path in glob.glob(pattern))

def dump_workspace(label, db_path, sessions_dir):
    """Print the chat-related state of one workspace to stdout.

    Three sections are printed under a banner carrying *label*:

    1. every key in ``CHAT_KEYS``: dict/list values as indented JSON cut to
       3000 characters, any other value cut to 500 characters, and
       ``(not found)`` when the lookup yields ``None``;
    2. every other ``ItemTable`` key matching the ``chat``/``session``/
       ``copilot``/``agent`` LIKE patterns, values cut to 500 characters
       (nothing is printed after the key when its value is ``None``);
    3. the entries of *sessions_dir* with their size in bytes.

    Args:
        label: Heading text shown in the banner.
        db_path: Path of the SQLite database to read.
        sessions_dir: Directory whose entries are listed.

    Raises:
        sqlite3.Error: Propagated from connecting or querying. The
            connection is closed before the error leaves this function.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print(f"  {label}")
    print(banner)

    conn = sqlite3.connect(db_path)
    try:
        # Dump all chat-related keys
        for key in CHAT_KEYS:
            val = dump_key(conn, key)
            print(f"\n--- {key} ---")
            if val is None:
                print("  (not found)")
            elif isinstance(val, (dict, list)):
                print(json.dumps(val, indent=2)[:3000])
            else:
                print(f"  {str(val)[:500]}")

        # Also find any other chat/session/copilot/agent keys
        print("\n--- ALL chat/session/copilot/agent keys ---")
        cur = conn.execute(
            "SELECT key FROM ItemTable WHERE key LIKE '%chat%' OR key LIKE '%session%' OR key LIKE '%copilot%' OR key LIKE '%agent%'"
        )
        other_keys = sorted(row[0] for row in cur.fetchall())
        for k in other_keys:
            if k in CHAT_KEYS:
                # Already printed in full by the first section.
                continue
            val = dump_key(conn, k)
            print(f"\n  {k}:")
            if isinstance(val, (dict, list)):
                print(f"    {json.dumps(val, indent=2)[:500]}")
            elif val is not None:
                print(f"    {str(val)[:500]}")
    finally:
        # Release the database handle even when a query or print fails.
        conn.close()

    # List session files (needs no database access)
    print("\n--- Session files ---")
    for s in sorted(list_sessions(sessions_dir)):
        size = os.path.getsize(os.path.join(sessions_dir, s))
        print(f"  {s}  ({size} bytes)")

# Dump both workspaces: the broken one first, then the working reference,
# so the two reports can be compared section by section.
for _label, _db_path, _sessions_dir in (
    ("BROKEN (Agentic) - 5ec71800", BROKEN_DB, BROKEN_SESSIONS_DIR),
    ("WORKING (chasm) - 82cdabb2", WORKING_DB, WORKING_SESSIONS_DIR),
):
    dump_workspace(_label, _db_path, _sessions_dir)

# Cross-reference check for broken workspace.
#
# The helpers below only normalise decoded values; all reporting is done by
# the script statements that follow them.


def _session_id_from_filename(filename):
    """Return *filename* with one trailing ``.jsonl`` or ``.json`` removed."""
    for suffix in (".jsonl", ".json"):
        if filename.endswith(suffix):
            return filename[: -len(suffix)]
    return filename


def _entry_session_id(entry, fallback="?"):
    """Return ``entry["sessionId"]``, else ``entry["id"]``, else *fallback*."""
    return entry.get("sessionId", entry.get("id", fallback))


def _index_session_ids(index):
    """Collect the session IDs referenced by the decoded session index.

    Accepts a list of entry dicts, or a dict whose ``"entries"`` value is
    either such a list or a mapping of session ID to entry dict.

    Returns:
        A set of IDs, or ``None`` when *index* has none of those shapes.
    """
    # NOTE(review): the stored layout of the index is not defined in this
    # file; the {"entries": ...} forms are an assumption - confirm against
    # real data.
    entries = index.get("entries") if isinstance(index, dict) else index
    if isinstance(entries, dict):
        return {
            _entry_session_id(meta, key) if isinstance(meta, dict) else key
            for key, meta in entries.items()
        }
    if isinstance(entries, list):
        return {
            _entry_session_id(item) for item in entries if isinstance(item, dict)
        }
    return None


def _model_cache_session_ids(entries):
    """Decode the session IDs embedded in model-cache ``resource`` strings.

    For every dict entry whose ``resource`` is a string containing
    ``/local/``, the text after the first ``/local/`` is base64-decoded.
    Payloads that cannot be decoded are reported as a
    ``(decode failed: ...)`` placeholder instead of being dropped.
    """
    ids = set()
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        resource = entry.get("resource", "")
        # decode from vscode-chat-session://local/{base64}
        if not isinstance(resource, str) or "/local/" not in resource:
            continue
        b64 = resource.split("/local/")[1]
        try:
            ids.add(base64.b64decode(b64).decode())
        except ValueError:
            # binascii.Error (bad base64) and UnicodeDecodeError (payload is
            # not UTF-8) are both ValueError subclasses.
            ids.add(f"(decode failed: {b64})")
    return ids


print(f"\n{'='*80}")
print("  CROSS-REFERENCE VALIDATION (Broken)")
print(f"{'='*80}")

# Read all four stores up front; try/finally closes the connection even when
# a query fails.
conn = sqlite3.connect(BROKEN_DB)
try:
    index = dump_key(conn, "chat.ChatSessionStore.index")
    model_cache = dump_key(conn, "agentSessions.model.cache")
    state_cache = dump_key(conn, "agentSessions.state.cache")
    memento = dump_key(conn, "memento/interactive-session-view-copilot")
finally:
    conn.close()

# Session IDs on disk are the file names without their JSON extension.
session_files = {
    _session_id_from_filename(f) for f in list_sessions(BROKEN_SESSIONS_DIR)
}

print(f"\nSession files on disk: {session_files}")

if index:
    index_ids = _index_session_ids(index)
    if index_ids is None:
        # Skip the comparison: reporting every file as "not in index" would
        # be misleading when the index could not be interpreted at all.
        print(f"  WARNING: Unrecognised index format ({type(index).__name__}); skipping index check")
    else:
        print(f"Index session IDs: {index_ids}")
        missing_from_disk = index_ids - session_files
        if missing_from_disk:
            print(f"  WARNING: In index but NOT on disk: {missing_from_disk}")
        extra_on_disk = session_files - index_ids
        if extra_on_disk:
            print(f"  WARNING: On disk but NOT in index: {extra_on_disk}")

if model_cache:
    if isinstance(model_cache, list):
        mc_ids = _model_cache_session_ids(model_cache)
        print(f"Model cache session IDs: {mc_ids}")
        missing_from_disk2 = mc_ids - session_files
        if missing_from_disk2:
            print(f"  WARNING: In model cache but NOT on disk: {missing_from_disk2}")
    else:
        print(f"  WARNING: Unrecognised model cache format ({type(model_cache).__name__}); skipping model cache check")

if state_cache:
    print(f"State cache entries: {len(state_cache) if isinstance(state_cache, list) else 'N/A'}")
    if isinstance(state_cache, list):
        for i, entry in enumerate(state_cache):
            if isinstance(entry, dict):
                sid = _entry_session_id(entry)
                resource = entry.get("resource", "")
                print(f"  [{i}] sessionId/resource: {sid} / {resource}")
    elif isinstance(state_cache, dict):
        for k, v in state_cache.items():
            print(f"  {k}: {json.dumps(v)[:200]}")

if memento is not None:
    print(f"Memento (interactive-session-view-copilot): {json.dumps(memento)[:500]}")