chasm-cli 1.5.4

Universal chat session manager - harvest, merge, and analyze AI chat history from VS Code, Cursor, and other editors
Documentation
"""Read actual raw content of small sessions and compare byte-level format."""
import json, os, sqlite3

# Root of VS Code's per-workspace storage; every session file below lives
# under <WS_BASE>/<workspace hash>/chatSessions/.
WS_BASE = r"C:\Users\adamm\AppData\Roaming\Code\User\workspaceStorage"

# Small sessions picked for direct comparison, keyed by a short label.
# Each row is (label, workspace storage folder, session file name).
sessions = {
    label: os.path.join(WS_BASE, workspace, "chatSessions", filename)
    for label, workspace, filename in (
        ("WORKING_bc6d", "82cdabb21413f2ff42168423e82c8bdf", "bc6d5655-0778-4d67-8d68-660821103ca8.jsonl"),
        ("BROKEN_6be2", "5ec71800c69c79b96b06a37e38537907", "6be29cba-331e-4aa4-bc58-659cc20f4800.jsonl"),
        ("WORKING_44f0", "82cdabb21413f2ff42168423e82c8bdf", "44f0cf62-331e-43c9-b32c-25d91ebab0b8.jsonl"),
        ("BROKEN_4e5d", "724ab159cbc91cdd8242d9b5aa690c3b", "4e5dd6b4-ea53-475b-8f9e-8fad3bf59388.jsonl"),
    )
}

def _decode_concatenated(line):
    """Return every JSON value written back-to-back on a single line.

    Values may be separated by spaces or tabs. Decoding stops at the first
    position that does not start a valid JSON value; the values decoded up
    to that point are returned.
    """
    decoder = json.JSONDecoder()
    found = []
    pos = 0
    while pos < len(line):
        try:
            obj, pos = decoder.raw_decode(line, pos)
        except json.JSONDecodeError:
            break
        found.append(obj)
        # Skip whitespace between objects
        while pos < len(line) and line[pos] in ' \t':
            pos += 1
    return found


def _kind_and_k(obj):
    """Return the ('kind', 'k') entries of a decoded value for display.

    Values that are not JSON objects have no keys, so they are reported with
    the same placeholders used for a missing key ('?' and None).
    """
    if isinstance(obj, dict):
        return obj.get('kind', '?'), obj.get('k', None)
    return '?', None


# Print a byte-level report for each session file: size, newline statistics,
# and the 'kind'/'k' fields of the JSON value(s) found on every non-empty line.
for label, path in sessions.items():
    print(f"\n{'='*70}")
    print(f"{label}: {os.path.basename(path)}")
    print(f"{'='*70}")

    with open(path, 'rb') as f:
        raw = f.read()

    # CORRECT newline analysis (using actual byte values, not escaped strings)
    nl_count = raw.count(b'\n')  # actual newline byte 0x0a
    cr_count = raw.count(b'\r')  # carriage return byte 0x0d
    crlf_count = raw.count(b'\r\n')

    print(f"Size: {len(raw)} bytes")
    print(f"Actual newlines (0x0a): {nl_count}")
    print(f"Carriage returns (0x0d): {cr_count}")
    print(f"CRLF pairs: {crlf_count}")
    print(f"Last 10 bytes hex: {raw[-10:].hex()}")
    # Evaluate outside the f-string: a backslash escape inside a replacement
    # field is either a syntax error (before Python 3.12) or, when doubled,
    # a literal backslash sequence that can never equal the single 0x0a byte.
    ends_with_newline = raw.endswith(b'\n') if raw else 'empty'
    print(f"Ends with 0x0a: {ends_with_newline}")

    # Split by actual newlines
    text = raw.decode('utf-8')
    lines = text.split('\n')
    non_empty = [l for l in lines if l.strip()]
    print(f"Lines after split('\\n'): {len(lines)}")
    print(f"Non-empty lines: {len(non_empty)}")

    # Parse each non-empty line as JSON
    print("\nJSON objects per line:")
    for i, line in enumerate(non_empty):
        stripped = line.strip()
        try:
            obj = json.loads(stripped)
        except json.JSONDecodeError as e:
            # Not a single JSON document: look for concatenated JSON objects
            print(f"  Line {i}: PARSE ERROR at col {e.colno}: {e.msg}")
            objects = _decode_concatenated(stripped)
            print(f"    Concatenated objects: {len(objects)}")
            for j, obj in enumerate(objects):
                kind, k = _kind_and_k(obj)
                print(f"    [{j}] kind={kind}, k={k}")
        else:
            kind, k = _kind_and_k(obj)
            print(f"  Line {i}: kind={kind}, k={k}, len={len(stripped)}")

def _load_session_objects(path):
    """Read a session file and return every JSON value it contains, in order.

    The file is read as UTF-8 text and split on newlines. A line that is not
    a single valid JSON document is scanned for several JSON values written
    back-to-back (optionally separated by spaces or tabs); scanning of that
    line stops at the first position that cannot be decoded.
    """
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    decoder = json.JSONDecoder()
    objects = []
    for line in text.split('\n'):
        line = line.strip()
        if not line:
            continue
        try:
            objects.append(json.loads(line))
            continue
        except json.JSONDecodeError:
            pass
        # Fall back to decoding concatenated JSON values one at a time.
        p = 0
        while p < len(line):
            try:
                obj, p = decoder.raw_decode(line, p)
            except json.JSONDecodeError:
                break
            objects.append(obj)
            while p < len(line) and line[p] in ' \t':
                p += 1
    return objects


def _print_request_structure(objects):
    """Print the request layout of every object in *objects* whose 'kind' is 0.

    For each entry of that object's v['requests'] list this prints the
    request keys, the message keys and text length, the response keys, the
    first five parts of response['value'], and the 'modelState' entry.
    Values that are not JSON objects are skipped.
    """
    for obj in objects:
        if not (isinstance(obj, dict) and obj.get('kind') == 0):
            continue
        v = obj['v']
        requests = v.get('requests', [])
        print(f"\nSession has {len(requests)} requests")
        for ri, req in enumerate(requests):
            print(f"\n  Request {ri}:")
            print(f"    Keys: {sorted(req.keys())}")
            msg = req.get('message', {})
            print(f"    message keys: {sorted(msg.keys()) if isinstance(msg, dict) else type(msg)}")
            if isinstance(msg, dict):
                print(f"    message.text length: {len(msg.get('text', ''))}")

            resp = req.get('response', {})
            if isinstance(resp, dict):
                print(f"    response keys: {sorted(resp.keys())}")
                value = resp.get('value', [])
                if isinstance(value, list):
                    print(f"    response.value (parts): {len(value)} items")
                    # Only the first five parts are listed to keep output short.
                    for pi, part in enumerate(value[:5]):
                        if isinstance(part, dict):
                            kind = part.get('kind', 'NO_KIND')
                            print(f"      [{pi}] kind={kind}, keys={sorted(part.keys())}")
                elif isinstance(value, dict):
                    print(f"    response.value is DICT (not list): keys={sorted(value.keys())}")
                else:
                    print(f"    response.value type: {type(value)}")

            ms = req.get('modelState', {})
            print(f"    modelState: {json.dumps(ms)}")


# Now let's look at what VS Code expects by checking the response structure
print(f"\n{'='*70}")
print("RESPONSE STRUCTURE DEEP DIVE")
print(f"{'='*70}")

# Read the broken session to see detailed response structure
broken_path = sessions["BROKEN_6be2"]
objects = _load_session_objects(broken_path)
print(f"\nBROKEN session 6be29cba has {len(objects)} total objects")
_print_request_structure(objects)

# Do the same for a working session
working_path = sessions["WORKING_44f0"]
objects = _load_session_objects(working_path)
print(f"\n\nWORKING session 44f0cf62 has {len(objects)} total objects")
_print_request_structure(objects)