prollytree 0.3.4

A prolly (probabilistic) tree for efficient storage, retrieval, and modification of ordered data.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
#!/usr/bin/env python3

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Test script for VersionedKvStore Python bindings."""

import tempfile
import os
import subprocess
import pytest
from prollytree import VersionedKvStore, StorageBackend


# By default the RocksDB tests must FAIL when the wheel under test was not built
# with `rocksdb_storage` — that is the packaging regression this test exists to
# catch (CI/PyPI wheels are expected to ship the feature). For local builds that
# intentionally omit RocksDB (e.g. `--with-sql` without the rocksdb toolchain),
# set PROLLYTREE_TEST_SKIP_MISSING_BACKENDS=1 to opt into skip-instead-of-fail.
_ROCKSDB_MISSING_MARKER = "rocksdb_storage"


def _handle_missing_rocksdb(exc):
    """Turn a failed RocksDB store construction into a skip or a failure.

    The test is skipped only when both conditions hold: the message of
    ``exc`` contains ``_ROCKSDB_MISSING_MARKER`` and the environment variable
    ``PROLLYTREE_TEST_SKIP_MISSING_BACKENDS`` is set to a non-empty value.
    In every other case ``exc`` is raised again.

    Args:
        exc: The exception caught while constructing the store.

    Raises:
        The given ``exc``, whenever the skip conditions are not both met.
    """
    feature_missing = _ROCKSDB_MISSING_MARKER in str(exc)
    if feature_missing and os.environ.get("PROLLYTREE_TEST_SKIP_MISSING_BACKENDS"):
        # Local builds that intentionally omit RocksDB opt in to a skip.
        pytest.skip("wheel built without rocksdb_storage feature (opt-in skip)")
    raise exc

def test_versioned_kv_store():
    """Exercise the core VersionedKvStore workflow on the default backend.

    Runs inside a throwaway git repository and walks through store creation,
    insert/get, key listing and status, commit, update/delete, branch
    creation, checkout and commit history. Progress is printed for manual
    runs; the values those prints describe are also asserted so that a
    regression fails the test instead of only changing its output.
    """

    # Save original directory
    original_dir = os.getcwd()

    # Create a temporary directory and initialize a git repository
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            print(f"[DIR] Creating test in: {tmpdir}")

            # Initialize git repository (output captured, as in the other tests)
            subprocess.run(["git", "init"], cwd=tmpdir, check=True, capture_output=True)
            subprocess.run(["git", "config", "user.name", "Test User"], cwd=tmpdir, check=True, capture_output=True)
            subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmpdir, check=True, capture_output=True)

            # Create a subdirectory for our dataset
            dataset_dir = os.path.join(tmpdir, "dataset")
            os.makedirs(dataset_dir)
            os.chdir(dataset_dir)

            print("[OK] Git repository initialized")

            # Test 1: Initialize VersionedKvStore
            print("\n[TEST] Test 1: Initialize VersionedKvStore")
            store = VersionedKvStore(dataset_dir)
            print(f"   Storage backend: {store.storage_backend()}")
            print(f"   Current branch: {store.current_branch()}")

            # Test 2: Basic key-value operations
            print("\n[TEST] Test 2: Basic key-value operations")
            store.insert(b"name", b"Alice")
            store.insert(b"age", b"30")
            store.insert(b"city", b"San Francisco")

            # Check values
            name = store.get(b"name")
            age = store.get(b"age")
            city = store.get(b"city")

            print(f"   name: {name}")
            print(f"   age: {age}")
            print(f"   city: {city}")

            assert name == b"Alice", "Inserted name should be readable"
            assert age == b"30", "Inserted age should be readable"
            assert city == b"San Francisco", "Inserted city should be readable"

            # Test 3: List keys and status
            print("\n[TEST] Test 3: List keys and status")
            keys = store.list_keys()
            print(f"   Keys: {[k.decode() for k in keys]}")

            status = store.status()
            print("   Status:")
            for key, status_str in status:
                print(f"   - {key.decode()}: {status_str}")

            # Test 4: Commit changes
            print("\n[TEST] Test 4: Commit changes")
            commit_hash = store.commit("Add initial user data")
            print(f"   Commit hash: {commit_hash}")
            assert commit_hash, "Commit should return a non-empty hash"

            # Check status after commit
            status = store.status()
            print(f"   Status after commit: {len(status)} staged changes")

            # Test 5: Update and delete operations
            print("\n[TEST] Test 5: Update and delete operations")
            updated = store.update(b"age", b"31")
            print(f"   Updated age: {updated}")

            deleted = store.delete(b"city")
            print(f"   Deleted city: {deleted}")

            # Add new key
            store.insert(b"country", b"USA")

            # Check status
            status = store.status()
            print("   Status after changes:")
            for key, status_str in status:
                print(f"   - {key.decode()}: {status_str}")

            # Test 6: Branch operations
            print("\n[TEST] Test 6: Branch operations")
            store.create_branch("feature-branch")
            print("   Created and switched to feature-branch")
            print(f"   Current branch: {store.current_branch()}")
            assert store.current_branch() == "feature-branch", "Should be on feature-branch"

            # Make changes on feature branch
            store.insert(b"feature", b"new-feature")
            store.commit("Add feature on feature branch")

            # List all branches
            branches = store.list_branches()
            print(f"   Available branches: {branches}")

            # Test 7: Switch back to main
            print("\n[TEST] Test 7: Switch back to main")
            store.checkout("main")
            print(f"   Current branch: {store.current_branch()}")
            assert store.current_branch() == "main", "Should be on main branch"

            # Check if feature key exists (should not exist on main)
            feature = store.get(b"feature")
            print(f"   Feature key on main: {feature}")
            assert feature is None, "Feature key should not exist on main"

            # Test 8: Commit history
            print("\n[TEST] Test 8: Commit history")
            history = store.log()
            assert history, "Commit history on main should not be empty"
            print(f"   Commit history ({len(history)} commits):")
            for i, commit in enumerate(history[:3]):  # Show first 3 commits
                print(f"   {i+1}. {commit['id'][:8]} - {commit['message']}")
                print(f"      Author: {commit['author']}")
                print(f"      Timestamp: {commit['timestamp']}")

            print("\n[OK] All VersionedKvStore tests completed successfully!")
        finally:
            # Restore the working directory before the temporary directory
            # is removed on leaving the ``with`` block.
            os.chdir(original_dir)


def _check_backend_round_trip(store, expected_backend, label):
    """Assert that ``store`` reports ``expected_backend`` and round-trips a key.

    Args:
        store: Store under test; its ``storage_backend``, ``insert``,
            ``commit`` and ``get`` methods are called.
        expected_backend: Value that ``store.storage_backend()`` must equal.
        label: Backend name used in the commit message and progress output.
    """
    assert store.storage_backend() == expected_backend
    store.insert(b"key1", b"value1")
    store.commit(f"{label} backend test")
    assert store.get(b"key1") == b"value1"
    print(f"   Backend: {store.storage_backend()}")
    print(f"   [OK] {label} backend works")


def test_storage_backends():
    """Test different storage backends.

    Each backend (Git, File, InMemory, RocksDB) gets its own data directory
    inside one temporary git repository and goes through the same
    insert / commit / read-back check.

    Note: All storage backends (Git, File, InMemory) require being inside a git
    repository because they all use git for version control metadata.
    """

    with tempfile.TemporaryDirectory() as tmpdir:
        print(f"\n[DIR] Testing storage backends in: {tmpdir}")

        # Save original directory
        original_dir = os.getcwd()

        try:
            # Initialize git repository at the root
            subprocess.run(["git", "init"], cwd=tmpdir, check=True, capture_output=True)
            subprocess.run(["git", "config", "user.name", "Test User"], cwd=tmpdir, check=True, capture_output=True)
            subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmpdir, check=True, capture_output=True)

            # Test 1: Git backend (default)
            print("\n[TEST] Test: Git backend (default)")
            git_dir = os.path.join(tmpdir, "git_data")
            os.makedirs(git_dir)
            # gix discovers repos from cwd, so we need to change directory
            os.chdir(git_dir)
            store_git = VersionedKvStore(git_dir)
            _check_backend_round_trip(store_git, StorageBackend.Git, "Git")

            # Test 2: File backend
            print("\n[TEST] Test: File backend")
            file_dir = os.path.join(tmpdir, "file_data")
            os.makedirs(file_dir)
            store_file = VersionedKvStore(file_dir, StorageBackend.File)
            _check_backend_round_trip(store_file, StorageBackend.File, "File")

            # Test 3: InMemory backend
            print("\n[TEST] Test: InMemory backend")
            mem_dir = os.path.join(tmpdir, "mem_data")
            os.makedirs(mem_dir)
            store_mem = VersionedKvStore(mem_dir, StorageBackend.InMemory)
            _check_backend_round_trip(store_mem, StorageBackend.InMemory, "InMemory")

            # Test 4: RocksDB backend.
            # Defaults to failing if the wheel was built without rocksdb_storage
            # (catches packaging regressions). With PROLLYTREE_TEST_SKIP_MISSING_BACKENDS=1
            # we print-skip and continue so the prior Git/File/InMemory passes
            # remain visible (using pytest.skip here would mark the whole test
            # skipped and hide them).
            print("\n[TEST] Test: RocksDB backend")
            rocks_dir = os.path.join(tmpdir, "rocks_data")
            os.makedirs(rocks_dir)
            store_rocks = None
            try:
                store_rocks = VersionedKvStore(rocks_dir, StorageBackend.RocksDB)
            except ValueError as exc:
                if _ROCKSDB_MISSING_MARKER in str(exc) and os.environ.get(
                    "PROLLYTREE_TEST_SKIP_MISSING_BACKENDS"
                ):
                    print("   [SKIP] wheel built without rocksdb_storage feature (opt-in)")
                else:
                    raise
            if store_rocks is not None:
                _check_backend_round_trip(store_rocks, StorageBackend.RocksDB, "RocksDB")

            print("\n[OK] All storage backend tests completed successfully!")
        finally:
            # Leave the temporary tree before it is deleted.
            os.chdir(original_dir)


def test_rocksdb_storage_backend():
    """Smoke-test the RocksDB backend from creation through reopen.

    Creates a RocksDB-backed store in a temporary git repository, checks an
    insert/commit/read cycle and an update/delete cycle, then drops the store
    and reopens it from disk to confirm the committed state is still there.

    A ``ValueError`` raised while constructing the store is passed to
    ``_handle_missing_rocksdb``, so a wheel built without the
    `rocksdb_storage` feature fails the test unless
    PROLLYTREE_TEST_SKIP_MISSING_BACKENDS is set, in which case it is skipped.
    """

    starting_cwd = os.getcwd()

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            print(f"\n[DIR] Testing RocksDB backend in: {tmpdir}")

            # Minimal git repository with an identity configured.
            git_setup = (
                ["git", "init"],
                ["git", "config", "user.name", "Test User"],
                ["git", "config", "user.email", "test@example.com"],
            )
            for command in git_setup:
                subprocess.run(command, cwd=tmpdir, check=True, capture_output=True)

            rocks_dir = os.path.join(tmpdir, "rocks_data")
            os.makedirs(rocks_dir)
            os.chdir(rocks_dir)

            try:
                kv = VersionedKvStore(rocks_dir, StorageBackend.RocksDB)
            except ValueError as exc:
                _handle_missing_rocksdb(exc)

            assert kv.storage_backend() == StorageBackend.RocksDB

            # First cycle: two inserts, one commit, read both back.
            for key, value in ((b"alpha", b"1"), (b"beta", b"2")):
                kv.insert(key, value)
            first_commit = kv.commit("RocksDB initial")
            assert kv.get(b"alpha") == b"1"
            assert kv.get(b"beta") == b"2"
            assert first_commit  # non-empty commit hash

            # Second cycle: overwrite one key, remove the other.
            kv.insert(b"alpha", b"1-updated")
            kv.delete(b"beta")
            kv.commit("RocksDB update")
            assert kv.get(b"alpha") == b"1-updated"
            assert kv.get(b"beta") is None

            # Release the only reference to the store before opening the
            # same directory again.
            del kv
            reopened = VersionedKvStore.open(rocks_dir, StorageBackend.RocksDB)
            assert reopened.storage_backend() == StorageBackend.RocksDB
            assert reopened.get(b"alpha") == b"1-updated"
            assert reopened.get(b"beta") is None

            print("   [OK] RocksDB backend round-trip + reopen passed")
        finally:
            os.chdir(starting_cwd)


def test_versioning_operations_on_file_backend():
    """Test that versioning operations work on File backend.

    All storage backends use Git for version control, so checkout, merge,
    try_merge, and diff should all work regardless of the storage layer.
    The storage layer only affects how tree chunks are stored.

    The scenario creates a ``feature`` branch and a ``conflict-branch`` from
    ``main``, merges each back into ``main``, and asserts the resulting keys
    and values after every step.
    """

    # Save original directory
    original_dir = os.getcwd()

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            print(f"\n[DIR] Testing versioning operations on File backend in: {tmpdir}")

            # Initialize git repository (needed for all backends)
            subprocess.run(["git", "init"], cwd=tmpdir, check=True, capture_output=True)
            subprocess.run(["git", "config", "user.name", "Test User"], cwd=tmpdir, check=True, capture_output=True)
            subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmpdir, check=True, capture_output=True)
            os.chdir(tmpdir)

            # Create File backend store
            file_dir = os.path.join(tmpdir, "file_data")
            os.makedirs(file_dir)
            store = VersionedKvStore(file_dir, StorageBackend.File)
            store.insert(b"key1", b"value1")
            store.commit("Initial commit on main")

            # Create a feature branch
            print("\n[TEST] Test: create_branch on File backend")
            store.create_branch("feature")
            print(f"   [OK] Created and switched to branch: {store.current_branch()}")

            # Make changes on feature branch
            store.insert(b"feature_key", b"feature_value")
            store.commit("Add feature key")

            # Test checkout back to main
            print("\n[TEST] Test: checkout works on File backend")
            store.checkout("main")
            assert store.current_branch() == "main", "Should be on main branch"
            assert store.get(b"feature_key") is None, "Feature key should not exist on main"
            print(f"   [OK] Checkout to main succeeded, current branch: {store.current_branch()}")

            # Add a change on main
            store.insert(b"main_key", b"main_value")
            store.commit("Add main key")

            # Test try_merge to detect conflicts (should succeed with no conflicts)
            print("\n[TEST] Test: try_merge works on File backend")
            success, conflicts = store.try_merge("feature")
            assert success, "Merge should succeed with no conflicts"
            assert len(conflicts) == 0, "Should have no conflicts"
            print("   [OK] try_merge succeeded with no conflicts")

            # Verify merge result
            assert store.get(b"feature_key") == b"feature_value", "Feature key should exist after merge"
            assert store.get(b"main_key") == b"main_value", "Main key should still exist"
            print("   [OK] Merge result verified: both keys present")

            # Test diff works on File backend
            print("\n[TEST] Test: diff works on File backend")
            history = store.log()
            # At least two commits have been made on main by now, so a short
            # history is a failure rather than a reason to skip the diff.
            assert len(history) >= 2, "Expected at least two commits before diffing"
            diffs = store.diff(history[1]["id"], history[0]["id"])
            print(f"   [OK] diff returned {len(diffs)} differences")

            # Test merge with conflict resolution
            print("\n[TEST] Test: merge with conflict resolution on File backend")
            # Create another branch with conflicting change
            store.create_branch("conflict-branch")
            store.update(b"key1", b"conflict_value")
            store.commit("Change key1 on conflict-branch")

            # Go back to main and change the same key
            store.checkout("main")
            store.update(b"key1", b"main_updated_value")
            store.commit("Change key1 on main")

            # Merge with TakeDestination strategy (keep main's value)
            from prollytree import ConflictResolution
            merge_commit = store.merge("conflict-branch", ConflictResolution.TakeDestination)
            assert store.get(b"key1") == b"main_updated_value", "Should keep main's value"
            print(f"   [OK] Merge with conflict resolution succeeded, commit: {merge_commit[:8]}")

            print("\n[OK] All versioning operations on File backend completed successfully!")
        finally:
            # Restore the working directory before the temporary directory
            # is removed on leaving the ``with`` block.
            os.chdir(original_dir)


def test_versioning_operations_on_rocksdb_backend():
    """Test that versioning operations work on the RocksDB backend.

    Mirrors test_versioning_operations_on_file_backend so the RocksDB
    storage layer gets the same checkout / try_merge / diff / conflict-merge
    coverage. Fails by default if the wheel was built without
    `rocksdb_storage`; set PROLLYTREE_TEST_SKIP_MISSING_BACKENDS=1 to opt
    into a skip on local builds.
    """

    original_dir = os.getcwd()

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            print(f"\n[DIR] Testing versioning operations on RocksDB backend in: {tmpdir}")

            # Initialize git repository with an identity configured
            subprocess.run(["git", "init"], cwd=tmpdir, check=True, capture_output=True)
            subprocess.run(["git", "config", "user.name", "Test User"], cwd=tmpdir, check=True, capture_output=True)
            subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmpdir, check=True, capture_output=True)
            os.chdir(tmpdir)

            rocks_dir = os.path.join(tmpdir, "rocks_data")
            os.makedirs(rocks_dir)
            try:
                store = VersionedKvStore(rocks_dir, StorageBackend.RocksDB)
            except ValueError as exc:
                # Either skips the test (opt-in) or raises exc again.
                _handle_missing_rocksdb(exc)

            store.insert(b"key1", b"value1")
            store.commit("Initial commit on main")

            # Create a feature branch and add a key
            print("\n[TEST] Test: create_branch on RocksDB backend")
            store.create_branch("feature")
            print(f"   [OK] Created and switched to branch: {store.current_branch()}")
            store.insert(b"feature_key", b"feature_value")
            store.commit("Add feature key")

            # Checkout back to main
            print("\n[TEST] Test: checkout works on RocksDB backend")
            store.checkout("main")
            assert store.current_branch() == "main", "Should be on main branch"
            assert store.get(b"feature_key") is None, "Feature key should not exist on main"
            print(f"   [OK] Checkout to main succeeded, current branch: {store.current_branch()}")

            # Add a non-conflicting change on main
            store.insert(b"main_key", b"main_value")
            store.commit("Add main key")

            # try_merge should succeed without conflicts
            print("\n[TEST] Test: try_merge works on RocksDB backend")
            success, conflicts = store.try_merge("feature")
            assert success, "Merge should succeed with no conflicts"
            assert len(conflicts) == 0, "Should have no conflicts"
            assert store.get(b"feature_key") == b"feature_value", "Feature key should exist after merge"
            assert store.get(b"main_key") == b"main_value", "Main key should still exist"
            print("   [OK] try_merge succeeded and merge result verified")

            # diff between two commits
            print("\n[TEST] Test: diff works on RocksDB backend")
            history = store.log()
            # At least two commits have been made on main by now, so a short
            # history is a failure rather than a reason to skip the diff.
            assert len(history) >= 2, "Expected at least two commits before diffing"
            diffs = store.diff(history[1]["id"], history[0]["id"])
            print(f"   [OK] diff returned {len(diffs)} differences")

            # Conflict resolution via TakeDestination
            print("\n[TEST] Test: merge with conflict resolution on RocksDB backend")
            store.create_branch("conflict-branch")
            store.update(b"key1", b"conflict_value")
            store.commit("Change key1 on conflict-branch")

            # Change the same key on main so the merge has a conflict to resolve
            store.checkout("main")
            store.update(b"key1", b"main_updated_value")
            store.commit("Change key1 on main")

            from prollytree import ConflictResolution
            merge_commit = store.merge("conflict-branch", ConflictResolution.TakeDestination)
            assert store.get(b"key1") == b"main_updated_value", "Should keep main's value"
            print(f"   [OK] Merge with conflict resolution succeeded, commit: {merge_commit[:8]}")

            print("\n[OK] All versioning operations on RocksDB backend completed successfully!")
        finally:
            # Restore the working directory before the temporary directory
            # is removed on leaving the ``with`` block.
            os.chdir(original_dir)


if __name__ == "__main__":
    # Script mode has no pytest runner to absorb pytest.skip(). Without this
    # guard an opt-in skip (PROLLYTREE_TEST_SKIP_MISSING_BACKENDS) would
    # propagate as an uncaught exception and stop the remaining tests from
    # running, so each skip is reported and the run continues.
    for run_test in (
        test_versioned_kv_store,
        test_storage_backends,
        test_rocksdb_storage_backend,
        test_versioning_operations_on_file_backend,
        test_versioning_operations_on_rocksdb_backend,
    ):
        try:
            run_test()
        except pytest.skip.Exception as skipped:
            print(f"[SKIP] {run_test.__name__}: {skipped}")