1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
//! v031-cluster-M3b: cache schema_version + graceful-discard on mismatch
//!
//! ROOT CAUSE
//! ----------
//! v031-cluster-M3a extended `QueryKey` with a `language` field. Because
//! `CacheFileData` serializes `Vec<(QueryKey, CacheEntry)>` via serde_json,
//! every pre-v0.3.1 cache file on disk now fails to deserialize on first
//! daemon load after upgrade — the JSON is missing the `language` field
//! that `serde(deny_unknown_fields)` is not even needed for; the missing
//! field IS the failure. Without graceful-discard, the daemon panics or
//! returns `DaemonError::InvalidMessage` on every startup until the user
//! manually deletes the cache file.
//!
//! ASSERTION (post-fix)
//! --------------------
//! 1. `CacheFileData` carries a `schema_version: u32` header field.
//! 2. `QueryCache::load_from_file` on a mismatched/legacy cache:
//! - logs a warning,
//! - DELETES the offending cache file,
//! - returns a fresh empty `QueryCache` without erroring out.
//! 3. The current schema_version round-trips correctly through save/load.
//!
//! PRE-FIX BEHAVIOUR
//! -----------------
//! `load_from_file` calls `serde_json::from_slice::<CacheFileData>(...)` and
//! propagates any deserialize error up as `DaemonError`. A v0.3.0-shaped
//! cache (no `language` in QueryKey, no `schema_version` header) hits that
//! error path. This test refuses to compile pre-fix because it references
//! `CacheFileData::CACHE_SCHEMA_VERSION` and constructs `CacheFileData`
//! with a `schema_version` field — the compile error is the load-bearing
//! failure mode for the schema-header half of the assertion.
use std::fs;
use std::io::Write;
use tldr_cli::commands::daemon::salsa::{
CacheFileData, QueryCache, QueryKey, CACHE_SCHEMA_VERSION,
};
use tldr_core::Language;
/// Construct a raw on-disk cache file matching the v0.3.0 wire format:
/// 4-byte magic "TLDR", 1-byte version=1, 8-byte little-endian checksum,
/// then the JSON payload. The JSON payload here uses the v0.3.0 shape
/// (no `schema_version` header). Both the missing schema_version and the
/// missing `language` field on QueryKey will trigger the discard path.
fn write_v030_shaped_cache(path: &std::path::Path) {
// v0.3.0 JSON payload shape: no schema_version, QueryKey without language.
// Use a literal JSON so we faithfully simulate what's actually on disk
// for users upgrading from v0.3.0.
let legacy_json = br#"{"entries":[[{"query_name":"calls","args_hash":12345},{"value":[104,105],"revision":1,"input_hashes":[],"created_at":{"secs":0,"nanos":0},"last_accessed":{"secs":0,"nanos":0}}]],"dependents":[],"stats":{"hits":0,"misses":0,"invalidations":0,"evictions":0,"total_entries":1,"total_bytes":0,"hit_rate":0.0},"revision":1}"#;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
legacy_json.hash(&mut hasher);
let checksum = hasher.finish();
let mut file = fs::File::create(path).unwrap();
file.write_all(b"TLDR").unwrap();
file.write_all(&[1u8]).unwrap(); // CACHE_VERSION = 1 (header byte)
file.write_all(&checksum.to_le_bytes()).unwrap();
file.write_all(legacy_json).unwrap();
file.flush().unwrap();
}
/// RED test: load a v0.3.0-shaped cache. Pre-fix: returns Err. Post-fix:
/// returns Ok with an empty cache, the offending file is deleted, and a
/// warning was logged to stderr.
#[test]
fn test_v030_cache_discarded_gracefully_on_v031_load() {
let dir = tempfile::tempdir().unwrap();
let cache_path = dir.path().join("legacy_cache.bin");
write_v030_shaped_cache(&cache_path);
assert!(cache_path.exists(), "fixture must write a cache file");
// Post-fix: load returns Ok with a fresh empty cache.
let cache = QueryCache::load_from_file(&cache_path)
.expect("v0.3.0-shaped cache must be discarded gracefully, not error out");
// Post-fix invariant 1: cache is fresh/empty.
assert_eq!(
cache.len(),
0,
"discarded cache must yield a fresh empty QueryCache"
);
// Post-fix invariant 2: offending file is deleted on disk.
assert!(
!cache_path.exists(),
"legacy cache file must be removed by graceful-discard path"
);
}
/// Regression test: the current schema_version round-trips through
/// save_to_file → load_from_file. This guards against silent regressions
/// where a future change forgets to bump or thread schema_version.
#[test]
fn test_cache_header_schema_version_round_trip() {
let dir = tempfile::tempdir().unwrap();
let cache_path = dir.path().join("round_trip_cache.bin");
// Populate a cache with one entry so the file is non-trivial.
let cache = QueryCache::with_defaults();
cache.insert(
QueryKey::new("calls", 42, Language::Python),
&"hello".to_string(),
vec![],
);
cache.save_to_file(&cache_path).unwrap();
// Read the raw bytes and parse the JSON payload to confirm the
// schema_version field is present and equals the current constant.
let raw = fs::read(&cache_path).unwrap();
// Skip 4-byte magic + 1-byte version + 8-byte checksum = 13 bytes header.
let payload = &raw[13..];
let parsed: CacheFileData = serde_json::from_slice(payload)
.expect("post-fix payload must deserialize as current CacheFileData");
assert_eq!(
parsed.schema_version, CACHE_SCHEMA_VERSION,
"saved schema_version must match the current constant"
);
// And the high-level load API must accept it.
let loaded = QueryCache::load_from_file(&cache_path).unwrap();
assert_eq!(loaded.len(), 1, "round-tripped cache must preserve entries");
}
/// Boundary test: a payload whose JSON parses as CacheFileData but with a
/// stale schema_version (e.g., schema_version=0 or schema_version=
/// CACHE_SCHEMA_VERSION-1) must also be discarded, not silently used.
#[test]
fn test_stale_schema_version_discarded() {
let dir = tempfile::tempdir().unwrap();
let cache_path = dir.path().join("stale_cache.bin");
// Build a payload with explicit stale schema_version.
let stale = CacheFileData {
schema_version: CACHE_SCHEMA_VERSION.saturating_sub(1),
entries: vec![],
dependents: vec![],
stats: Default::default(),
revision: 0,
};
let json = serde_json::to_vec(&stale).unwrap();
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
json.hash(&mut hasher);
let checksum = hasher.finish();
let mut file = fs::File::create(&cache_path).unwrap();
file.write_all(b"TLDR").unwrap();
file.write_all(&[1u8]).unwrap();
file.write_all(&checksum.to_le_bytes()).unwrap();
file.write_all(&json).unwrap();
file.flush().unwrap();
drop(file);
// Post-fix: stale schema_version triggers graceful discard.
let cache = QueryCache::load_from_file(&cache_path)
.expect("stale-schema cache must be discarded, not error");
assert_eq!(cache.len(), 0);
assert!(
!cache_path.exists(),
"stale-schema cache file must be removed"
);
}