rqmd 0.1.1

rqmd: command-line interface (binary `rqmd`)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
//! Shared harness for the E2E CLI tests (`tests/cli.rs`).
//!
//! Rust port of the `runQmd()` helper + `beforeAll` fixtures in qmd's
//! `test/cli.test.ts`. Each test gets its own [`Env`] (a fresh `TempDir` with
//! the same markdown fixtures qmd creates) and spawns the built `rqmd` binary
//! with fully isolated state:
//!
//! * `RQMD_INDEX_PATH`  — points the SQLite index at the temp dir
//!   (honoured before the production-mode gate, `store/path.rs`).
//! * `RQMD_CONFIG_DIR`  — points the YAML config (`index.yml`) at the temp dir.
//! * `PWD`              — rqmd's `pwd()` prefers this over `current_dir()`.
//! * `--index index`    — prepended so `IndexState::new` skips the `.rqmd/`
//!   local-config walk up the ancestors of the cwd (which, on Windows, live
//!   under the user profile alongside the OS temp dir). This keeps every run
//!   hermetic regardless of the host's `~/.rqmd` / `~/.config/rqmd`.
//! * `NO_COLOR=1`       — strip ANSI so substring assertions are clean.
//! * `CI=1`             — make any accidental LLM/model path fail fast instead
//!   of hitting the network (none of the ported commands need a model).

#![allow(dead_code)]

use std::net::TcpListener;
use std::path::{Path, PathBuf};
use std::process::{Child, Command, Stdio};
use std::time::{Duration, Instant};

use assert_cmd::cargo::CommandCargoExt;

/// Result of one `rqmd` invocation: both captured output streams plus the
/// exit code, as filled in by the spawn helpers in this file.
#[derive(Debug, Clone)]
pub struct Out {
    /// Captured standard output (lossily decoded as UTF-8 by the spawners).
    pub stdout: String,
    /// Captured standard error (lossily decoded as UTF-8 by the spawners).
    pub stderr: String,
    /// Process exit code; the spawners store `-1` when the OS reported none.
    pub code: i32,
}

impl Out {
    /// Assert exit code 0, surfacing stdout and stderr on failure.
    ///
    /// Returns `self` so assertions can be chained.
    ///
    /// # Panics
    /// Panics if the exit code is not 0.
    #[track_caller]
    pub fn assert_ok(&self) -> &Self {
        // Same failure text as before ("expected exit 0 ..."), one code path.
        self.assert_code(0)
    }

    /// Assert a non-zero exit code (clap parse errors use 2; app errors use 1).
    ///
    /// Returns `self` so assertions can be chained.
    ///
    /// # Panics
    /// Panics if the exit code is 0.
    #[track_caller]
    pub fn assert_err(&self) -> &Self {
        assert_ne!(
            self.code, 0,
            "expected non-zero exit\n--- stdout ---\n{}\n--- stderr ---\n{}",
            self.stdout, self.stderr
        );
        self
    }

    /// Assert the exit code is exactly `code` — e.g. `1` for an application
    /// error routed through `main`'s `eprintln!("error: …"); exit(1)`, or `2`
    /// for a clap parse error.
    ///
    /// Returns `self` so assertions can be chained.
    ///
    /// # Panics
    /// Panics if the exit code differs from `code`.
    // `#[track_caller]` makes the panic location the calling test line rather
    // than this helper, which is what a reader of the failure wants to see.
    #[track_caller]
    pub fn assert_code(&self, code: i32) -> &Self {
        assert_eq!(
            self.code, code,
            "expected exit {code}\n--- stdout ---\n{}\n--- stderr ---\n{}",
            self.stdout, self.stderr
        );
        self
    }
}

/// One isolated test environment: a temp dir holding the markdown fixtures, a
/// `config/` dir for `index.yml`, and a default index db path.
///
/// Built by [`env`]; every path below lives under the same temp dir.
pub struct Env {
    /// Kept alive for the duration of the test; dropping it removes the dir.
    _root: tempfile::TempDir,
    /// Path of the temp dir itself (parent of the three paths below).
    pub root: PathBuf,
    /// `<root>/fixtures` — the markdown fixtures; default working directory
    /// for the `run*` helpers.
    pub fixtures: PathBuf,
    /// `<root>/config` — holds `index.yml`; passed to the child process as
    /// `RQMD_CONFIG_DIR`.
    pub config_dir: PathBuf,
    /// `<root>/index.sqlite` — passed to the child process as
    /// `RQMD_INDEX_PATH`.
    pub db: PathBuf,
}

impl Env {
    /// Invoke `rqmd <args>` with the fixtures dir as working directory, using
    /// this environment's isolated db + config. `--index index` is put in
    /// front of `args` (see module docs).
    pub fn run(&self, args: &[&str]) -> Out {
        self.run_in(self.fixtures.as_path(), args)
    }

    /// Same as [`run`](Self::run), except the working directory is `cwd`.
    pub fn run_in(&self, cwd: &Path, args: &[&str]) -> Out {
        let argv = [&["--index", "index"][..], args].concat();
        spawn(cwd, &self.db, &self.config_dir, &argv, &[])
    }

    /// Invoke `rqmd` from `cwd` with additional env overrides and *without*
    /// the `--index` prefix. The `rqmd init` tests use this: they are
    /// index-agnostic and must steer `HOME`/`USERPROFILE` to hit the `$HOME`
    /// guard.
    pub fn run_in_env(&self, cwd: &Path, args: &[&str], extra: &[(&str, &str)]) -> Out {
        spawn(cwd, &self.db, &self.config_dir, args, extra)
    }

    /// Same as [`run`](Self::run), plus env overrides in `extra`. They are
    /// applied after the defaults and therefore take precedence — handy for
    /// pointing at a custom `RQMD_INDEX_PATH`.
    pub fn run_env(&self, args: &[&str], extra: &[(&str, &str)]) -> Out {
        let argv = [&["--index", "index"][..], args].concat();
        spawn(&self.fixtures, &self.db, &self.config_dir, &argv, extra)
    }

    /// Invoke `rqmd <args>` exactly as given (no `--index` prefix). Meant for
    /// the help / no-args parser tests, which never touch the index.
    pub fn run_bare(&self, args: &[&str]) -> Out {
        let no_overrides: &[(&str, &str)] = &[];
        spawn(&self.fixtures, &self.db, &self.config_dir, args, no_overrides)
    }

    /// Replace the contents of `<config_dir>/index.yml` with `yaml` (the
    /// global `beforeEach` reset, and the ignore-pattern tests that hand-write
    /// a collections config).
    ///
    /// # Panics
    /// Panics if the file cannot be written.
    pub fn write_config(&self, yaml: &str) {
        let target = self.config_dir.join("index.yml");
        std::fs::write(target, yaml).expect("write index.yml");
    }

    /// Render `p` with forward slashes only, so an absolute path under `root`
    /// can sit inside a double-quoted YAML scalar on Windows
    /// (`C:/Users/...`). Non-UTF-8 parts are converted lossily.
    pub fn yaml_path(&self, p: &Path) -> String {
        p.to_string_lossy()
            .chars()
            .map(|ch| if ch == '\\' { '/' } else { ch })
            .collect()
    }
}

/// Build a brand-new isolated [`Env`], mirroring qmd's `beforeAll` fixtures.
///
/// The fixtures subdirectory is literally called `fixtures`, so that
/// `collection add .` derives the collection name `fixtures` (qmd relies on
/// the same basename). The config dir starts with an empty-collections
/// `index.yml`.
///
/// # Panics
/// Panics if the temp dir or any fixture file/directory cannot be created.
pub fn env() -> Env {
    let tmp = tempfile::tempdir().expect("mkdtemp");
    let base = tmp.path().to_path_buf();
    let fixtures = base.join("fixtures");
    let config_dir = base.join("config");

    // Directory skeleton first, then the files that live inside it.
    for sub in ["notes", "docs"] {
        std::fs::create_dir_all(fixtures.join(sub)).unwrap();
    }
    std::fs::create_dir_all(&config_dir).unwrap();
    std::fs::write(config_dir.join("index.yml"), "collections: {}\n").unwrap();

    write_fixtures(&fixtures);

    Env {
        db: base.join("index.sqlite"),
        root: base,
        _root: tmp,
        fixtures,
        config_dir,
    }
}

/// Populate `fixtures` with the six markdown files the CLI tests search over.
///
/// The `notes/` and `docs/` subdirectories must already exist.
///
/// # Panics
/// Panics on the first file that cannot be written.
fn write_fixtures(fixtures: &Path) {
    /// Write `body` to `base/<rel components...>`, panicking on failure.
    fn put(base: &Path, rel: &[&str], body: &str) {
        let mut target = base.to_path_buf();
        for part in rel {
            target.push(part);
        }
        std::fs::write(target, body).unwrap();
    }

    put(
        fixtures,
        &["README.md"],
        "# Test Project\n\n\
         This is a test project for QMD CLI testing.\n\n\
         ## Features\n\n\
         - Full-text search with BM25\n\
         - Vector similarity search\n\
         - Hybrid search with reranking\n",
    );

    put(
        fixtures,
        &["notes", "meeting.md"],
        "# Team Meeting Notes\n\n\
         Date: 2024-01-15\n\n\
         ## Attendees\n\
         - Alice\n\
         - Bob\n\
         - Charlie\n\n\
         ## Discussion Topics\n\
         - Project timeline review\n\
         - Resource allocation\n\
         - Technical debt prioritization\n\n\
         ## Action Items\n\
         1. Alice to update documentation\n\
         2. Bob to fix authentication bug\n\
         3. Charlie to review pull requests\n",
    );

    put(
        fixtures,
        &["notes", "ideas.md"],
        "# Product Ideas\n\n\
         ## Feature Requests\n\
         - Dark mode support\n\
         - Keyboard shortcuts\n\
         - Export to PDF\n\n\
         ## Technical Improvements\n\
         - Improve search performance\n\
         - Add caching layer\n\
         - Optimize database queries\n",
    );

    put(
        fixtures,
        &["docs", "api.md"],
        "# API Documentation\n\n\
         ## Endpoints\n\n\
         ### GET /search\n\
         Search for documents.\n\n\
         Parameters:\n\
         - q: Search query (required)\n\
         - limit: Max results (default: 10)\n\n\
         ### GET /document/:id\n\
         Retrieve a specific document.\n\n\
         ### POST /index\n\
         Index new documents.\n",
    );

    put(
        fixtures,
        &["test1.md"],
        "# Test Document 1\n\n\
         This is the first test document.\n\n\
         It has multiple lines for testing line numbers.\n\
         Line 6 is here.\n\
         Line 7 is here.\n",
    );

    put(
        fixtures,
        &["test2.md"],
        "# Test Document 2\n\n\
         This is the second test document.\n",
    );
}

/// Run the built `rqmd` binary to completion with a pinned index path and
/// return its captured output.
///
/// * `cwd`   — working directory, also exported as `PWD`.
/// * `db`    — exported as `RQMD_INDEX_PATH`.
/// * `cfg`   — exported as `RQMD_CONFIG_DIR`.
/// * `args`  — command-line arguments, passed through unchanged.
/// * `extra` — env overrides applied after the defaults, so they win.
///
/// `XDG_CACHE_HOME`, `XDG_CONFIG_HOME` and `RQMD_CACHE_DIR` are removed from
/// the inherited environment; `NO_COLOR=1` and `CI=1` are set. The exit code
/// is `-1` when the process reported none.
///
/// # Panics
/// Panics if the binary cannot be located or started.
fn spawn(cwd: &Path, db: &Path, cfg: &Path, args: &[&str], extra: &[(&str, &str)]) -> Out {
    /// Lossy UTF-8 decode of a captured stream.
    fn lossy(bytes: &[u8]) -> String {
        String::from_utf8_lossy(bytes).into_owned()
    }

    let mut rqmd = Command::cargo_bin("rqmd").expect("rqmd binary is built by cargo test");
    rqmd.current_dir(cwd).args(args);

    // Scrub host settings that would leak into path resolution.
    for unset in ["XDG_CACHE_HOME", "XDG_CONFIG_HOME", "RQMD_CACHE_DIR"] {
        rqmd.env_remove(unset);
    }

    rqmd.env("NO_COLOR", "1")
        .env("CI", "1")
        .env("PWD", cwd)
        .env("RQMD_INDEX_PATH", db)
        .env("RQMD_CONFIG_DIR", cfg);

    // Caller overrides go last so they take precedence over everything above.
    for &(key, value) in extra {
        rqmd.env(key, value);
    }

    let finished = rqmd.output().expect("spawn rqmd");
    Out {
        stdout: lossy(&finished.stdout),
        stderr: lossy(&finished.stderr),
        code: finished.status.code().unwrap_or(-1),
    }
}

/// Run `rqmd` to completion against a named-index *cache directory* rather
/// than a single pinned `RQMD_INDEX_PATH`, so that `--index <name>` resolves
/// to `<cache>/<name>.sqlite` (and a link's `?index=<name>` does the same).
/// This mirrors qmd's custom-index test env (`XDG_CACHE_HOME` set,
/// `INDEX_PATH` empty).
///
/// `--index` is NOT added automatically — pass it in `args`, or leave it out
/// (e.g. for the `get` round-trip that relies on the link's `?index=`). Reuse
/// the same `cache` dir across calls so each index DB persists.
///
/// `XDG_CACHE_HOME`, `XDG_CONFIG_HOME` and `RQMD_INDEX_PATH` are removed from
/// the inherited environment; `NO_COLOR=1` and `CI=1` are set. The exit code
/// is `-1` when the process reported none.
///
/// # Panics
/// Panics if the binary cannot be located or started.
pub fn spawn_cache(cwd: &Path, cache: &Path, cfg: &Path, args: &[&str]) -> Out {
    /// Lossy UTF-8 decode of a captured stream.
    fn lossy(bytes: &[u8]) -> String {
        String::from_utf8_lossy(bytes).into_owned()
    }

    let mut rqmd = Command::cargo_bin("rqmd").expect("rqmd binary is built by cargo test");
    rqmd.current_dir(cwd).args(args);

    // Drop the pinned index path (and host XDG dirs) so the cache dir decides.
    for unset in ["XDG_CACHE_HOME", "XDG_CONFIG_HOME", "RQMD_INDEX_PATH"] {
        rqmd.env_remove(unset);
    }

    rqmd.env("NO_COLOR", "1")
        .env("CI", "1")
        .env("PWD", cwd)
        .env("RQMD_CACHE_DIR", cache)
        .env("RQMD_CONFIG_DIR", cfg);

    let finished = rqmd.output().expect("spawn rqmd");
    Out {
        stdout: lossy(&finished.stdout),
        stderr: lossy(&finished.stderr),
        code: finished.status.code().unwrap_or(-1),
    }
}

// ---------------------------------------------------------------------------
// Assertion helpers
// ---------------------------------------------------------------------------

/// Whether `s` consists of exactly six characters drawn from `0-9` / `a-f`
/// (rqmd's docid shape). Uppercase hex digits are rejected.
pub fn is_hex6(s: &str) -> bool {
    let raw = s.as_bytes();
    if raw.len() != 6 {
        return false;
    }
    raw.iter()
        .all(|byte| matches!(byte, b'0'..=b'9' | b'a'..=b'f'))
}

/// Return the first line of `s` that is not blank (whitespace-only lines are
/// skipped) — i.e. the first data row for csv/files output. The line is
/// returned untrimmed; the result is `""` when every line is blank.
pub fn first_line(s: &str) -> &str {
    for candidate in s.lines() {
        if !candidate.trim().is_empty() {
            return candidate;
        }
    }
    ""
}

// ---------------------------------------------------------------------------
// MCP HTTP server harness (`mcp --http` / `--daemon`)
// ---------------------------------------------------------------------------
//
// These helpers spawn the `rqmd` binary as a *long-running* child (the existing
// `spawn`/`spawn_cache` use `.output()`, which blocks until exit and is useless
// for a server). They use the `spawn_cache` env contract — `RQMD_CACHE_DIR` set,
// `RQMD_INDEX_PATH` + `XDG_CACHE_HOME` removed — because `default_db_path`
// short-circuits on `RQMD_INDEX_PATH` *before* the `--index <name>` path is
// computed, so a pinned index path would make `--index` a no-op. The same
// `cache` dir must back the seeding (`collection add`) and the server so they
// open the same `<cache>/<name>.sqlite`.

/// Handle to a long-running `rqmd` server child together with the port it was
/// asked to bind. Dropping the handle kills the process and reaps it, so a
/// failing test never leaks a server.
pub struct ServerChild {
    /// The running server process.
    pub child: Child,
    /// Port the server was told to listen on.
    pub port: u16,
}

impl Drop for ServerChild {
    /// Kill, then wait. Both results are ignored on purpose: the process may
    /// already have exited by the time the guard is dropped.
    fn drop(&mut self) {
        let server = &mut self.child;
        server.kill().ok();
        server.wait().ok();
    }
}

/// Best-effort cleanup guard for the `mcp --daemon` tests. When dropped it
/// runs `rqmd mcp stop` (which kills the detached daemon child and removes its
/// PID file). Because the daemon is a *detached* process with no `Child`
/// handle, a kill-on-drop guard cannot reach it; `mcp stop`, working from the
/// PID file, is the only way in. Mirrors qmd's `afterAll` PID-file cleanup
/// (cli.test.ts:1630-1646).
pub struct DaemonGuard {
    /// Working directory for the `mcp stop` invocation.
    pub cwd: PathBuf,
    /// Cache directory handed to the child as `RQMD_CACHE_DIR`.
    pub cache: PathBuf,
    /// Config directory handed to the child as `RQMD_CONFIG_DIR`.
    pub cfg: PathBuf,
}

impl Drop for DaemonGuard {
    fn drop(&mut self) {
        let stop_args = ["mcp", "stop"];
        // The outcome is deliberately unused: teardown is best-effort.
        let _outcome = spawn_cache(
            self.cwd.as_path(),
            self.cache.as_path(),
            self.cfg.as_path(),
            &stop_args,
        );
    }
}

/// Ask the OS for an unused TCP port on loopback: bind port `0`, read back the
/// port that was assigned, and release the listener on return.
///
/// There is a small TOCTOU window before the server rebinds the port, which is
/// acceptable for tests (mirrors qmd's random-port approach).
///
/// # Panics
/// Panics if the bind fails or the local address cannot be read.
pub fn free_port() -> u16 {
    let probe = TcpListener::bind("127.0.0.1:0").expect("bind ephemeral port");
    let bound = probe.local_addr().expect("local_addr");
    bound.port()
}

/// Start `rqmd <args>` as a long-running child and return immediately, using
/// the same environment contract as `spawn_cache` (`RQMD_CACHE_DIR` set;
/// `RQMD_INDEX_PATH` and the XDG dirs removed). stdin, stdout and stderr are
/// all redirected to null. Readiness polling is left to the caller.
///
/// # Panics
/// Panics if the binary cannot be located or started.
pub fn spawn_cache_child(cwd: &Path, cache: &Path, cfg: &Path, args: &[&str]) -> Child {
    let mut server = Command::cargo_bin("rqmd").expect("rqmd binary is built by cargo test");
    server.current_dir(cwd).args(args);

    // Drop the pinned index path (and host XDG dirs) so the cache dir decides.
    for unset in ["XDG_CACHE_HOME", "XDG_CONFIG_HOME", "RQMD_INDEX_PATH"] {
        server.env_remove(unset);
    }

    server
        .env("NO_COLOR", "1")
        .env("CI", "1")
        .env("PWD", cwd)
        .env("RQMD_CACHE_DIR", cache)
        .env("RQMD_CONFIG_DIR", cfg);

    // Nothing reads the server's output, so send all three streams to null.
    server
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    server.spawn().expect("spawn rqmd server")
}

/// Poll `http://127.0.0.1:<port>/health` until it returns 200 with a body that
/// parses as JSON, or until `timeout` has elapsed.
///
/// Returns the parsed JSON body on success and `None` on timeout. Transport
/// errors, non-200 answers and unparseable bodies are all treated as "not
/// ready yet" and retried.
///
/// Each request carries its own timeout, capped by the time left before the
/// deadline, so a server that accepts the connection but never answers cannot
/// stall the caller beyond `timeout`. The pause between attempts is likewise
/// clipped to the time remaining.
pub fn wait_for_health(port: u16, timeout: Duration) -> Option<serde_json::Value> {
    /// Pause between two consecutive probes.
    const POLL_INTERVAL: Duration = Duration::from_millis(150);
    /// Upper bound for a single probe, so one hung request leaves room for
    /// further attempts within a longer overall timeout.
    const MAX_REQUEST_TIME: Duration = Duration::from_secs(2);

    let url = format!("http://127.0.0.1:{port}/health");
    let deadline = Instant::now() + timeout;

    loop {
        let remaining = deadline.saturating_duration_since(Instant::now());
        if remaining.is_zero() {
            return None;
        }

        let attempt = ureq::get(&url)
            .timeout(remaining.min(MAX_REQUEST_TIME))
            .call();
        if let Ok(resp) = attempt {
            if resp.status() == 200 {
                if let Ok(v) = resp.into_json::<serde_json::Value>() {
                    return Some(v);
                }
            }
        }

        // Never sleep past the deadline.
        let remaining = deadline.saturating_duration_since(Instant::now());
        std::thread::sleep(POLL_INTERVAL.min(remaining));
    }
}

/// Send `body` as a JSON POST to `http://127.0.0.1:<port>/query` and hand back
/// the response parsed as JSON.
///
/// # Panics
/// Panics if the request fails or the response body is not valid JSON, so
/// that tests fail loudly.
pub fn post_query(port: u16, body: serde_json::Value) -> serde_json::Value {
    let endpoint = format!("http://127.0.0.1:{port}/query");
    let response = ureq::post(&endpoint)
        .send_json(body)
        .expect("POST /query");
    let parsed = response.into_json::<serde_json::Value>();
    parsed.expect("query response json")
}