tokensave 6.3.3

Code intelligence tool that builds a semantic knowledge graph from Rust, Go, Java, Scala, TypeScript, Python, C, C++, Kotlin, C#, Swift, and many more codebases
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};

use glob::Pattern;
use serde::{Deserialize, Serialize};

use crate::errors::{Result, TokenSaveError};

/// Name of the configuration file stored inside the `.tokensave` directory.
///
/// Joined onto the `.tokensave` directory path by [`get_config_path`].
pub const CONFIG_FILENAME: &str = "config.json";

/// Name of the hidden directory used to store `TokenSave` metadata.
///
/// Joined onto the project root by [`get_tokensave_dir`].
pub const TOKENSAVE_DIR: &str = ".tokensave";

/// Name of the project-level query-ignore file stored inside the
/// `.tokensave` directory. See [`load_query_ignore`].
pub const QUERYIGNORE_FILENAME: &str = "queryignore";

/// Configuration for a `TokenSave` project.
///
/// Controls which files are indexed, size limits, and feature toggles.
/// Language inclusion is derived automatically from the installed
/// `LanguageExtractor` set — only exclude patterns live in the config.
///
/// The struct is (de)serialized with `serde`; it is stored as JSON in
/// `.tokensave/config.json` by [`save_config`] and read back by
/// [`load_config`]. Fields marked `#[serde(default)]` may be absent from the
/// JSON document; every other field is required when parsing.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TokenSaveConfig {
    /// Schema version of the configuration.
    pub version: u32,
    /// Root directory of the project being indexed.
    pub root_dir: String,
    /// Glob patterns for files to exclude during indexing.
    ///
    /// Evaluated by [`is_excluded`] and [`is_excluded_dir`].
    pub exclude: Vec<String>,
    /// Glob patterns for hidden (dot-prefixed) paths to include despite the
    /// default hidden-directory filter.  For example, `[".github/**"]` indexes
    /// files under `.github/` that would otherwise be skipped.
    ///
    /// Evaluated by [`is_included`]. Falls back to an empty list when the key
    /// is missing from the stored JSON.
    #[serde(default)]
    pub include: Vec<String>,
    /// Maximum file size in bytes; files larger than this are skipped.
    pub max_file_size: u64,
    /// Whether to extract doc comments from source files.
    pub extract_docstrings: bool,
    /// Whether to track call-site locations for edges.
    pub track_call_sites: bool,
    /// Whether to respect `.gitignore` rules when scanning files.
    ///
    /// Falls back to `false` when the key is missing from the stored JSON.
    #[serde(default)]
    pub git_ignore: bool,
}

impl Default for TokenSaveConfig {
    /// Builds the baseline configuration: schema version 1, an empty root
    /// directory, the stock exclude globs for common build/dependency output,
    /// no extra includes, a 1 MiB file-size cap, docstring extraction and
    /// call-site tracking switched on, and `.gitignore` handling switched off.
    fn default() -> Self {
        // Build artefacts, VCS metadata, dependency caches and minified files
        // that are skipped unless the user overrides the list.
        const DEFAULT_EXCLUDES: [&str; 10] = [
            "target/**",
            ".git/**",
            ".tokensave/**",
            "**/node_modules/**",
            "vendor/**",
            "**/*.min.*",
            "bin/**",
            "build/**",
            "out/**",
            ".gradle/**",
        ];

        Self {
            version: 1,
            root_dir: String::new(),
            exclude: DEFAULT_EXCLUDES
                .iter()
                .map(|pattern| (*pattern).to_owned())
                .collect(),
            include: Vec::new(),
            // 1 MiB expressed in bytes.
            max_file_size: 1024 * 1024,
            extract_docstrings: true,
            track_call_sites: true,
            git_ignore: false,
        }
    }
}

/// Builds the location of the `.tokensave` metadata directory for a project.
///
/// The result is `project_root` with [`TOKENSAVE_DIR`] appended; the
/// filesystem is not touched.
pub fn get_tokensave_dir(project_root: &Path) -> PathBuf {
    let mut dir = project_root.to_path_buf();
    dir.push(TOKENSAVE_DIR);
    dir
}

/// Builds the location of the project's configuration file.
///
/// The result is the `.tokensave` directory of `project_root` with
/// [`CONFIG_FILENAME`] appended; the filesystem is not touched.
pub fn get_config_path(project_root: &Path) -> PathBuf {
    let mut config_path = get_tokensave_dir(project_root);
    config_path.push(CONFIG_FILENAME);
    config_path
}

/// Reads the project's configuration from `.tokensave/config.json`.
///
/// When no configuration file exists, a default configuration is returned
/// whose `root_dir` is the (lossily converted) `project_root`.
///
/// # Errors
///
/// Returns [`TokenSaveError::Config`] when the file exists but cannot be
/// read, or when its contents are not valid JSON for [`TokenSaveConfig`].
pub fn load_config(project_root: &Path) -> Result<TokenSaveConfig> {
    let config_path = get_config_path(project_root);

    if config_path.exists() {
        let raw = fs::read_to_string(&config_path).map_err(|err| TokenSaveError::Config {
            message: format!(
                "failed to read config file '{}': {}",
                config_path.display(),
                err
            ),
        })?;

        let parsed = serde_json::from_str::<TokenSaveConfig>(&raw).map_err(|err| {
            TokenSaveError::Config {
                message: format!(
                    "failed to parse config file '{}': {}",
                    config_path.display(),
                    err
                ),
            }
        })?;

        Ok(parsed)
    } else {
        // Nothing stored yet: hand back the defaults, anchored at this root.
        Ok(TokenSaveConfig {
            root_dir: project_root.to_string_lossy().into_owned(),
            ..TokenSaveConfig::default()
        })
    }
}

/// Persists `config` as pretty-printed JSON in `.tokensave/config.json`.
///
/// The `.tokensave` directory is created if necessary. The JSON is first
/// written to a sibling `config.tmp` file and then renamed over the final
/// path, so a partially written file never replaces the real configuration.
///
/// # Errors
///
/// Returns [`TokenSaveError::Config`] when the directory cannot be created,
/// the configuration cannot be serialized, or the temporary file cannot be
/// written or renamed.
pub fn save_config(project_root: &Path, config: &TokenSaveConfig) -> Result<()> {
    // Every failure in this function is reported as a `Config` error.
    let config_error = |message: String| TokenSaveError::Config { message };

    let tokensave_dir = get_tokensave_dir(project_root);
    fs::create_dir_all(&tokensave_dir).map_err(|err| {
        config_error(format!(
            "failed to create tokensave directory '{}': {}",
            tokensave_dir.display(),
            err
        ))
    })?;

    let final_path = get_config_path(project_root);
    let staging_path = final_path.with_extension("tmp");

    let serialized = serde_json::to_string_pretty(config)
        .map_err(|e| config_error(format!("failed to serialize config: {e}")))?;

    // Stage the new contents next to the target...
    fs::write(&staging_path, &serialized).map_err(|err| {
        config_error(format!(
            "failed to write temporary config file '{}': {}",
            staging_path.display(),
            err
        ))
    })?;

    // ...then swap it into place in a single rename.
    fs::rename(&staging_path, &final_path).map_err(|err| {
        config_error(format!(
            "failed to rename temporary config file '{}' to '{}': {}",
            staging_path.display(),
            final_path.display(),
            err
        ))
    })?;

    Ok(())
}

/// Reports whether Git ignores the project's `.tokensave` directory.
///
/// Git itself is asked first (via `git check-ignore`), which takes the
/// repository `.gitignore`, `.git/info/exclude`, and the user's global
/// excludes file into account. When Git gives no usable answer (for example
/// outside a Git repository), only the project's own `.gitignore` file is
/// inspected.
pub fn is_in_gitignore(project_path: &Path) -> bool {
    match is_ignored_by_git(project_path, None) {
        Some(answer) => answer,
        None => is_in_local_gitignore(project_path),
    }
}

/// Asks `git check-ignore` whether `.tokensave/` is ignored in `project_path`.
///
/// Runs `git -C <project_path> check-ignore -q .tokensave/` with stdout and
/// stderr discarded. When `git_config_global` is given, it is exported to the
/// child process as `GIT_CONFIG_GLOBAL`.
///
/// Returns `Some(true)` for exit code 0, `Some(false)` for exit code 1, and
/// `None` when the process cannot be started, exits with any other code, or
/// terminates without an exit code.
fn is_ignored_by_git(project_path: &Path, git_config_global: Option<&Path>) -> Option<bool> {
    let mut git = Command::new("git");
    git.arg("-C").arg(project_path);
    git.args(["check-ignore", "-q", ".tokensave/"]);
    git.stdout(Stdio::null()).stderr(Stdio::null());

    if let Some(global_config) = git_config_global {
        git.env("GIT_CONFIG_GLOBAL", global_config);
    }

    let exit_code = git.status().ok()?.code()?;
    match exit_code {
        0 => Some(true),
        1 => Some(false),
        _ => None,
    }
}

/// Fallback check that scans only `<project_path>/.gitignore` for an entry
/// naming the `.tokensave` directory.
///
/// Each line is trimmed, then one optional leading `/` (root anchor) and one
/// optional trailing `/` (directory marker) are removed before comparing with
/// `.tokensave`. The accepted spellings are therefore `.tokensave`,
/// `.tokensave/`, `/.tokensave`, and `/.tokensave/`.
///
/// This is a plain textual comparison: wildcard patterns, negations, and
/// nested `.gitignore` files are not interpreted. Returns `false` when the
/// file is missing or unreadable.
fn is_in_local_gitignore(project_path: &Path) -> bool {
    let gitignore = project_path.join(".gitignore");
    match fs::read_to_string(&gitignore) {
        Ok(content) => content.lines().any(|line| {
            let entry = line.trim();
            // Strip at most one anchor and one directory marker so that
            // `/.tokensave/` is recognised alongside the shorter forms.
            let entry = entry.strip_prefix('/').unwrap_or(entry);
            let entry = entry.strip_suffix('/').unwrap_or(entry);
            entry == ".tokensave"
        }),
        Err(_) => false,
    }
}

/// Appends `.tokensave` to the project's `.gitignore`, creating the file if
/// needed. Ensures the entry starts on its own line (adds a trailing newline
/// to existing content if missing).
///
/// Existing content is handled as raw bytes, so a `.gitignore` that is not
/// valid UTF-8 is preserved verbatim. If the file exists but cannot be read,
/// a warning is printed to stderr and the file is left untouched instead of
/// being overwritten. Write failures are likewise reported as a warning on
/// stderr; this function never panics or returns an error.
pub fn add_to_gitignore(project_path: &Path) {
    let gitignore = project_path.join(".gitignore");

    let mut content = match fs::read(&gitignore) {
        Ok(bytes) => bytes,
        // A missing file simply means we start from scratch.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Vec::new(),
        // Any other failure: do not risk clobbering content we could not read.
        Err(e) => {
            eprintln!("warning: failed to read .gitignore: {e}");
            return;
        }
    };

    if matches!(content.last(), Some(&last) if last != b'\n') {
        content.push(b'\n');
    }
    content.extend_from_slice(b".tokensave\n");

    if let Err(e) = fs::write(&gitignore, content) {
        eprintln!("warning: failed to update .gitignore: {e}");
    }
}

/// Turns an optional CLI path argument into a `PathBuf`.
///
/// A supplied path is converted as-is (it is neither canonicalized nor made
/// absolute). Without one, the current working directory is used, or `"."`
/// if the working directory cannot be determined.
pub fn resolve_path(path: Option<String>) -> PathBuf {
    path.map(PathBuf::from)
        .unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
}

/// Searches `start` and each of its ancestors for `.tokensave/tokensave.db`.
///
/// Returns the nearest directory (starting with `start` itself) that contains
/// an initialised `TokenSave` project, or `None` when no ancestor up to the
/// top of the path has one.
pub fn discover_project_root(start: &Path) -> Option<PathBuf> {
    start
        .ancestors()
        .find(|candidate| candidate.join(".tokensave/tokensave.db").exists())
        .map(Path::to_path_buf)
}

/// Variant of [`resolve_path`] that searches for an existing project.
///
/// An explicit `path` is converted as-is. Without one, the nearest
/// initialised `TokenSave` project at or above the current working directory
/// is returned; if none is found, the working directory itself (or `"."`
/// when it cannot be determined) is returned.
///
/// Used by `serve`, `sync`, and `status`. NOT used by `init` (which must
/// create a fresh project at the target directory).
pub fn resolve_path_with_discovery(path: Option<String>) -> PathBuf {
    match path {
        Some(explicit) => PathBuf::from(explicit),
        None => {
            let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
            discover_project_root(&cwd).unwrap_or(cwd)
        }
    }
}

/// Checks `path` against the configured `include` globs.
///
/// Returns `true` as soon as one pattern matches. Matching is
/// case-sensitive, `*` may span `/`, and a leading dot needs no literal
/// match. Patterns that fail to compile are skipped.
///
/// This is used to allow hidden (dot-prefixed) directories that would
/// otherwise be skipped by the file walker.
pub fn is_included(path: &str, config: &TokenSaveConfig) -> bool {
    let options = glob::MatchOptions {
        case_sensitive: true,
        require_literal_separator: false,
        require_literal_leading_dot: false,
    };

    config
        .include
        .iter()
        .filter_map(|raw| Pattern::new(raw).ok())
        .any(|compiled| compiled.matches_with(path, options))
}

/// Returns `true` if a directory should be pruned during scanning.
///
/// Each exclude pattern is tried against two strings:
/// - `dir_path/_`, a dummy-file probe that catches `dir/**`-style globs, and
/// - `dir_path` itself, which catches bare `**/dirname`-style globs.
///
/// This ensures that patterns like `**/node_modules` and
/// `**/node_modules/**` both trigger directory pruning in
/// `scan_files_walkdir`. Matching is case-sensitive, `*` may span `/`, and a
/// leading dot needs no literal match. Patterns that fail to compile are
/// skipped.
pub fn is_excluded_dir(dir_path: &str, config: &TokenSaveConfig) -> bool {
    let match_opts = glob::MatchOptions {
        case_sensitive: true,
        require_literal_separator: false,
        require_literal_leading_dot: false,
    };

    // The probe does not depend on the pattern, so build it once up front
    // rather than allocating a new string for every exclude entry.
    let probe = format!("{dir_path}/_");

    config
        .exclude
        .iter()
        .filter_map(|pattern_str| Pattern::new(pattern_str).ok())
        .any(|pattern| {
            pattern.matches_with(&probe, match_opts)
                || pattern.matches_with(dir_path, match_opts)
        })
}

/// Checks `file_path` against the configured `exclude` globs.
///
/// Returns `true` as soon as one pattern matches. Matching is
/// case-sensitive, `*` may span `/`, and a leading dot needs no literal
/// match. Patterns that fail to compile are skipped.
pub fn is_excluded(file_path: &str, config: &TokenSaveConfig) -> bool {
    let options = glob::MatchOptions {
        case_sensitive: true,
        require_literal_separator: false,
        require_literal_leading_dot: false,
    };

    config
        .exclude
        .iter()
        .filter_map(|raw| Pattern::new(raw).ok())
        .any(|compiled| compiled.matches_with(file_path, options))
}

/// A single project-level query-ignore pattern.
///
/// Two flavours are supported:
/// - **Glob** — when the raw pattern contains a `*`, it is compiled with the
///   `glob` crate (the same engine used by the indexing `exclude`/`include`
///   patterns) so segments like `tests/*` or `**/generated/**` work.
/// - **Substring** — any other pattern matches when it appears anywhere in the
///   normalized `file_path` (gitignore-like "name fragment" matching).
///
/// Patterns are matched against node `file_path` values, which are stored
/// relative to the project root and normalized to use `/` separators.
#[derive(Debug, Clone)]
enum IgnoreRule {
    /// A compiled glob; matched against the whole path.
    Glob(Pattern),
    /// A literal fragment; matches when the path contains it anywhere.
    Substring(String),
}

impl IgnoreRule {
    /// Tests whether this rule applies to `path`.
    ///
    /// Substring rules match when `path` contains the fragment anywhere.
    /// Glob rules are matched case-sensitively, with `*` allowed to span `/`
    /// and no literal match required for a leading dot.
    fn matches(&self, path: &str) -> bool {
        match self {
            Self::Substring(fragment) => path.contains(fragment.as_str()),
            Self::Glob(compiled) => {
                let options = glob::MatchOptions {
                    case_sensitive: true,
                    require_literal_separator: false,
                    require_literal_leading_dot: false,
                };
                compiled.matches_with(path, options)
            }
        }
    }
}

/// Project-level set of query-time ignore patterns.
///
/// This is the persistent, implicit counterpart to a per-call path exclusion:
/// once configured in `.tokensave/queryignore`, matching results are dropped
/// from `tokensave_search` and `tokensave_context` without the caller having
/// to pass a filter on every request.
///
/// The `Default` value holds no rules and therefore ignores nothing.
#[derive(Debug, Clone, Default)]
pub struct QueryIgnore {
    /// Parsed rules, in the order their lines appeared in the source text.
    /// A path is ignored when at least one of them matches.
    rules: Vec<IgnoreRule>,
}

impl QueryIgnore {
    /// Builds a rule set from the raw text of a query-ignore file.
    ///
    /// Each line holds one pattern. Lines are trimmed; empty lines and lines
    /// starting with `#` are dropped. Backslashes in a pattern are rewritten
    /// to `/`. A pattern containing `*` becomes a glob rule, anything else a
    /// substring rule. Globs that fail to compile are silently discarded.
    pub fn parse(contents: &str) -> Self {
        let rules = contents
            .lines()
            .map(str::trim)
            .filter(|line| !line.is_empty() && !line.starts_with('#'))
            .filter_map(|line| {
                let pattern_text = line.replace('\\', "/");
                if pattern_text.contains('*') {
                    Pattern::new(&pattern_text).ok().map(IgnoreRule::Glob)
                } else {
                    Some(IgnoreRule::Substring(pattern_text))
                }
            })
            .collect();

        Self { rules }
    }

    /// Tells whether the rule set is empty (the common case).
    pub fn is_empty(&self) -> bool {
        self.rules.is_empty()
    }

    /// Tells whether any rule matches `file_path`.
    ///
    /// Backslashes in `file_path` are rewritten to `/` before matching. An
    /// empty rule set never matches.
    pub fn is_ignored(&self, file_path: &str) -> bool {
        if self.is_empty() {
            return false;
        }

        let unix_style = file_path.replace('\\', "/");
        for rule in &self.rules {
            if rule.matches(&unix_style) {
                return true;
            }
        }
        false
    }
}

/// Reads the project's query-ignore rules from
/// `<project_root>/.tokensave/queryignore`.
///
/// A missing or unreadable file yields an empty [`QueryIgnore`] that matches
/// nothing, so callers can apply the result unconditionally and projects that
/// have not opted in see no behavior change.
///
/// Unlike `config.exclude`, these patterns are applied at QUERY time only and
/// do not affect indexing — a path excluded here is still in the graph, it is
/// merely hidden from `tokensave_search` / `tokensave_context` results. This
/// complements `.gitignore` handling (`config.git_ignore`), which controls
/// what gets indexed in the first place.
pub fn load_query_ignore(project_root: &Path) -> QueryIgnore {
    let ignore_file = get_tokensave_dir(project_root).join(QUERYIGNORE_FILENAME);
    fs::read_to_string(&ignore_file)
        .map(|contents| QueryIgnore::parse(&contents))
        .unwrap_or_default()
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::{
        is_excluded, is_excluded_dir, is_ignored_by_git, is_included, load_query_ignore,
        QueryIgnore, TokenSaveConfig,
    };
    use std::fs;
    use std::process::Command;
    use tempfile::TempDir;

    #[test]
    fn test_is_included_matches_glob() {
        let config = TokenSaveConfig {
            include: vec![".github/**".to_string()],
            ..TokenSaveConfig::default()
        };
        assert!(is_included(".github/workflows/ci.yml", &config));
        assert!(is_included(".github/scripts/build.sh", &config));
        assert!(!is_included(".vscode/settings.json", &config));
        assert!(!is_included("src/main.rs", &config));
    }

    #[test]
    fn test_is_included_empty_matches_nothing() {
        let config = TokenSaveConfig::default();
        assert!(!is_included(".github/workflows/ci.yml", &config));
    }

    #[test]
    fn test_include_does_not_override_exclude() {
        let config = TokenSaveConfig {
            include: vec![".config/**".to_string()],
            exclude: vec![".config/secret/**".to_string()],
            ..TokenSaveConfig::default()
        };
        // Included by include glob
        assert!(is_included(".config/secret/key.rs", &config));
        // But also matched by exclude glob
        assert!(is_excluded(".config/secret/key.rs", &config));
    }

    #[test]
    fn test_default_excludes_nested_node_modules() {
        let config = TokenSaveConfig::default();
        // Top-level node_modules — should be excluded
        assert!(is_excluded("node_modules/express/index.js", &config));
        // Nested node_modules inside a sub-project — must also be excluded
        assert!(is_excluded(
            "projectA/node_modules/express/index.js",
            &config
        ));
        assert!(is_excluded(
            "packages/web/node_modules/react/index.js",
            &config
        ));
    }

    #[test]
    fn test_dir_pruning_pattern_matches_nested_dirs() {
        // scan_files_walkdir checks is_excluded("{dir}/_") for directory pruning.
        // Patterns like **/node_modules/** must match the dummy-file probe.
        let config = TokenSaveConfig::default();
        assert!(is_excluded("node_modules/_", &config));
        assert!(is_excluded("projectA/node_modules/_", &config));
    }

    #[test]
    fn test_is_excluded_dir_bare_pattern() {
        // Users may write "**/node_modules" (no trailing /**).
        // is_excluded_dir should match both bare and /**-suffixed patterns.
        let config = TokenSaveConfig {
            exclude: vec!["**/dist".to_string()],
            ..TokenSaveConfig::default()
        };
        assert!(is_excluded_dir("dist", &config));
        assert!(is_excluded_dir("packages/web/dist", &config));
        // Files inside dist should still be caught by accept_file's is_excluded
        // but dir pruning prevents even walking into the directory.
    }

    #[test]
    fn test_is_in_gitignore_respects_global_excludes_file() {
        let sandbox = TempDir::new().unwrap();
        let repo = sandbox.path().join("repo");
        fs::create_dir(&repo).unwrap();

        Command::new("git")
            .arg("-C")
            .arg(&repo)
            .arg("init")
            .arg("-q")
            .status()
            .unwrap();

        let excludes = sandbox.path().join("global_ignore");
        fs::write(&excludes, ".tokensave\n").unwrap();

        let git_config = sandbox.path().join("gitconfig");
        let status = Command::new("git")
            .env("GIT_CONFIG_GLOBAL", &git_config)
            .arg("config")
            .arg("--global")
            .arg("core.excludesFile")
            .arg(&excludes)
            .status()
            .unwrap();
        assert!(status.success());

        let ignored = is_ignored_by_git(&repo, Some(&git_config));

        assert_eq!(ignored, Some(true));
    }

    #[test]
    fn test_query_ignore_substring_match() {
        let qi = QueryIgnore::parse("generated\n");
        assert!(qi.is_ignored("src/generated/api.rs"));
        assert!(qi.is_ignored("generated.rs"));
        assert!(!qi.is_ignored("src/main.rs"));
    }

    #[test]
    fn test_query_ignore_glob_match() {
        let qi = QueryIgnore::parse("tests/*\n**/proto/**\n");
        assert!(qi.is_ignored("tests/foo.rs"));
        // `*` does not require a literal separator, so a nested path matches too.
        assert!(qi.is_ignored("tests/sub/bar.rs"));
        assert!(qi.is_ignored("crate/proto/messages.rs"));
        assert!(!qi.is_ignored("src/lib.rs"));
    }

    #[test]
    fn test_query_ignore_skips_comments_and_blanks() {
        let qi = QueryIgnore::parse("# a comment\n\n   \n  vendor  \n");
        assert!(qi.is_ignored("third_party/vendor/lib.rs"));
        assert!(!qi.is_ignored("src/main.rs"));
    }

    #[test]
    fn test_query_ignore_empty_matches_nothing() {
        let qi = QueryIgnore::default();
        assert!(qi.is_empty());
        assert!(!qi.is_ignored("anything/at/all.rs"));
        let parsed = QueryIgnore::parse("# only comments\n\n");
        assert!(parsed.is_empty());
    }

    #[test]
    fn test_query_ignore_normalizes_separators() {
        let qi = QueryIgnore::parse("src/gen\n");
        // file_path with backslashes (Windows-style) should still match.
        assert!(qi.is_ignored("src\\gen\\out.rs"));
    }

    #[test]
    fn test_load_query_ignore_absent_is_empty() {
        let dir = TempDir::new().unwrap();
        let qi = load_query_ignore(dir.path());
        assert!(qi.is_empty());
    }

    #[test]
    fn test_load_query_ignore_reads_file() {
        let dir = TempDir::new().unwrap();
        let ts_dir = dir.path().join(".tokensave");
        fs::create_dir_all(&ts_dir).unwrap();
        fs::write(ts_dir.join("queryignore"), "generated\ntests/*\n").unwrap();

        let qi = load_query_ignore(dir.path());
        assert!(!qi.is_empty());
        assert!(qi.is_ignored("src/generated/x.rs"));
        assert!(qi.is_ignored("tests/foo.rs"));
        assert!(!qi.is_ignored("src/main.rs"));
    }
}