# Valknut configuration example
# Copy to `valknut.yml` (or pass with `--config`) and adjust for your repository.
# All sections shown here map directly to `ValknutConfig` in the Rust CLI.
# Analysis settings: module toggles, language and file selection, quality
# knobs, and coverage-report lookup.
# NOTE(review): nesting is reconstructed from the key grouping — confirm
# against the loader's schema.
analysis:
  # Per-module on/off switches.
  modules:
    complexity: true
    dependencies: true
    duplicates: true
    refactoring: true
    structure: true
    coverage: true
  # Languages to analyze, plus size and complexity limits.
  languages:
    enabled:
      - python
      - javascript
      - typescript
      - rust
      - go
    max_file_size_mb: 10.0
    # One threshold per enabled language.
    complexity_thresholds:
      python: 10.0
      javascript: 10.0
      typescript: 10.0
      rust: 15.0
      go: 12.0
  # Glob patterns selecting which files are considered.
  files:
    include_patterns:
      - "**/*"
    exclude_patterns:
      - "*/node_modules/*"
      - "*/venv/*"
      - "*/target/*"
      - "*/__pycache__/*"
      - "*.min.js"
    max_files: null  # null = no limit
    follow_symlinks: false
  quality:
    confidence_threshold: 0.7
    # Presumably seconds (unit is not stated in the key) — confirm.
    max_analysis_time_per_file: 30
    strict_mode: false
  # Coverage-report lookup used by the analysis stage.
  coverage:
    enabled: true
    # Presumably an explicit report path; null leaves it unset — confirm.
    file_path: null
    auto_discover: true
    max_age_days: 7
    search_paths:
      - "./coverage/"
      - "./target/coverage/"
      - "./target/tarpaulin/"
      - "./.coverage/"
      - "./htmlcov/"
      - "./build/coverage/"
      - "./reports/"
      - "./"
# Denoising settings: size/match minimums, similarity weights, stop-motif
# handling, auto-calibration, and ranking.
# NOTE(review): nesting is reconstructed from the key grouping — confirm
# against the loader's schema.
denoise:
  enabled: true
  auto: true
  min_function_tokens: 40
  min_match_tokens: 24
  require_blocks: 2
  similarity: 0.82
  # Component weights; the three values sum to 1.0.
  weights:
    ast: 0.35
    pdg: 0.45
    emb: 0.20
  io_mismatch_penalty: 0.25
  # Same value as `similarity` above; how the two relate is not visible here.
  threshold_s: 0.82
  stop_motifs:
    enabled: true
    percentile: 0.5
    refresh_days: 7
  auto_calibration:
    enabled: true
    quality_target: 0.8
    sample_size: 200
    max_iterations: 50
  ranking:
    by: "saved_tokens"
    min_saved_tokens: 100
    min_rarity_gain: 1.2
  dry_run: false
# Scoring settings: normalization scheme, per-category weights, and
# statistical parameters.
scoring:
  normalization_scheme: "z_score"
  use_bayesian_fallbacks: true
  confidence_reporting: false
  # Relative weight of each score category.
  weights:
    complexity: 1.0
    graph: 0.8
    structure: 0.9
    style: 0.5
    coverage: 0.7
  statistical_params:
    confidence_level: 0.95
    min_sample_size: 10
    outlier_threshold: 3.0
# Graph analysis settings: which metrics are enabled and when to approximate.
graph:
  enable_betweenness: true
  enable_closeness: false
  enable_cycle_detection: true
  # Presumably the size limit above which approximation is used — confirm.
  max_exact_size: 10000
  use_approximation: true
  approximation_sample_rate: 0.1
# LSH (locality-sensitive hashing) settings for similarity search.
lsh:
  num_hashes: 128
  # 128 hashes over 16 bands gives 8 hashes per band if split evenly.
  num_bands: 16
  shingle_size: 3
  similarity_threshold: 0.7
  max_candidates: 100
  use_semantic_similarity: false
# Duplicate-detection settings: path scope, size minimums, similarity
# weights, stop phrases, ranking, and adaptive options.
# NOTE(review): nesting is reconstructed from the key grouping — confirm
# against the loader's schema.
dedupe:
  # Glob patterns selecting the paths in scope.
  include:
    - "src/**"
  exclude:
    - "benchmarks/**"
    - "examples/**"
    - "datasets/**"
    - "**/generated/**"
    - "**/*.pb.rs"
  min_function_tokens: 40
  min_ast_nodes: 35
  min_match_tokens: 24
  min_match_coverage: 0.40
  shingle_k: 9
  require_distinct_blocks: 2
  # Component weights; the three values sum to 1.0.
  weights:
    ast: 0.35
    pdg: 0.45
    emb: 0.20
  io_mismatch_penalty: 0.25
  threshold_s: 0.82
  # Regex-style patterns. They are double-quoted, so every backslash is
  # doubled and reaches the consumer as a single backslash.
  stop_phrases:
    - "^\\s*@staticmethod\\b"
    - "group\\.bench_with_input\\s*\\("
    - "\\bb\\.iter\\s*\\(\\|\\|"
    - "\\bgroup\\.finish\\s*\\(\\)\\s*;?"
    - "\\blet\\s+config\\s*=\\s*AnalysisConfig::(new|default)\\s*\\(\\)\\s*;?"
    - "\\bchecks\\.push\\s*\\(\\s*HealthCheck\\s*\\{"
  rank_by: "saved_tokens"
  min_saved_tokens: 100
  keep_top_per_file: 3
  # Adaptive options.
  adaptive:
    auto_denoise: true
    adaptive_learning: true
    rarity_weighting: true
    structural_validation: true
    stop_motif_percentile: 0.75
    hub_suppression_threshold: 0.6
    quality_gate_percentage: 0.8
    tfidf_kgram_size: 8
    wl_iterations: 3
    min_rarity_gain: 1.2
    external_call_jaccard_threshold: 0.2
    cache_refresh_days: 7
    auto_refresh_cache: true
# Per-language settings, keyed by language name. Every entry carries the same
# six fields.
languages:
  python:
    enabled: true
    file_extensions: [".py", ".pyi"]
    tree_sitter_language: "python"
    max_file_size_mb: 10.0
    complexity_threshold: 10.0
    additional_settings: {}
  javascript:
    enabled: true
    file_extensions: [".js", ".mjs", ".jsx"]
    tree_sitter_language: "javascript"
    max_file_size_mb: 5.0
    complexity_threshold: 10.0
    additional_settings: {}
  typescript:
    enabled: true
    file_extensions: [".ts", ".tsx", ".d.ts"]
    tree_sitter_language: "typescript"
    max_file_size_mb: 5.0
    complexity_threshold: 10.0
    additional_settings: {}
  rust:
    enabled: true
    file_extensions: [".rs"]
    tree_sitter_language: "rust"
    max_file_size_mb: 10.0
    complexity_threshold: 15.0
    additional_settings: {}
  go:
    enabled: true
    file_extensions: [".go"]
    tree_sitter_language: "go"
    max_file_size_mb: 8.0
    complexity_threshold: 12.0
    additional_settings: {}
# Cache and report output settings.
io:
  cache_dir: ".valknut/cache"
  enable_caching: true
  cache_ttl_seconds: 3600
  report_dir: "./reports"
  report_format: "json"
# Resource limits and timeouts.
performance:
  max_threads: null  # null = auto-detect
  memory_limit_mb: null  # null = no limit
  file_timeout_seconds: 30
  # Presumably null means no overall timeout — confirm.
  total_timeout_seconds: null
  enable_simd: false
  batch_size: 100
# Structure analysis settings: pack toggles plus directory-level, file-level,
# and partitioning thresholds.
# NOTE(review): nesting is reconstructed from the key grouping — confirm
# against the loader's schema.
structure:
  enable_branch_packs: true
  enable_file_split_packs: true
  top_packs: 20
  # Directory-level thresholds.
  fsdir:
    max_files_per_dir: 25
    max_subdirs_per_dir: 10
    max_dir_loc: 2000
    min_branch_recommendation_gain: 0.15
    min_files_for_split: 5
    target_loc_per_subdir: 1000
  # File-level thresholds.
  fsfile:
    huge_loc: 800
    huge_bytes: 128000
    min_split_loc: 200
    min_entities_per_split: 3
  partitioning:
    balance_tolerance: 0.25
    max_clusters: 4
    min_clusters: 2
    naming_fallbacks: ["core", "io", "api", "util"]
# Coverage-report discovery: where to look, which file names to match, and
# how old a report may be.
coverage:
  auto_discover: true
  # Directories to search.
  search_paths:
    - "./coverage/"
    - "./target/coverage/"
    - "./target/tarpaulin/"
    - "./.coverage/"
    - "./htmlcov/"
    - "./build/coverage/"
    - "./reports/"
    - "./"
  # Report file names, followed by recursive ("**/") variants.
  file_patterns:
    - "coverage.xml"
    - "lcov.info"
    - "coverage.json"
    - "coverage.lcov"
    - "cobertura.xml"
    - "**/coverage.xml"
    - "**/lcov.info"
    - "**/coverage.json"
    - "**/cobertura.xml"
  max_age_days: 7
  coverage_file: null  # Set to override auto-discovery