lethe-core-rust 0.1.1

High-performance hybrid retrieval engine combining BM25 lexical search with vector similarity using z-score fusion. Features a hero configuration for optimal parity with the SPLADE baseline, gamma boosting for code/error contexts, and a comprehensive chunking pipeline.
Documentation
#!/usr/bin/env python3
"""
Analyze complete system test coverage including database-dependent crates
"""
import json
import os
from pathlib import Path

def get_lines_of_code(file_path):
    """Count lines of code in a source file, skipping blanks and comments.

    The count is a line-based heuristic for C-style comment syntax
    (``//`` and ``/* ... */``):

    * blank lines are skipped;
    * lines whose first non-blank characters are ``//`` are skipped;
    * a line starting with ``/*`` is skipped, and if the comment is not
      closed on that same line every following line up to and including
      the one containing ``*/`` is skipped as well.

    Known limitations: nested block comments, a block comment opened after
    code on the same line, and code following ``*/`` on the closing line
    are not recognised.

    Args:
        file_path: Path of the file to read (decoded as UTF-8).

    Returns:
        The number of counted lines, or 0 when the file is missing,
        unreadable, or not valid UTF-8.
    """
    count = 0
    in_block_comment = False
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()

                if in_block_comment:
                    # Still inside /* ... */; the closing line is comment too.
                    if '*/' in line:
                        in_block_comment = False
                    continue

                if not line or line.startswith('//'):
                    continue

                if line.startswith('/*'):
                    # Look for the terminator after the opener so that
                    # "/*/" is not mistaken for an already-closed comment.
                    in_block_comment = '*/' not in line[2:]
                    continue

                count += 1
    except (OSError, UnicodeDecodeError):
        # Best effort: an unreadable file contributes nothing to the total.
        return 0
    return count

def analyze_database_crates():
    """Count lines of code in the Rust sources of the database-dependent crates.

    Walks each crate source directory (relative to the current working
    directory) and measures every ``.rs`` file with ``get_lines_of_code``.
    Directories and files are visited in sorted order so the resulting
    list is the same on every run and platform.

    Returns:
        A ``(total_lines, files_analyzed)`` tuple where ``files_analyzed``
        is a list of ``(file_path, lines)`` pairs and ``total_lines`` is
        the sum of their line counts. Crate directories that do not exist
        contribute nothing.
    """
    database_crates = [
        'crates/infrastructure/src',
        'crates/api/src',
        'crates/cli/src',
    ]

    files_analyzed = []

    for crate_dir in database_crates:
        # os.walk ignores scandir errors by default, so a missing crate
        # directory simply yields nothing.
        for root, dirs, files in os.walk(crate_dir):
            # Sorting in place fixes the order in which os.walk descends.
            dirs.sort()
            for file_name in sorted(files):
                if not file_name.endswith('.rs'):
                    continue
                file_path = os.path.join(root, file_name)
                files_analyzed.append((file_path, get_lines_of_code(file_path)))

    total_lines = sum(lines for _, lines in files_analyzed)
    return total_lines, files_analyzed

def main():
    """Print a coverage report combining tarpaulin data with untested crates.

    Reads ``tarpaulin-report.json`` from the current working directory,
    prints per-file and aggregate coverage for the files listed there, then
    treats every line returned by ``analyze_database_crates()`` as untested
    to derive a system-wide coverage percentage.

    Returns:
        None. Everything is written to stdout. The function returns early,
        after printing an error, when the report file is missing or is not
        valid JSON.
    """
    def percent(part, whole):
        # Guard every ratio the same way so empty inputs report 0 instead
        # of raising ZeroDivisionError.
        return (part / whole * 100) if whole > 0 else 0

    # Machine-specific checkout prefix stripped from reported paths for
    # display only; paths without this prefix are printed unchanged.
    project_prefix = '/home/nathan/Projects/lethe/lethe-core/'

    print("🔍 Complete System Test Coverage Analysis")
    print("=" * 50)

    # Read existing coverage data
    try:
        with open('tarpaulin-report.json', 'r', encoding='utf-8') as f:
            coverage_data = json.load(f)
    except FileNotFoundError:
        print("❌ tarpaulin-report.json not found. Please run tarpaulin first.")
        return
    except json.JSONDecodeError as err:
        print(f"❌ tarpaulin-report.json is not valid JSON: {err}")
        return

    # Calculate covered/tested lines from existing data
    total_lines = 0
    covered_lines = 0

    print("\n📊 Tested Crates Coverage:")
    print("-" * 30)

    for file_data in coverage_data['files']:
        # 'path' is stored as a list of components in the report.
        file_path = "/".join(file_data['path'])
        file_covered = file_data['covered']
        file_lines = file_data['coverable']

        # Files with nothing coverable are left out of the listing.
        if file_lines > 0:
            file_coverage = percent(file_covered, file_lines)
            rel_path = file_path.replace(project_prefix, '')
            print(f"{rel_path}: {file_coverage:.1f}% ({file_covered}/{file_lines})")

        total_lines += file_lines
        covered_lines += file_covered

    tested_coverage = percent(covered_lines, total_lines)

    print(f"\n✅ Tested Crates Total: {tested_coverage:.2f}% ({covered_lines}/{total_lines})")

    # Analyze database-dependent crates
    db_lines, db_files = analyze_database_crates()

    print("\n📋 Database-Dependent Crates (not tested):")
    print("-" * 40)
    for file_path, lines in db_files:
        rel_path = file_path.replace('crates/', '')
        print(f"{rel_path}: {lines} lines")

    print(f"\n📊 Database Crates Total: {db_lines} lines")

    # Calculate complete system metrics: database lines count as uncovered.
    complete_total = total_lines + db_lines
    complete_coverage = percent(covered_lines, complete_total)

    print("\n🎯 COMPLETE SYSTEM COVERAGE:")
    print("=" * 30)
    print(f"Tested lines: {covered_lines}")
    print(f"Untested database lines: {db_lines}")
    print(f"Total system lines: {complete_total}")
    print(f"Overall coverage: {complete_coverage:.2f}%")

    # Coverage breakdown
    print("\n📈 Coverage Breakdown:")
    print(f"  Core business logic (tested): {tested_coverage:.1f}%")
    print("  Database layer (untested): 0.0%")
    print(f"  System-wide effective: {complete_coverage:.1f}%")

    # Analysis
    print("\n💡 Analysis:")
    if complete_coverage >= 75:
        print("✅ Excellent coverage! The core business logic is well-tested.")
    elif complete_coverage >= 60:
        print("✅ Good coverage. Consider adding database integration tests.")
    else:
        print("⚠️ Coverage could be improved. Focus on core logic first.")

    # Previously an unguarded division: crashed when both the report and
    # the database crates were empty.
    db_share = percent(db_lines, complete_total)

    print(f"\nThe {tested_coverage:.1f}% coverage of core business logic (domain + shared)")
    print("represents the most critical parts of the system being well-tested.")
    print(f"Database infrastructure represents {db_share:.1f}% of total codebase.")

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()