vyctor 0.1.0

A fast CLI tool for semantic file search using vector embeddings
Documentation
//! Integration tests for the semantic chunking system

use std::path::Path;
use vyctor::{detect_language, Chunk, ChunkType, Chunker, Language};

/// Checks that `detect_language` maps each sample file name to the expected
/// `Language` variant, based on the file's extension.
#[test]
fn test_detect_language_from_path() {
    // Shorthand so every case reads as "file name -> language".
    let language_of = |file_name: &str| detect_language(Path::new(file_name));

    // Rust and Python.
    assert_eq!(language_of("main.rs"), Language::Rust);
    assert_eq!(language_of("app.py"), Language::Python);

    // The TypeScript / JavaScript family; `.ts` and `.tsx` are distinct variants.
    assert_eq!(language_of("index.ts"), Language::TypeScript);
    assert_eq!(language_of("component.tsx"), Language::Tsx);
    assert_eq!(language_of("script.js"), Language::JavaScript);

    // Java, Go, C and C++.
    assert_eq!(language_of("Main.java"), Language::Java);
    assert_eq!(language_of("main.go"), Language::Go);
    assert_eq!(language_of("main.c"), Language::C);
    assert_eq!(language_of("main.cpp"), Language::Cpp);
}

/// Smoke test for `Chunker::chunk`: a short five-line string must yield at
/// least one chunk, and the first chunk must start on line 1.
#[test]
fn test_chunker_basic() {
    let text = "line1\nline2\nline3\nline4\nline5";
    let chunker = Chunker::new(100, 20);
    let chunks = chunker.chunk(text);

    // `first()` is `None` exactly when no chunk was produced.
    let first = chunks.first();
    assert!(first.is_some());
    assert_eq!(first.map(|c| c.start_line), Some(1));
}

/// Chunks a small Rust source through `chunk_with_path` using a `.rs` path and
/// checks that output is produced and that every chunk is tagged `"rust"`.
///
/// Only non-emptiness is asserted; the exact number of chunks is not pinned.
#[test]
fn test_chunker_with_rust_file() {
    let source = r#"
use std::io;

fn hello() {
    println!("Hello");
}

fn world() {
    println!("World");
}

struct MyStruct {
    field: i32,
}
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("test.rs"));

    // Checked first: the per-chunk loop below passes trivially on an empty result.
    assert!(!chunks.is_empty());

    // Every chunk must carry the language tag for the `.rs` path.
    for tag in chunks.iter().map(|c| c.language.as_deref()) {
        assert_eq!(tag, Some("rust"));
    }
}

/// Chunks a small Python source through `chunk_with_path` using a `.py` path
/// and checks that output is produced and that every chunk is tagged `"python"`.
#[test]
fn test_chunker_with_python_file() {
    let source = r#"
import os

def hello():
    print("Hello")

class MyClass:
    def __init__(self):
        pass

    def method(self):
        return 42
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("script.py"));

    // Checked first: the per-chunk loop below passes trivially on an empty result.
    assert!(!chunks.is_empty());

    // Every chunk must carry the language tag for the `.py` path.
    for tag in chunks.iter().map(|c| c.language.as_deref()) {
        assert_eq!(tag, Some("python"));
    }
}

/// Chunks a small JavaScript module through `chunk_with_path` using a `.js`
/// path and checks that output is produced and that every chunk is tagged
/// `"javascript"`.
#[test]
fn test_chunker_with_javascript_file() {
    let source = r#"
import { something } from './module';

function hello() {
    console.log("Hello");
}

class MyClass {
    constructor() {
        this.value = 42;
    }

    method() {
        return this.value;
    }
}

export default MyClass;
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("app.js"));

    // Checked first: the per-chunk loop below passes trivially on an empty result.
    assert!(!chunks.is_empty());

    // Every chunk must carry the language tag for the `.js` path.
    for tag in chunks.iter().map(|c| c.language.as_deref()) {
        assert_eq!(tag, Some("javascript"));
    }
}

/// Verifies that chunking does not drop source text: identifiers and
/// statements from both functions must each appear in at least one chunk.
#[test]
fn test_chunker_preserves_content() {
    let source = r#"
fn first_function() {
    let x = 1;
    let y = 2;
    let z = x + y;
    println!("{}", z);
}

fn second_function() {
    let a = "hello";
    let b = "world";
    println!("{} {}", a, b);
}
"#;
    let chunker = Chunker::new(500, 50);
    let chunks = chunker.chunk_with_path(source, Some("test.rs"));

    // None of the needles contains a newline, so finding one inside a single
    // chunk is the same as finding it in all chunks joined with "\n".
    let appears = |needle: &str| chunks.iter().any(|c| c.content.contains(needle));

    assert!(appears("first_function"));
    assert!(appears("second_function"));
    assert!(appears("let x = 1"));
    assert!(appears("let a = \"hello\""));
}

/// With semantic chunking switched off via `set_semantic_chunking(false)`,
/// every chunk produced for a `.rs` file must carry `ChunkType::Unknown`.
///
/// The test also requires that at least one chunk is produced, so that the
/// per-chunk assertion cannot pass on an empty result.
#[test]
fn test_chunker_semantic_disabled() {
    let mut chunker = Chunker::new(100, 20);
    chunker.set_semantic_chunking(false);

    let content = r#"
fn hello() {
    println!("Hello");
}

fn world() {
    println!("World");
}
"#;
    let chunks = chunker.chunk_with_path(content, Some("test.rs"));

    // Guard against a vacuous pass: a `for` loop over an empty collection
    // runs zero assertions, which would hide a chunker that returns nothing.
    assert!(
        !chunks.is_empty(),
        "chunker returned no chunks for non-empty input"
    );

    // All chunks should be Unknown type when semantic chunking is disabled
    for chunk in &chunks {
        assert_eq!(chunk.chunk_type, ChunkType::Unknown);
    }
}

/// Feeds the chunker a single function with a 50-statement body and checks
/// that it is split into more than one chunk, with the signature kept in the
/// first chunk.
#[test]
fn test_chunker_large_function_splitting() {
    // Build `fn large_function() { let var0 = 0; ... let var49 = 49; }`,
    // one statement per line.
    let body = (0..50)
        .map(|i| format!("    let var{} = {};", i, i))
        .collect::<Vec<_>>()
        .join("\n");
    let source = format!("fn large_function() {{\n{}\n}}", body);

    // Small limits so the function cannot fit in one chunk.
    // NOTE(review): the meaning of each `with_options` argument is not visible
    // from this file — confirm against the `Chunker` API.
    let chunker = Chunker::with_options(200, 50, 400, true);
    let chunks = chunker.chunk_with_path(&source, Some("test.rs"));

    assert!(
        chunks.len() > 1,
        "Large function should be split into multiple chunks"
    );

    // Indexing is safe here: the assertion above guarantees at least two chunks.
    assert!(chunks[0].content.contains("fn large_function()"));
}

/// Builds a `Chunk` with `Chunk::with_metadata` and checks that the line
/// range, chunk type, symbol name and language are stored as given.
#[test]
fn test_chunk_metadata() {
    let source = String::from("fn test() {}");
    let symbol = Some(String::from("test"));
    let language = Some(String::from("rust"));

    // Start and end line are both 1: the snippet is a single line.
    let chunk = Chunk::with_metadata(source, 1, 1, ChunkType::Function, symbol, language);

    assert_eq!(chunk.start_line, 1);
    assert_eq!(chunk.end_line, 1);
    assert_eq!(chunk.chunk_type, ChunkType::Function);
    assert_eq!(chunk.symbol_name.as_deref(), Some("test"));
    assert_eq!(chunk.language.as_deref(), Some("rust"));
}

/// Checks the string form returned by `ChunkType::as_str` for each variant
/// listed below.
#[test]
fn test_chunk_type_as_str() {
    // (variant, expected string) pairs.
    let expected_names = [
        (ChunkType::Function, "function"),
        (ChunkType::Class, "class"),
        (ChunkType::Method, "method"),
        (ChunkType::Struct, "struct"),
        (ChunkType::Module, "module"),
        (ChunkType::Unknown, "unknown"),
    ];

    for (kind, name) in expected_names {
        assert_eq!(kind.as_str(), name);
    }
}

/// A path with an unrecognised extension must still produce chunks, and the
/// first chunk must be typed `ChunkType::Unknown`.
#[test]
fn test_chunker_with_unknown_extension() {
    let text = "some random content\nspread across\nmultiple lines";
    let chunker = Chunker::new(100, 20);
    let chunks = chunker.chunk_with_path(text, Some("file.unknown"));

    // `first()` is `None` exactly when no chunk was produced.
    let first = chunks.first();
    assert!(first.is_some());
    assert_eq!(first.map(|c| &c.chunk_type), Some(&ChunkType::Unknown));
}

/// Chunking the empty string must produce no chunks at all.
#[test]
fn test_chunker_empty_content() {
    let chunker = Chunker::new(100, 20);
    let chunks = chunker.chunk("");

    assert_eq!(chunks.len(), 0);
}

/// Whitespace-only input may yield either no chunks or only blank chunks;
/// it must never yield a chunk with visible text.
#[test]
fn test_chunker_whitespace_only() {
    let chunker = Chunker::new(100, 20);
    let chunks = chunker.chunk("   \n\n   \n  ");

    // `any` is false on an empty collection, so the "no chunks" case is
    // accepted without a separate emptiness check.
    let has_visible_text = chunks.iter().any(|c| !c.content.trim().is_empty());
    assert!(!has_visible_text);
}

/// Checks `Language::has_tree_sitter_grammar`: `Unknown` never reports a
/// grammar, and the listed languages do when the `semantic-chunking` feature
/// is enabled.
#[test]
fn test_language_has_tree_sitter_grammar() {
    // Holds regardless of how the crate is built.
    assert!(!Language::Unknown.has_tree_sitter_grammar());

    // Only compiled in when the feature is on.
    #[cfg(feature = "semantic-chunking")]
    {
        let with_grammar = [
            Language::Rust,
            Language::Python,
            Language::JavaScript,
            Language::TypeScript,
            Language::Go,
            Language::Java,
        ];
        for language in with_grammar {
            assert!(language.has_tree_sitter_grammar());
        }
    }
}

/// Checks that `Language::semantic_node_types` lists the expected node kinds
/// for Rust, Python and JavaScript.
#[test]
fn test_language_semantic_node_types() {
    // Rust: functions, structs and impl blocks.
    let rust_types = Language::Rust.semantic_node_types();
    for node in ["function_item", "struct_item", "impl_item"] {
        assert!(rust_types.contains(&node));
    }

    // Python: function and class definitions.
    let python_types = Language::Python.semantic_node_types();
    for node in ["function_definition", "class_definition"] {
        assert!(python_types.contains(&node));
    }

    // JavaScript: function and class declarations.
    let js_types = Language::JavaScript.semantic_node_types();
    for node in ["function_declaration", "class_declaration"] {
        assert!(js_types.contains(&node));
    }
}

/// Chunks a small Go source through `chunk_with_path` using a `.go` path and
/// checks that output is produced and that every chunk is tagged `"go"`.
#[test]
fn test_chunker_go_file() {
    let source = r#"
package main

import "fmt"

func main() {
    fmt.Println("Hello")
}

type Config struct {
    Name string
    Port int
}

func (c *Config) Validate() error {
    return nil
}
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("main.go"));

    // Checked first: the per-chunk loop below passes trivially on an empty result.
    assert!(!chunks.is_empty());

    // Every chunk must carry the language tag for the `.go` path.
    for tag in chunks.iter().map(|c| c.language.as_deref()) {
        assert_eq!(tag, Some("go"));
    }
}

/// Chunks a small Java class through `chunk_with_path` using a `.java` path
/// and checks that output is produced and that every chunk is tagged `"java"`.
#[test]
fn test_chunker_java_file() {
    let source = r#"
package com.example;

import java.util.List;

public class Main {
    private int value;

    public Main() {
        this.value = 0;
    }

    public int getValue() {
        return value;
    }

    public static void main(String[] args) {
        System.out.println("Hello");
    }
}
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("Main.java"));

    // Checked first: the per-chunk loop below passes trivially on an empty result.
    assert!(!chunks.is_empty());

    // Every chunk must carry the language tag for the `.java` path.
    for tag in chunks.iter().map(|c| c.language.as_deref()) {
        assert_eq!(tag, Some("java"));
    }
}

/// Chunks a small TypeScript component through `chunk_with_path` and checks
/// that output is produced and that every chunk is tagged `"tsx"`.
///
/// Despite the test name, the fixture path is `component.tsx`, so this test
/// covers the TSX tag; a plain `.ts` path is not chunked in this test.
#[test]
fn test_chunker_typescript_file() {
    let source = r#"
import { Component } from 'react';

interface Props {
    name: string;
    count: number;
}

class MyComponent extends Component<Props> {
    constructor(props: Props) {
        super(props);
    }

    render() {
        return null;
    }
}

export default MyComponent;
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("component.tsx"));

    // Checked first: the per-chunk loop below passes trivially on an empty result.
    assert!(!chunks.is_empty());

    // Every chunk must carry the language tag for the `.tsx` path.
    for tag in chunks.iter().map(|c| c.language.as_deref()) {
        assert_eq!(tag, Some("tsx"));
    }
}