use std::path::Path;
use vyctor::{detect_language, Chunk, ChunkType, Chunker, Language};
/// Checks that `detect_language` maps each sample file name to the expected
/// `Language` variant.
#[test]
fn test_detect_language_from_path() {
    // (file name, expected language) pairs, checked in order.
    let cases = [
        ("main.rs", Language::Rust),
        ("app.py", Language::Python),
        ("index.ts", Language::TypeScript),
        ("component.tsx", Language::Tsx),
        ("script.js", Language::JavaScript),
        ("Main.java", Language::Java),
        ("main.go", Language::Go),
        ("main.c", Language::C),
        ("main.cpp", Language::Cpp),
    ];

    for (file_name, expected) in cases {
        assert_eq!(detect_language(Path::new(file_name)), expected);
    }
}
/// Runs `Chunker::chunk` over a five-line string and checks that at least one
/// chunk comes back and that the first one reports `start_line == 1`.
#[test]
fn test_chunker_basic() {
    let chunker = Chunker::new(100, 20);
    let chunks = chunker.chunk("line1\nline2\nline3\nline4\nline5");

    let first = chunks.first();
    assert!(first.is_some());
    assert_eq!(first.map(|chunk| chunk.start_line), Some(1));
}
/// Chunks a small Rust snippet under the path `test.rs` and checks that
/// chunks are produced and that each one is labelled `"rust"`.
#[test]
fn test_chunker_with_rust_file() {
    let source = r#"
use std::io;
fn hello() {
println!("Hello");
}
fn world() {
println!("World");
}
struct MyStruct {
field: i32,
}
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("test.rs"));

    assert!(!chunks.is_empty());
    assert!(chunks
        .iter()
        .all(|chunk| chunk.language.as_deref() == Some("rust")));
}
/// Chunks a small Python snippet under the path `script.py` and checks that
/// chunks are produced and that each one is labelled `"python"`.
#[test]
fn test_chunker_with_python_file() {
    let source = r#"
import os
def hello():
print("Hello")
class MyClass:
def __init__(self):
pass
def method(self):
return 42
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("script.py"));

    assert!(!chunks.is_empty());
    for language in chunks.iter().map(|chunk| chunk.language.as_deref()) {
        assert_eq!(language, Some("python"));
    }
}
/// Chunks a small JavaScript module under the path `app.js` and checks that
/// chunks are produced and that each one is labelled `"javascript"`.
#[test]
fn test_chunker_with_javascript_file() {
    let source = r#"
import { something } from './module';
function hello() {
console.log("Hello");
}
class MyClass {
constructor() {
this.value = 42;
}
method() {
return this.value;
}
}
export default MyClass;
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("app.js"));

    assert!(!chunks.is_empty());
    // No chunk may carry a label other than "javascript".
    let mislabelled = chunks
        .iter()
        .any(|chunk| chunk.language.as_deref() != Some("javascript"));
    assert!(!mislabelled);
}
/// Chunks a two-function Rust snippet, glues the chunk contents back together
/// with newlines, and checks that selected fragments of the input are still
/// present in the combined text.
#[test]
fn test_chunker_preserves_content() {
    let source = r#"
fn first_function() {
let x = 1;
let y = 2;
let z = x + y;
println!("{}", z);
}
fn second_function() {
let a = "hello";
let b = "world";
println!("{} {}", a, b);
}
"#;
    let chunker = Chunker::new(500, 50);
    let chunks = chunker.chunk_with_path(source, Some("test.rs"));

    // Newline-separated concatenation of every chunk's content.
    let mut combined = String::new();
    for (index, chunk) in chunks.iter().enumerate() {
        if index > 0 {
            combined.push('\n');
        }
        combined.push_str(chunk.content.as_str());
    }

    let expected_fragments = [
        "first_function",
        "second_function",
        "let x = 1",
        "let a = \"hello\"",
    ];
    for fragment in expected_fragments {
        assert!(combined.contains(fragment));
    }
}
/// With semantic chunking switched off via `set_semantic_chunking(false)`,
/// every chunk produced for a `.rs` path must carry `ChunkType::Unknown`.
#[test]
fn test_chunker_semantic_disabled() {
    let mut chunker = Chunker::new(100, 20);
    chunker.set_semantic_chunking(false);
    let content = r#"
fn hello() {
println!("Hello");
}
fn world() {
println!("World");
}
"#;
    let chunks = chunker.chunk_with_path(content, Some("test.rs"));

    // Guard against a vacuous pass: the per-chunk check below would succeed
    // trivially if the chunker returned nothing for this non-empty input.
    assert!(
        !chunks.is_empty(),
        "non-empty input should produce at least one chunk"
    );
    for chunk in &chunks {
        assert_eq!(chunk.chunk_type, ChunkType::Unknown);
    }
}
/// Builds a single Rust function with 50 generated `let` statements and checks
/// that a chunker created with `Chunker::with_options(200, 50, 400, true)`
/// returns more than one chunk, the first of which contains the function
/// signature.
#[test]
fn test_chunker_large_function_splitting() {
    let body = (0..50)
        .map(|i| format!(" let var{} = {};", i, i))
        .collect::<Vec<String>>()
        .join("\n");
    let source = format!("fn large_function() {{\n{}\n}}", body);

    let chunker = Chunker::with_options(200, 50, 400, true);
    let chunks = chunker.chunk_with_path(&source, Some("test.rs"));

    assert!(
        chunks.len() > 1,
        "Large function should be split into multiple chunks"
    );
    let first_has_signature = chunks
        .first()
        .map_or(false, |chunk| chunk.content.contains("fn large_function()"));
    assert!(first_has_signature);
}
/// Constructs a `Chunk` through `Chunk::with_metadata` and checks that the
/// line numbers, chunk type, symbol name and language can be read back
/// unchanged from its fields.
#[test]
fn test_chunk_metadata() {
    let (text, symbol, language) = ("fn test() {}", "test", "rust");

    let chunk = Chunk::with_metadata(
        text.to_string(),
        1,
        1,
        ChunkType::Function,
        Some(symbol.to_string()),
        Some(language.to_string()),
    );

    assert_eq!((chunk.start_line, chunk.end_line), (1, 1));
    assert_eq!(chunk.chunk_type, ChunkType::Function);
    assert_eq!(chunk.symbol_name.as_deref(), Some(symbol));
    assert_eq!(chunk.language.as_deref(), Some(language));
}
/// Checks the string returned by `ChunkType::as_str` for each variant listed
/// below.
#[test]
fn test_chunk_type_as_str() {
    // (variant, expected label) pairs, checked in order.
    let expectations = [
        (ChunkType::Function, "function"),
        (ChunkType::Class, "class"),
        (ChunkType::Method, "method"),
        (ChunkType::Struct, "struct"),
        (ChunkType::Module, "module"),
        (ChunkType::Unknown, "unknown"),
    ];

    for (kind, label) in expectations {
        assert_eq!(kind.as_str(), label);
    }
}
/// Chunks plain text under the path `file.unknown` and checks that output is
/// still produced and that the first chunk is typed `ChunkType::Unknown`.
#[test]
fn test_chunker_with_unknown_extension() {
    let text = "some random content\nspread across\nmultiple lines";
    let chunker = Chunker::new(100, 20);
    let chunks = chunker.chunk_with_path(text, Some("file.unknown"));

    let first = chunks
        .first()
        .expect("expected at least one chunk for non-empty input");
    assert_eq!(first.chunk_type, ChunkType::Unknown);
}
/// Checks that chunking the empty string yields no chunks.
#[test]
fn test_chunker_empty_content() {
    let chunker = Chunker::new(100, 20);
    let produced = chunker.chunk("");
    assert_eq!(produced.len(), 0);
}
/// For whitespace-only input, the chunker may return nothing or return
/// chunks, but no returned chunk may contain anything other than whitespace.
#[test]
fn test_chunker_whitespace_only() {
    let chunker = Chunker::new(100, 20);
    let chunks = chunker.chunk(" \n\n \n ");

    // An empty result satisfies this trivially: `any` over nothing is false.
    let has_visible_text = chunks
        .iter()
        .any(|chunk| !chunk.content.trim().is_empty());
    assert!(!has_visible_text);
}
/// Checks `Language::has_tree_sitter_grammar`: `Language::Unknown` must always
/// report `false`, and when the `semantic-chunking` feature is compiled in the
/// languages listed below must report `true`.
#[test]
fn test_language_has_tree_sitter_grammar() {
    // Holds regardless of the feature flag.
    assert!(!Language::Unknown.has_tree_sitter_grammar());

    // Only compiled when the crate feature is enabled.
    #[cfg(feature = "semantic-chunking")]
    {
        let supported = [
            Language::Rust.has_tree_sitter_grammar(),
            Language::Python.has_tree_sitter_grammar(),
            Language::JavaScript.has_tree_sitter_grammar(),
            Language::TypeScript.has_tree_sitter_grammar(),
            Language::Go.has_tree_sitter_grammar(),
            Language::Java.has_tree_sitter_grammar(),
        ];
        assert!(supported.iter().all(|&has_grammar| has_grammar));
    }
}
/// Checks that `Language::semantic_node_types` includes the expected node
/// type names for Rust, Python and JavaScript.
#[test]
fn test_language_semantic_node_types() {
    let rust_types = Language::Rust.semantic_node_types();
    for node in ["function_item", "struct_item", "impl_item"] {
        assert!(rust_types.contains(&node));
    }

    let python_types = Language::Python.semantic_node_types();
    for node in ["function_definition", "class_definition"] {
        assert!(python_types.contains(&node));
    }

    let js_types = Language::JavaScript.semantic_node_types();
    for node in ["function_declaration", "class_declaration"] {
        assert!(js_types.contains(&node));
    }
}
/// Chunks a small Go program under the path `main.go` and checks that chunks
/// are produced and that each one is labelled `"go"`.
#[test]
fn test_chunker_go_file() {
    let source = r#"
package main
import "fmt"
func main() {
fmt.Println("Hello")
}
type Config struct {
Name string
Port int
}
func (c *Config) Validate() error {
return nil
}
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("main.go"));

    assert!(!chunks.is_empty());
    assert!(chunks
        .iter()
        .all(|chunk| chunk.language.as_deref() == Some("go")));
}
/// Chunks a small Java class under the path `Main.java` and checks that
/// chunks are produced and that each one is labelled `"java"`.
#[test]
fn test_chunker_java_file() {
    let source = r#"
package com.example;
import java.util.List;
public class Main {
private int value;
public Main() {
this.value = 0;
}
public int getValue() {
return value;
}
public static void main(String[] args) {
System.out.println("Hello");
}
}
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("Main.java"));

    assert!(!chunks.is_empty());
    for language in chunks.iter().map(|chunk| chunk.language.as_deref()) {
        assert_eq!(language, Some("java"));
    }
}
/// Chunks a small TypeScript component and checks that chunks are produced
/// and that each one carries the expected language label.
///
/// Note that, despite the test name, the path used is `component.tsx` and the
/// expected label is `"tsx"`.
#[test]
fn test_chunker_typescript_file() {
    let source = r#"
import { Component } from 'react';
interface Props {
name: string;
count: number;
}
class MyComponent extends Component<Props> {
constructor(props: Props) {
super(props);
}
render() {
return null;
}
}
export default MyComponent;
"#;
    let chunker = Chunker::new(1000, 100);
    let chunks = chunker.chunk_with_path(source, Some("component.tsx"));

    assert!(!chunks.is_empty());
    // No chunk may carry a label other than "tsx".
    let mislabelled = chunks
        .iter()
        .any(|chunk| chunk.language.as_deref() != Some("tsx"));
    assert!(!mislabelled);
}