devgen_splitter/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
//! # DevGen Code Splitter Library
//!
//! This library provides functionality for splitting source code into manageable chunks
//! and identifying various code entities within those chunks. It's designed to assist
//! in code analysis, documentation generation, and other tasks that require structured
//! parsing of source code.
//!
//! ## Main Components:
//!
//! - `EntityType`: Enum representing different types of code entities (e.g., Struct, Function).
//! - `Entity`: Struct containing metadata about a specific code entity.
//! - `Chunk`: Struct representing a section of code containing one or more entities.
//! - `SplitOptions`: Configuration options for controlling how code is split into chunks.
//! - `Lang`: Enum representing supported programming languages (imported from `lang` module).
//! - `split`: Function for splitting code into chunks (imported from `splitter` module).
//!
//! ## Usage Example:
//!
//! ```rust
//! use devgen_splitter::{split, SplitOptions, Lang};
//!
//! let source_code = "// Your source code here...";
//! let options = SplitOptions { chunk_line_limit: 100 };
//! let chunks = split("test.rs", source_code, &options).unwrap();
//!
//! for chunk in chunks {
//!     println!("Chunk line range: {:?}", chunk.line_range);
//!     for entity in chunk.entities {
//!         println!("Entity: {} ({:?})", entity.name, entity.entity_type);
//!     }
//! }
//! ```

// ... (rest of the existing code remains unchanged)
use std::ops::Range;
/// Represents the different types of entities that can be identified in the code.
#[derive(Debug, Clone, PartialEq)]
pub enum EntityType {
    /// Represents a struct definition
    Struct,
    /// Represents an interface definition
    Interface,
    /// Represents a standalone function
    Function,
    /// Represents a method within a class or interface
    Method,
    /// Represents an enumeration definition
    Enum,
}

/// Represents a code entity with its associated metadata.
///
/// This struct contains information about a specific code entity, including its name,
/// type, and line ranges both in the original source code and within the current chunk.
#[derive(Debug, Clone, PartialEq)]
pub struct Entity {
    /// name of the entity
    pub name: String,
    /// type of the entity
    pub entity_type: EntityType,
    /// the line range of the entity in the source code
    /// including the doc string and the body
    pub completed_line_range: Range<usize>,
    /// the line range of the chunk in the current chunk
    pub chunk_line_range: Range<usize>,
    /// if the entity is a method, the name of the parent struct or interface
    pub parent: Option<String>,
}

/// Represents a chunk of code containing one or more entities.
///
/// A chunk is a section of the source code that may contain multiple entities
/// and is defined by a range of line numbers.
#[derive(Debug, Clone, PartialEq)]
pub struct Chunk {
    /// The line range of the chunk in the source code
    pub line_range: Range<usize>,
    /// The entities contained within this chunk
    pub entities: Vec<Entity>,
}

/// Configuration options for the devgen splitter.
///
/// This struct defines the parameters used to control how the source code
/// is split into chunks, specifying the minimum and maximum number of lines
/// for each chunk.
pub struct SplitOptions {
    /// The maximum number of lines for each code chunk.
    ///
    /// This value determines the size of the "window" used when splitting the code into chunks.
    /// If a chunk exceeds this size, it will be divided into smaller chunks.
    /// A larger value results in fewer, larger chunks, while a smaller value produces more,
    /// smaller chunks.
    pub chunk_line_limit: usize,
}

mod lang;
mod splitter;
pub use lang::Lang;
pub use splitter::split;