devgen_splitter/
lib.rs

1//! # DevGen Code Splitter Library
2//!
3//! This library provides functionality for splitting source code into manageable chunks
4//! and identifying various code entities within those chunks. It's designed to assist
5//! in code analysis, documentation generation, and other tasks that require structured
6//! parsing of source code.
7//!
8//! ## Main Components:
9//!
10//! - `EntityType`: Enum representing different types of code entities (e.g., Struct, Function).
11//! - `Entity`: Struct containing metadata about a specific code entity.
12//! - `Chunk`: Struct representing a section of code containing one or more entities.
13//! - `SplitOptions`: Configuration options for controlling how code is split into chunks.
14//! - `Lang`: Enum representing supported programming languages (imported from `lang` module).
15//! - `split`: Function for splitting code into chunks (imported from `splitter` module).
16//!
17//! ## Usage Example:
18//!
19//! ```rust
20//! use devgen_splitter::{
21//!     split,
22//!     Lang,
23//!     SplitOptions,
24//! };
25//!
26//! let source_code = "// Your source code here...";
27//! let options = SplitOptions {
28//!     chunk_line_limit: 100,
29//! };
30//! let chunks = split("test.rs", source_code, &options).unwrap();
31//!
32//! for chunk in chunks {
33//!     println!("Chunk line range: {:?}", chunk.line_range);
34//!     for entity in chunk.entities {
35//!         println!("Entity: {} ({:?})", entity.name, entity.entity_type);
36//!     }
37//! }
38//! ```
39
// Public data types shared by the splitter and its callers.
41use serde::{
42    Deserialize,
43    Serialize,
44};
45use std::ops::Range;
46
47/// Represents the different types of entities that can be identified in the code.
48#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
49pub enum EntityType {
50    /// Represents a struct definition
51    Struct,
52    /// Represents an interface definition
53    Interface,
54    /// Represents a standalone function
55    Function,
56    /// Represents a method within a class or interface
57    Method,
58    /// Represents an enumeration definition
59    Enum,
60}
61
62/// Represents a code entity with its associated metadata.
63///
64/// This struct contains information about a specific code entity, including its name,
65/// type, and line ranges both in the original source code and within the current chunk.
66#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
67pub struct Entity {
68    /// name of the entity
69    pub name: String,
70    /// type of the entity
71    pub entity_type: EntityType,
72    /// the line range of the entity in the source code
73    /// including the doc string and the body
74    pub completed_line_range: Range<usize>,
75    /// the line range of the chunk in the current chunk
76    pub chunk_line_range: Range<usize>,
77    /// if the entity is a method, the name of the parent struct or interface
78    pub parent: Option<String>,
79    /// the line range of the parent in the source code
80    pub parent_line_range: Option<Range<usize>>,
81}
82
83/// Represents a chunk of code containing one or more entities.
84///
85/// A chunk is a section of the source code that may contain multiple entities
86/// and is defined by a range of line numbers.
87#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
88pub struct Chunk {
89    /// The line range of the chunk in the source code
90    pub line_range: Range<usize>,
91    /// The entities contained within this chunk
92    pub entities: Vec<Entity>,
93}
94
/// Configuration options for the devgen splitter.
///
/// This struct defines the parameters used to control how the source code
/// is split into chunks, specifying the maximum number of lines for each chunk.
///
/// Note that the derived `Default` sets `chunk_line_limit` to `0`. How the
/// splitter treats a zero limit is not visible from this file, so callers
/// should normally set the limit explicitly.
#[derive(Debug, Clone, Default)]
pub struct SplitOptions {
    /// The maximum number of lines for each chunk.
    pub chunk_line_limit: usize,
}
104
105mod lang;
106mod splitter;
107pub use lang::Lang;
108pub use splitter::split;