1#![doc = ""]
9#![doc = include_str!("../README.md")]
10
11pub mod dart;
12pub mod go;
13pub mod java;
14pub mod javascript;
15mod js_shared;
16pub mod kotlin;
17pub mod markdown;
18pub mod php;
19pub mod python;
20pub(crate) mod queries;
21pub mod ruby;
22pub mod rust_lang;
23pub mod sfc;
24pub mod swift;
25pub mod typescript;
26
27use anyhow::Result;
28use cartog_core::{Edge, Symbol};
29use tree_sitter::Node;
30
/// Symbols and edges collected from a single source file.
///
/// This is the success value returned by [`Extractor::extract`]. The derived
/// `Default` yields two empty vectors.
#[derive(Debug, Clone, Default)]
pub struct ExtractionResult {
    /// Symbols found in the source.
    pub symbols: Vec<Symbol>,
    /// Edges found in the source.
    pub edges: Vec<Edge>,
}
37
/// A per-language extractor that turns source text into an [`ExtractionResult`].
///
/// Implementors must be `Send`. `extract` takes `&mut self`, so an extractor
/// may keep mutable state between calls.
pub trait Extractor: Send {
    /// Extracts symbols and edges from `source`.
    ///
    /// `file_path` is the path associated with the source text.
    /// NOTE(review): presumably used to build symbol identifiers; confirm
    /// against the implementations.
    ///
    /// # Errors
    ///
    /// Returns an error when the implementation fails to process `source`.
    fn extract(&mut self, source: &str, file_path: &str) -> Result<ExtractionResult>;
}
45
46pub(crate) fn node_text<'a>(node: Node, source: &'a str) -> &'a str {
49 source.get(node.start_byte()..node.end_byte()).unwrap_or("")
50}
51
/// Returns the part of `s` that follows the last occurrence of `sep`.
///
/// When `sep` does not occur in `s`, the whole of `s` is returned. A trailing
/// separator therefore yields an empty string.
pub(crate) fn last_segment<'a>(s: &'a str, sep: &str) -> &'a str {
    s.rsplit_once(sep).map_or(s, |(_, tail)| tail)
}
58
/// Builds a dotted qualified name from an optional parent and a local name.
///
/// With a parent the result is `"{parent}.{name}"`; without one it is just
/// `name` as an owned string.
pub(crate) fn qualified(parent_qname: Option<&str>, name: &str) -> String {
    let Some(parent) = parent_qname else {
        return name.to_owned();
    };
    format!("{parent}.{name}")
}
66
/// Depth bound for syntax trees.
///
/// NOTE(review): presumably passed as the `limit` argument of
/// `tree_depth_exceeds` so extractors can reject pathologically nested
/// input; confirm against the callers.
pub(crate) const MAX_TREE_DEPTH: usize = 600;
71
72pub(crate) fn tree_depth_exceeds(root: Node, limit: usize) -> bool {
75 let mut cursor = root.walk();
76 let mut depth = 0usize;
77 loop {
78 if depth > limit {
79 return true;
80 }
81 if cursor.goto_first_child() {
82 depth += 1;
83 continue;
84 }
85 loop {
86 if cursor.goto_next_sibling() {
87 break;
88 }
89 if !cursor.goto_parent() {
90 return false;
91 }
92 depth -= 1;
93 }
94 }
95}
96
/// Borrowed description of the scope that encloses a symbol.
///
/// Both fields are optional: the derived `Default` has neither an id nor a
/// qualified name.
#[derive(Clone, Copy, Default)]
pub(crate) struct ParentScope<'a> {
    /// Identifier of the enclosing symbol, if there is one.
    pub id: Option<&'a str>,
    /// Qualified name of the enclosing scope, if there is one.
    pub qname: Option<&'a str>,
}

impl<'a> ParentScope<'a> {
    /// Scope with no parent id, whose qualified name is `namespace`.
    pub fn top_level(namespace: Option<&'a str>) -> Self {
        ParentScope {
            qname: namespace,
            ..Self::default()
        }
    }

    /// Scope nested inside a symbol that has both an `id` and a `qname`.
    pub fn nested(id: &'a str, qname: &'a str) -> Self {
        let (id, qname) = (Some(id), Some(qname));
        ParentScope { id, qname }
    }
}
122
123pub use cartog_core::detect_language;
124
125pub fn get_extractor(language: &str) -> Option<Box<dyn Extractor>> {
127 match language {
128 "python" => Some(Box::new(python::PythonExtractor::new())),
129 "typescript" => Some(Box::new(typescript::TypeScriptExtractor::new())),
130 "tsx" => Some(Box::new(typescript::TsxExtractor::new())),
131 "javascript" => Some(Box::new(javascript::JavaScriptExtractor::new())),
132 "rust" => Some(Box::new(rust_lang::RustExtractor::new())),
133 "go" => Some(Box::new(go::GoExtractor::new())),
134 "ruby" => Some(Box::new(ruby::RubyExtractor::new())),
135 "java" => Some(Box::new(java::JavaExtractor::new())),
136 "php" => Some(Box::new(php::PhpExtractor::new())),
137 "dart" => Some(Box::new(dart::DartExtractor::new())),
138 "swift" => Some(Box::new(swift::SwiftExtractor::new())),
139 "kotlin" => Some(Box::new(kotlin::KotlinExtractor::new())),
140 "vue" => Some(Box::new(sfc::VueExtractor::new())),
141 "svelte" => Some(Box::new(sfc::SvelteExtractor::new())),
142 "astro" => Some(Box::new(sfc::AstroExtractor::new())),
143 "markdown" => Some(Box::new(markdown::MarkdownExtractor::new())),
144 _ => None,
145 }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Every language name `get_extractor` is expected to recognise.
    ///
    /// Both tests below are driven from this list, so a language added here
    /// is covered by the lookup test and the fuzz test alike.
    const ALL_LANGS: [&str; 16] = [
        "python",
        "typescript",
        "tsx",
        "javascript",
        "rust",
        "go",
        "ruby",
        "java",
        "php",
        "dart",
        "swift",
        "kotlin",
        "vue",
        "svelte",
        "astro",
        "markdown",
    ];

    /// Each listed language resolves to an extractor; an unknown name does not.
    #[test]
    fn test_get_extractor() {
        for &lang in &ALL_LANGS {
            assert!(
                get_extractor(lang).is_some(),
                "no extractor registered for `{lang}`"
            );
        }
        assert!(get_extractor("unknown").is_none());
    }

    proptest::proptest! {
        #[test]
        fn extractors_never_panic_on_arbitrary_source(src in ".{0,400}") {
            // Build the extractors once per thread and reuse them across
            // generated cases instead of constructing all of them per input.
            thread_local! {
                static EXTRACTORS: std::cell::RefCell<Vec<(&'static str, Box<dyn Extractor>)>> =
                    std::cell::RefCell::new(
                        ALL_LANGS.iter().map(|&l| (l, get_extractor(l).unwrap())).collect()
                    );
            }
            EXTRACTORS.with_borrow_mut(|exs| {
                for (lang, ex) in exs.iter_mut() {
                    // Only a panic fails the property; an `Err` is acceptable.
                    let _ = ex.extract(&src, &format!("fuzz.{lang}"));
                }
            });
        }
    }
}