1use std::borrow::Cow;
2use std::path::Path;
3use std::path::PathBuf;
4
5use crate::text_changes::TextChange;
6use crate::text_changes::apply_text_changes;
7use anyhow::Result;
8use jsonc_parser::CollectOptions;
9use jsonc_parser::CommentCollectionStrategy;
10use jsonc_parser::ParseOptions;
11
12pub fn format_text(
13 input_text: &str,
14 format_with_host: impl FnMut(&Path, String) -> Result<Option<String>>,
15) -> Result<Option<String>> {
16 let had_bom = input_text.starts_with("\u{FEFF}");
17 let input_text = if had_bom { &input_text[3..] } else { input_text };
18 let result = format_inner(input_text, format_with_host)?;
19 if result.is_none() && had_bom {
20 Ok(Some(input_text.to_string()))
21 } else {
22 Ok(result)
23 }
24}
25
26fn format_inner(
27 input_text: &str,
28 format_with_host: impl FnMut(&Path, String) -> Result<Option<String>>,
29) -> Result<Option<String>> {
30 let parse_result = jsonc_parser::parse_to_ast(
31 input_text,
32 &CollectOptions {
33 comments: CommentCollectionStrategy::Off,
34 tokens: false,
35 },
36 &ParseOptions {
37 allow_comments: true,
38 allow_loose_object_property_names: true,
39 allow_trailing_commas: true,
40 },
41 )?;
42 let Some(root_value) = parse_result.value else {
43 return Ok(None);
44 };
45
46 Ok(match format_root(input_text, &root_value, format_with_host) {
47 Some(text) => {
48 #[cfg(debug_assertions)]
49 validate_output_json(&text)?;
50 Some(text)
51 }
52 None => None,
53 })
54}
55
56fn format_root(
57 input_text: &str,
58 root_value: &jsonc_parser::ast::Value,
59 mut format_with_host: impl FnMut(&Path, String) -> Result<Option<String>>,
60) -> Option<String> {
61 let root_obj = root_value.as_object()?;
62 let maybe_default_language = get_metadata_language(root_obj);
63 let cells = root_value.as_object()?.get_array("cells")?;
64
65 let text_changes: Vec<TextChange> = cells
66 .elements
67 .iter()
68 .filter_map(|element| get_cell_text_change(input_text, element, maybe_default_language, &mut format_with_host))
69 .collect();
70
71 if text_changes.is_empty() {
72 None
73 } else {
74 Some(apply_text_changes(input_text, text_changes))
75 }
76}
77
78#[cfg(debug_assertions)]
79fn validate_output_json(text: &str) -> Result<()> {
80 let result = jsonc_parser::parse_to_ast(
83 text,
84 &CollectOptions {
85 comments: CommentCollectionStrategy::Off,
86 tokens: false,
87 },
88 &ParseOptions {
89 allow_comments: true,
90 allow_loose_object_property_names: false,
91 allow_trailing_commas: true,
92 },
93 );
94 match result {
95 Ok(_) => Ok(()),
96 Err(err) => {
97 anyhow::bail!(
98 "dprint-plugin-jupyter produced invalid json. Please open an issue with reproduction steps at https://github.com/dprint/dprint-plugin-jupyter/issues\n{:#}\n\n== TEXT ==\n{}",
99 err,
100 text
101 );
102 }
103 }
104}
105
106fn get_cell_text_change(
107 file_text: &str,
108 cell: &jsonc_parser::ast::Value,
109 maybe_default_language: Option<&str>,
110 format_with_host: &mut impl FnMut(&Path, String) -> Result<Option<String>>,
111) -> Option<TextChange> {
112 let cell = cell.as_object()?;
113 let cell_language = get_cell_vscode_language_id(cell).or_else(|| {
114 let cell_type = cell.get_string("cell_type")?;
115 match cell_type.value.as_ref() {
116 "markdown" => Some("markdown"),
117 "code" => maybe_default_language,
118 _ => None,
119 }
120 })?;
121 let code_block = analyze_code_block(cell, file_text)?;
122 let file_path = language_to_path(cell_language)?;
123 let formatted_text = format_with_host(&file_path, code_block.source).ok()??;
124 let formatted_text = formatted_text.trim_end();
126
127 let new_text = if code_block.is_array {
128 build_array_json_text(formatted_text, code_block.indent_text)
129 } else {
130 serde_json::to_string(&formatted_text).unwrap()
131 };
132
133 Some(TextChange {
134 range: code_block.replace_range,
135 new_text,
136 })
137}
138
/// The source text of a single notebook cell together with the information
/// needed to write a replacement back into the file.
struct CodeBlockText<'a> {
  /// The cell's source text; for an array source this is all elements
  /// concatenated.
  source: String,
  /// Byte range of the file text that a replacement overwrites.
  replace_range: std::ops::Range<usize>,
  /// `true` when `source` was stored as an array of strings rather than as a
  /// single string.
  is_array: bool,
  /// Indentation found in front of the first array element; empty for a
  /// single-string source.
  indent_text: &'a str,
}
147
148fn analyze_code_block<'a>(cell: &jsonc_parser::ast::Object<'a>, file_text: &'a str) -> Option<CodeBlockText<'a>> {
149 let mut indent_text = "";
150 let mut replace_range = std::ops::Range::default();
151 let mut is_array = false;
152 let cell_source = match &cell.get("source")?.value {
153 jsonc_parser::ast::Value::Array(items) => {
154 is_array = true;
155 let mut strings = Vec::with_capacity(items.elements.len());
156 for (i, element) in items.elements.iter().enumerate() {
157 let string_lit = element.as_string_lit()?;
158 if i == 0 {
159 indent_text = get_indent_text(file_text, string_lit.range.start);
160 replace_range.start = string_lit.range.start;
161 }
162 if i == items.elements.len() - 1 {
163 replace_range.end = string_lit.range.end;
164 }
165 strings.push(&string_lit.value);
166 }
167
168 let mut text = String::with_capacity(strings.iter().map(|s| s.len()).sum::<usize>());
169 for string in strings {
170 text.push_str(string);
171 }
172 text
173 }
174 jsonc_parser::ast::Value::StringLit(string) => {
175 replace_range = string.range.start..string.range.end;
176 string.value.to_string()
177 }
178 _ => return None,
179 };
180 Some(CodeBlockText {
181 is_array,
182 indent_text,
183 replace_range,
184 source: cell_source,
185 })
186}
187
188fn build_array_json_text(formatted_text: &str, indent_text: &str) -> String {
190 let mut new_text = String::new();
191 let mut current_end_index = 0;
192 for (i, line) in formatted_text.split('\n').enumerate() {
193 current_end_index += line.len();
194 if i > 0 {
195 new_text.push_str(",\n");
196 new_text.push_str(indent_text);
197 }
198 let is_last_line = current_end_index == formatted_text.len();
199 new_text.push_str(
200 &serde_json::to_string(
201 if is_last_line {
202 Cow::Borrowed(line)
203 } else {
204 Cow::Owned(format!("{}\n", line))
205 }
206 .as_ref(),
207 )
208 .unwrap(),
209 );
210 current_end_index += 1;
211 }
212 new_text
213}
214
215fn get_metadata_language<'a>(root_obj: &'a jsonc_parser::ast::Object<'a>) -> Option<&'a str> {
216 let language_info = root_obj.get_object("metadata")?.get_object("language_info")?;
217 Some(&language_info.get_string("name")?.value)
218}
219
220fn get_cell_vscode_language_id<'a>(cell: &'a jsonc_parser::ast::Object<'a>) -> Option<&'a str> {
221 let cell_metadata = cell.get_object("metadata")?;
222 let cell_language_info = cell_metadata.get_object("vscode")?;
223 Some(&cell_language_info.get_string("languageId")?.value)
224}
225
/// Maps a language name to a synthetic file path of the form
/// `code_block.<ext>`.
///
/// The lookup is case-insensitive. Returns `None` for a language that is not
/// in the table.
fn language_to_path(language: &str) -> Option<PathBuf> {
  // (lower-case language name, file extension)
  const EXTENSIONS: &[(&str, &str)] = &[
    ("bash", "sh"),
    ("c++", "cpp"),
    ("css", "css"),
    ("csharp", "cs"),
    ("html", "html"),
    ("go", "go"),
    ("kotlin", "kt"),
    ("json", "json"),
    ("julia", "jl"),
    ("markdown", "md"),
    ("typescript", "ts"),
    ("javascript", "js"),
    ("perl", "perl"),
    ("php", "php"),
    ("python", "py"),
    ("python3", "py"),
    ("r", "r"),
    ("ruby", "rb"),
    ("scala", "scala"),
    ("sql", "sql"),
    ("yaml", "yml"),
  ];

  let normalized = language.to_lowercase();
  let (_, extension) = EXTENSIONS.iter().find(|(name, _)| *name == normalized.as_str())?;
  Some(PathBuf::from(format!("code_block.{}", extension)))
}
252
253fn get_indent_text(file_text: &str, start_pos: usize) -> &str {
254 let preceeding_text = &file_text[..start_pos];
255 let whitespace_start = preceeding_text.trim_end().len();
256 let whitespace_text = &preceeding_text[whitespace_start..];
257 let whitespace_newline_pos = whitespace_text.rfind('\n');
258 &preceeding_text[whitespace_newline_pos
259 .map(|pos| whitespace_start + pos + 1)
260 .unwrap_or(whitespace_start)..]
261}
262
#[cfg(test)]
mod test {
  use super::*;

  /// Checks the indentation returned for several text/offset combinations.
  #[test]
  fn test_get_indent_text() {
    // The offset points just past a two-character indent, so the string
    // literals need two characters of leading whitespace; with only one, the
    // offset would land inside the word and the result would be empty.
    assert_eq!(get_indent_text("  hello", 2), "  ");
    assert_eq!(get_indent_text("\n  hello", 3), "  ");
    assert_eq!(get_indent_text("t\n  hello", 4), "  ");
    assert_eq!(get_indent_text("t\n\t\thello", 4), "\t\t");
    // No whitespace directly in front of the offset.
    assert_eq!(get_indent_text("hello", 0), "");
    assert_eq!(get_indent_text("\nhello", 1), "");
    assert_eq!(get_indent_text("\nhello", 2), "");
  }

  /// Checks that a leading byte order mark is removed from the output, both
  /// when no cell changes and when a cell is reformatted.
  #[test]
  fn formats_with_bom() {
    {
      // The cell has no known language, so nothing is formatted; the only
      // change is the removed byte order mark.
      let input_text = "\u{FEFF}{\"cells\":[{\"cell_type\":\"code\",\"source\":\"let x = 5;\"}]}";
      let formatted_text = format_text(input_text, |_, text| Ok(Some(text))).unwrap().unwrap();
      assert_eq!(
        formatted_text,
        "{\"cells\":[{\"cell_type\":\"code\",\"source\":\"let x = 5;\"}]}"
      );
    }
    // The cell declares its language, so the host callback's output is
    // written back into `source`.
    let input_text = "\u{FEFF}{
 \"cells\":[{
 \"cell_type\":\"code\",
 \"metadata\": {
 \"vscode\": {
 \"languageId\": \"typescript\"
 }
 },
 \"source\": \"let x = 5;\"
 }]
}
";
    let formatted_text = format_text(input_text, |_, text| Ok(Some(format!("{}_formatted", text))))
      .unwrap()
      .unwrap();
    assert_eq!(
      formatted_text,
      "{
 \"cells\":[{
 \"cell_type\":\"code\",
 \"metadata\": {
 \"vscode\": {
 \"languageId\": \"typescript\"
 }
 },
 \"source\": \"let x = 5;_formatted\"
 }]
}
"
    );
  }
}