/// Default length limit: a line longer than this is handed to the wrapping
/// strategies, and a wrapped candidate is only accepted when each of its lines
/// stays within it.
const MAX_LINE_LEN: usize = 140;
/// Lower limit applied to lines that start with `@` (after leading whitespace)
/// and either end with `,` or contain `) {`.
const RECORD_FIELD_WRAP_THRESHOLD: usize = 136;
/// Re-emits `src` line by line, replacing over-long lines with a wrapped form.
///
/// Each line is measured without its trailing `\n`. Lines that start with `@`
/// and either end with `,` or contain `) {` are compared against
/// `RECORD_FIELD_WRAP_THRESHOLD`; every other line is compared against
/// `MAX_LINE_LEN`. Lines within their limit are copied verbatim; longer ones are
/// passed through `wrap_line`. The original `\n` terminator (or its absence on
/// the final line) is preserved.
pub(crate) fn wrap_long_java_lines(src: &str) -> String {
    let mut result = String::with_capacity(src.len() + 256);
    for raw in src.split_inclusive('\n') {
        // Separate the payload from its newline so the newline never counts
        // towards the measured length.
        let (body, eol) = raw.strip_suffix('\n').map_or((raw, ""), |b| (b, "\n"));

        let starts_with_annotation = body.trim_start().starts_with('@');
        let looks_like_record_field =
            starts_with_annotation && (body.ends_with(',') || body.contains(") {"));
        let limit = if looks_like_record_field {
            RECORD_FIELD_WRAP_THRESHOLD
        } else {
            MAX_LINE_LEN
        };

        if visible_len(body) > limit {
            result.push_str(&wrap_line(body));
        } else {
            result.push_str(body);
        }
        result.push_str(eol);
    }
    result
}
/// Returns the length of `s` in characters (Unicode scalar values).
///
/// The UTF-8 byte length over-counts every non-ASCII character, which would
/// make lines containing such text look longer than they are and trigger
/// wrapping that is not needed.
fn visible_len(s: &str) -> usize {
    s.chars().count()
}
fn wrap_line(line: &str) -> String {
let indent = leading_whitespace(line);
if let Some(wrapped) = wrap_javadoc_line(line, indent) {
if all_lines_fit(&wrapped) {
return wrapped;
}
}
if let Some(wrapped) = wrap_record_field_annotations(line, indent) {
if all_lines_fit(&wrapped) {
return wrapped;
}
}
if let Some(wrapped) = wrap_function_descriptor_call(line, indent) {
if all_lines_fit(&wrapped) {
return wrapped;
}
}
if let Some(wrapped) = wrap_method_type_call(line, indent) {
if all_lines_fit(&wrapped) {
return wrapped;
}
}
if let Some(wrapped) = wrap_method_signature(line, indent) {
if all_lines_fit(&wrapped) {
return wrapped;
}
}
if let Some(wrapped) = wrap_ternary_string_msg(line, indent) {
if all_lines_fit(&wrapped) {
return wrapped;
}
}
if let Some(wrapped) = wrap_invoke_call(line, indent) {
if all_lines_fit(&wrapped) {
return wrapped;
}
}
line.to_string()
}
fn all_lines_fit(text: &str) -> bool {
text.lines().all(|l| l.len() <= MAX_LINE_LEN)
}
/// Returns the run of whitespace at the start of `line` (the whole line when
/// it contains nothing else).
fn leading_whitespace(line: &str) -> &str {
    // Whatever `trim_start` removed is exactly the leading whitespace.
    let indent_len = line.len() - line.trim_start().len();
    &line[..indent_len]
}
/// Wraps a Javadoc continuation line (`<indent>* text`) at spaces so that each
/// produced line is `<indent>* ` followed by a chunk of the text.
///
/// Returns `None` when the line is not a `* ` continuation line or when its
/// text already fits the budget (`MAX_LINE_LEN` minus the prefix length, in
/// bytes). Otherwise returns the wrapped lines joined by `\n`, without a
/// trailing newline.
///
/// A chunk is broken at the last space inside the budget. If there is none,
/// it is broken at the first space after the budget, so that chunk may exceed
/// the limit. If no space remains at all, the rest is emitted as one line.
/// Callers are expected to validate the result.
fn wrap_javadoc_line(line: &str, indent: &str) -> Option<String> {
    let trimmed = line.trim_start();
    // `strip_prefix` also rejects a bare `*`, which has no text to wrap and
    // must not be sliced past its end.
    let rest = trimmed.strip_prefix("* ")?;
    let prefix = format!("{indent}* ");
    let budget = MAX_LINE_LEN.saturating_sub(prefix.len());
    if rest.len() <= budget {
        return None;
    }

    let mut wrapped = String::with_capacity(line.len() + 2 * prefix.len());
    let mut remaining = rest;
    loop {
        wrapped.push_str(&prefix);
        if remaining.len() <= budget {
            wrapped.push_str(remaining);
            break;
        }

        // `budget` is a byte count; back off to a character boundary so that
        // slicing never lands inside a multi-byte character.
        let mut cut = budget;
        while !remaining.is_char_boundary(cut) {
            cut -= 1;
        }

        // Prefer the last space within the budget. A space at index 0 is not a
        // usable break, so fall back to the first space after the cut.
        let break_at = match remaining[..cut].rfind(' ') {
            Some(idx) if idx > 0 => Some(idx),
            _ => remaining[cut..].find(' ').map(|off| cut + off),
        };

        match break_at {
            Some(idx) => {
                wrapped.push_str(&remaining[..idx]);
                remaining = remaining[idx + 1..].trim_start();
                if remaining.is_empty() {
                    // Only whitespace was left: finish without a dangling
                    // newline, which would turn into an empty output line.
                    break;
                }
                wrapped.push('\n');
            }
            None => {
                wrapped.push_str(remaining);
                break;
            }
        }
    }
    Some(wrapped)
}
/// Puts each leading annotation of an annotated declaration on its own line.
///
/// The line (ignoring leading whitespace) must start with `@`. An optional
/// trailing `,` is set aside and re-attached to the last output line. The rest
/// is tokenised by `split_annotations`; the final token is treated as the
/// declaration and must contain a space, and at least one annotation must
/// precede it. Every output line is prefixed with `indent`. Returns `None`
/// when any of these conditions is not met.
fn wrap_record_field_annotations(line: &str, indent: &str) -> Option<String> {
    let stripped = line.trim_start();
    if !stripped.starts_with('@') {
        return None;
    }
    let (core, comma) = stripped
        .strip_suffix(',')
        .map_or((stripped, ""), |without| (without, ","));

    let parts = split_annotations(core)?;
    if parts.len() < 2 {
        return None;
    }
    let (declaration, _annotations) = parts.split_last()?;
    if !declaration.contains(' ') {
        return None;
    }

    // Annotations and the declaration share the same layout: one per line,
    // each indented; only the last line receives the trailing comma.
    let mut wrapped = parts
        .iter()
        .map(|part| format!("{indent}{part}"))
        .collect::<Vec<_>>()
        .join("\n");
    wrapped.push_str(comma);
    Some(wrapped)
}
/// Splits `body` into its leading annotation tokens plus the remaining text.
///
/// `body` must start with `@`, otherwise `None` is returned. Tokens are
/// separated by spaces. Each annotation token is `@`, a run of ASCII
/// alphanumerics / `_` / `.`, and, when directly followed by `(`, everything up
/// to the matching `)`. Parentheses inside double-quoted strings are ignored
/// and backslash escapes inside strings are honoured; an unbalanced argument
/// list extends to the end of `body`. The first token that does not start with
/// `@` is taken, trimmed, as the final element, and scanning stops there.
fn split_annotations(body: &str) -> Option<Vec<String>> {
    let raw = body.as_bytes();
    if raw.first().copied() != Some(b'@') {
        return None;
    }
    let total = raw.len();
    let mut pieces: Vec<String> = Vec::new();
    let mut pos = 0usize;

    loop {
        // Skip the spaces separating tokens.
        pos += raw[pos..].iter().take_while(|&&byte| byte == b' ').count();
        if pos >= total {
            break;
        }
        if raw[pos] != b'@' {
            // Everything left is the declaration part.
            pieces.push(body[pos..].trim().to_string());
            break;
        }

        let begin = pos;
        pos += 1;
        // Annotation name, possibly qualified with dots.
        pos += raw[pos..]
            .iter()
            .take_while(|&&byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'.')
            .count();

        if raw.get(pos).copied() == Some(b'(') {
            // Consume the argument list up to the matching close parenthesis.
            let mut nesting = 1usize;
            let mut quoted = false;
            pos += 1;
            while pos < total && nesting > 0 {
                let byte = raw[pos];
                if quoted {
                    if byte == b'\\' && pos + 1 < total {
                        // Skip the escaped byte so `\"` does not end the string.
                        pos += 2;
                        continue;
                    }
                    if byte == b'"' {
                        quoted = false;
                    }
                } else if byte == b'"' {
                    quoted = true;
                } else if byte == b'(' {
                    nesting += 1;
                } else if byte == b')' {
                    nesting -= 1;
                }
                pos += 1;
            }
        }
        pieces.push(body[begin..pos].to_string());
    }
    Some(pieces)
}
/// Wraps the arguments of the first `FunctionDescriptor.of(` or
/// `FunctionDescriptor.ofVoid(` call found in `line`, delegating to
/// `wrap_call_args`.
fn wrap_function_descriptor_call(line: &str, indent: &str) -> Option<String> {
    const CALL_PREFIXES: [&str; 2] = ["FunctionDescriptor.of(", "FunctionDescriptor.ofVoid("];
    wrap_call_args(line, indent, &CALL_PREFIXES)
}
/// Wraps the arguments of the first `MethodType.methodType(` call found in
/// `line`, delegating to `wrap_call_args`.
fn wrap_method_type_call(line: &str, indent: &str) -> Option<String> {
    const CALL_PREFIXES: [&str; 1] = ["MethodType.methodType("];
    wrap_call_args(line, indent, &CALL_PREFIXES)
}
/// Wraps the arguments of the first `.invoke(` call found in `line`,
/// delegating to `wrap_call_args`.
fn wrap_invoke_call(line: &str, indent: &str) -> Option<String> {
    const CALL_PREFIXES: [&str; 1] = [".invoke("];
    wrap_call_args(line, indent, &CALL_PREFIXES)
}
/// Breaks the argument list of a call onto one line per argument.
///
/// The earliest occurrence in `line` of any entry in `prefixes` selects the
/// call; each prefix is expected to end with the call's opening `(`. The text
/// up to and including that `(` stays on the first line, every top-level
/// argument goes on its own continuation line (trimmed, followed by `,` except
/// for the last), and the closing `)` with whatever follows it is placed on a
/// final line at `indent`.
///
/// Returns `None` when no prefix occurs, the parenthesis is unmatched, the
/// argument list is blank, or there are fewer than two arguments.
fn wrap_call_args(line: &str, indent: &str, prefixes: &[&str]) -> Option<String> {
    let (found_at, found_prefix) = prefixes
        .iter()
        .filter_map(|&candidate| Some((line.find(candidate)?, candidate)))
        .min_by_key(|&(at, _)| at)?;

    // The prefix's last byte is the opening parenthesis of the call.
    let open = found_at + found_prefix.len() - 1;
    let close = matching_paren(line, open)?;

    let inner = &line[open + 1..close];
    if inner.trim().is_empty() {
        return None;
    }
    let args = split_top_level_commas(inner);
    if args.len() < 2 {
        return None;
    }

    let cont_indent = format!("{indent} ");
    let arg_lines = args
        .iter()
        .map(|arg| format!("{cont_indent}{}", arg.trim()))
        .collect::<Vec<_>>()
        .join(",\n");

    Some(format!(
        "{}\n{arg_lines}\n{indent}{}",
        &line[..=open],
        &line[close..]
    ))
}
/// Finds the byte index of the `)` that closes the `(` at `open_idx`.
///
/// Returns `None` when `open_idx` is out of range or does not point at `(`, or
/// when no matching `)` exists. Parentheses inside double-quoted strings are
/// ignored; inside a string a backslash causes the following byte to be
/// skipped.
fn matching_paren(line: &str, open_idx: usize) -> Option<usize> {
    let raw = line.as_bytes();
    if raw.get(open_idx).copied() != Some(b'(') {
        return None;
    }

    let mut nesting = 1usize;
    let mut quoted = false;
    let mut skip_next = false;
    for (pos, &byte) in raw.iter().enumerate().skip(open_idx + 1) {
        if skip_next {
            // This byte was escaped by a preceding backslash inside a string.
            skip_next = false;
            continue;
        }
        if quoted {
            match byte {
                b'\\' => skip_next = true,
                b'"' => quoted = false,
                _ => {}
            }
            continue;
        }
        match byte {
            b'"' => quoted = true,
            b'(' => nesting += 1,
            b')' => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(pos);
                }
            }
            _ => {}
        }
    }
    None
}
/// Splits `s` at commas that are outside parentheses, angle brackets and
/// double-quoted strings.
///
/// Pieces are returned untrimmed and without the separating commas. A final
/// piece that is empty or whitespace-only is dropped, so a trailing comma does
/// not produce an extra element. Inside a string a backslash escapes the next
/// byte. Every `<` / `>` outside a string adjusts the angle depth, so
/// comparison or arrow operators also affect where splits are allowed.
///
/// Pieces are sliced from `s` itself rather than rebuilt byte by byte, which
/// keeps multi-byte UTF-8 characters intact.
fn split_top_level_commas(s: &str) -> Vec<String> {
    let bytes = s.as_bytes();
    let mut out: Vec<String> = Vec::new();
    let mut depth_paren = 0i32;
    let mut depth_angle = 0i32;
    let mut in_str = false;
    // Start of the piece currently being scanned. It only ever moves to the
    // byte after an ASCII comma, so it is always a character boundary.
    let mut piece_start = 0usize;
    let mut i = 0usize;
    while i < bytes.len() {
        let b = bytes[i];
        if in_str {
            if b == b'\\' && i + 1 < bytes.len() {
                // Skip the escaped byte so `\"` does not close the string.
                i += 2;
                continue;
            }
            if b == b'"' {
                in_str = false;
            }
            i += 1;
            continue;
        }
        match b {
            b'"' => in_str = true,
            b'(' => depth_paren += 1,
            b')' => depth_paren -= 1,
            b'<' => depth_angle += 1,
            b'>' => depth_angle -= 1,
            b',' if depth_paren == 0 && depth_angle == 0 => {
                out.push(s[piece_start..i].to_string());
                piece_start = i + 1;
            }
            _ => {}
        }
        i += 1;
    }
    let last = &s[piece_start..];
    if !last.trim().is_empty() {
        out.push(last.to_string());
    }
    out
}
/// Breaks the parameter list of a method signature onto one line per
/// parameter.
///
/// A line qualifies when, ignoring leading whitespace, it starts with
/// `public `, `private `, `protected ` or `static `, ends with `{`, and contains
/// both `(` and `)`. The first `(` in the line is taken as the start of the
/// parameter list. The text up to and including it stays on the first line,
/// each top-level parameter goes on its own continuation line (trimmed,
/// followed by `,` except for the last), and the closing `)` with the rest of
/// the line is placed on a final line at `indent`.
///
/// Returns `None` when the line does not qualify, the parenthesis is
/// unmatched, the parameter list is blank, or there are fewer than two
/// parameters.
fn wrap_method_signature(line: &str, indent: &str) -> Option<String> {
    const LEADING_MODIFIERS: [&str; 4] = ["public ", "private ", "protected ", "static "];

    let stripped = line.trim_start();
    let has_modifier = LEADING_MODIFIERS
        .iter()
        .any(|&modifier| stripped.starts_with(modifier));
    let looks_like_signature =
        has_modifier && stripped.ends_with('{') && line.contains('(') && line.contains(')');
    if !looks_like_signature {
        return None;
    }

    let open = line.find('(')?;
    let close = matching_paren(line, open)?;
    let inner = &line[open + 1..close];
    if inner.trim().is_empty() {
        return None;
    }
    let params = split_top_level_commas(inner);
    if params.len() < 2 {
        return None;
    }

    let cont_indent = format!("{indent} ");
    let param_lines = params
        .iter()
        .map(|param| format!("{cont_indent}{}", param.trim()))
        .collect::<Vec<_>>()
        .join(",\n");

    Some(format!(
        "{}\n{param_lines}\n{indent}{}",
        &line[..=open],
        &line[close..]
    ))
}
/// Splits a ternary expression across three lines: the text before ` ? `, then
/// a `? <then>` line and a `: <else>` line, both at the continuation indent.
///
/// The first ` ? ` in `line` marks the split. The `:` is the first one after it
/// that `find_top_level` reports as being outside parentheses, angle brackets
/// and strings. Both branches are trimmed. Returns `None` when either marker is
/// missing.
fn wrap_ternary_string_msg(line: &str, indent: &str) -> Option<String> {
    const QUESTION: &str = " ? ";

    let question_at = line.find(QUESTION)?;
    let branches_start = question_at + QUESTION.len();
    let colon_at = find_top_level(line, branches_start, ':')?;

    let condition = &line[..question_at];
    let when_true = line[branches_start..colon_at].trim();
    let when_false = line[colon_at + 1..].trim();
    let cont = format!("{indent} ");

    Some(format!(
        "{condition}\n{cont}? {when_true}\n{cont}: {when_false}"
    ))
}
/// Returns the byte index of the first `target` at or after byte offset
/// `start` that lies outside parentheses, angle brackets and double-quoted
/// strings, or `None` when there is no such occurrence.
///
/// Nesting is tracked from `start` only; text before it is not inspected.
/// Depth counters are updated before the comparison, so a bracket character
/// used as `target` is judged against the depth after it has been counted.
///
/// The comparison uses the UTF-8 encoding of `target`, so a non-ASCII target
/// is matched exactly instead of being truncated to its low byte.
fn find_top_level(s: &str, start: usize, target: char) -> Option<usize> {
    let bytes = s.as_bytes();
    let mut utf8 = [0u8; 4];
    let encoded: &str = target.encode_utf8(&mut utf8);
    let needle = encoded.as_bytes();

    let mut depth_paren = 0i32;
    let mut depth_angle = 0i32;
    let mut in_str = false;
    let mut i = start;
    while i < bytes.len() {
        let b = bytes[i];
        if in_str {
            if b == b'\\' && i + 1 < bytes.len() {
                // Skip the escaped byte so `\"` does not close the string.
                i += 2;
                continue;
            }
            if b == b'"' {
                in_str = false;
            }
            i += 1;
            continue;
        }
        match b {
            b'"' => in_str = true,
            b'(' => depth_paren += 1,
            b')' => depth_paren -= 1,
            b'<' => depth_angle += 1,
            b'>' => depth_angle -= 1,
            _ => {}
        }
        // A full UTF-8 sequence can only match at a character boundary, so a
        // byte-wise prefix comparison is sufficient.
        if depth_paren == 0 && depth_angle == 0 && bytes[i..].starts_with(needle) {
            return Some(i);
        }
        i += 1;
    }
    None
}
// Unit tests for the line wrapper: each test feeds one representative input
// through `wrap_long_java_lines` and checks the resulting lines.
#[cfg(test)]
mod tests {
use super::*;
// Input whose lines are all within the limit must come back unchanged.
#[test]
fn short_lines_untouched() {
let input = "package dev.kreuzberg;\nimport java.util.List;\n";
assert_eq!(wrap_long_java_lines(input), input);
}
// A long `FunctionDescriptor.of(...)` call is broken after the opening
// parenthesis and keeps its trailing `),`.
#[test]
fn function_descriptor_split() {
let long = " FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS, ValueLayout.ADDRESS),\n";
let out = wrap_long_java_lines(long);
for line in out.lines() {
assert!(line.len() <= MAX_LINE_LEN, "line too long: {line}");
}
assert!(out.contains("FunctionDescriptor.of(\n"));
assert!(out.contains("ValueLayout.JAVA_INT,"));
assert!(out.trim_end().ends_with("),"));
}
// A long `MethodType.methodType(...)` call must end up with every line
// within the limit.
#[test]
fn method_type_split() {
let long = " MethodType.methodType(int.class, MemorySegment.class, MemorySegment.class, MemorySegment.class, MemorySegment.class, MemorySegment.class, MemorySegment.class)),\n";
let out = wrap_long_java_lines(long);
for line in out.lines() {
assert!(line.len() <= MAX_LINE_LEN, "line too long ({}): {line}", line.len());
}
}
// A long method signature is broken after the opening parenthesis and the
// `throws` clause is preserved.
#[test]
fn method_signature_split() {
let long = " public static ExtractionResult extractFile(final java.nio.file.Path path, final String mimeType, final ExtractionConfig config) throws KreuzbergRsException {\n";
let out = wrap_long_java_lines(long);
for line in out.lines() {
assert!(line.len() <= MAX_LINE_LEN, "line too long ({}): {line}", line.len());
}
assert!(out.contains("extractFile(\n"));
assert!(out.contains("throws KreuzbergRsException"));
}
// Annotations on a record field are moved onto their own lines and the
// trailing comma stays at the end of the output.
#[test]
fn record_field_annotations_split() {
let long = " @JsonInclude(JsonInclude.Include.NON_NULL) @JsonProperty(\"processingWarnings\") @JsonAlias(\"processing_warnings\") List<ProcessingWarning> processingWarnings,\n";
let out = wrap_long_java_lines(long);
for line in out.lines() {
assert!(line.len() <= MAX_LINE_LEN, "line too long ({}): {line}", line.len());
}
assert!(out.contains(" @JsonInclude(JsonInclude.Include.NON_NULL)\n"));
assert!(out.contains(" @JsonProperty(\"processingWarnings\")\n"));
assert!(out.trim_end().ends_with(","));
}
// A long ternary whose branches contain string concatenation must end up
// with every line within the limit.
#[test]
fn ternary_msg_split() {
let long = " String msg = errPtr.equals(MemorySegment.NULL) ? \"registration failed (rc=\" + rc + \")\" : errPtr.reinterpret(Long.MAX_VALUE).getString(0);\n";
let out = wrap_long_java_lines(long);
for line in out.lines() {
assert!(line.len() <= MAX_LINE_LEN, "line too long ({}): {line}", line.len());
}
}
// A long Javadoc continuation line is wrapped and every produced line
// still starts with the `* ` prefix.
#[test]
fn javadoc_continuation_split() {
let long = " * {@literal @}param items Vector of {@code BatchBytesItem} structs, each containing content bytes, MIME type, and optional per-item configuration overrides.\n";
let out = wrap_long_java_lines(long);
for line in out.lines() {
assert!(line.len() <= MAX_LINE_LEN, "line too long ({}): {line}", line.len());
}
for line in out.lines() {
assert!(
line.trim_start().starts_with("* "),
"javadoc continuation lost its prefix: {line}"
);
}
}
// A long `.invoke(...)` call must end up with every line within the limit.
#[test]
fn invoke_call_split() {
let long = " int rc = (int) NativeLib.KREUZBERG_RENDER_PDF_PAGE_TO_PNG.invoke(cpdfBytes, pageIndex, cdpi, cpassword, outPtrHolder, outLenHolder, outCapHolder, somethingExtraToForceWrap);\n";
let out = wrap_long_java_lines(long);
for line in out.lines() {
assert!(line.len() <= MAX_LINE_LEN, "line too long ({}): {line}", line.len());
}
}
}