/// Removes harmony-style marker tokens from `text`, leaving fenced code untouched.
///
/// The input is walked line by line (each line keeps its trailing `\n`).
/// A line whose first non-whitespace characters are three backticks flips
/// the "inside a fence" state and is copied through unchanged. Lines inside
/// a fence are also copied verbatim. Every other line is passed through
/// `strip_harmony_from_segment`, and the result is appended to the output.
///
/// Returns a newly allocated `String`; the input is never modified.
pub(super) fn strip_harmony_separators(text: &str) -> String {
    let mut cleaned = String::with_capacity(text.len());
    let mut inside_fence = false;

    for line in text.split_inclusive('\n') {
        let is_fence_line = line.trim_start().starts_with("```");
        if is_fence_line {
            inside_fence = !inside_fence;
        }

        // Fence delimiters and fenced content are emitted byte-for-byte.
        if is_fence_line || inside_fence {
            cleaned.push_str(line);
        } else {
            cleaned.push_str(&strip_harmony_from_segment(line));
        }
    }

    cleaned
}
/// Returns a copy of `s` with every marker run recognised by
/// `try_match_harmony_run` removed.
///
/// Each `<` in the segment is tried as a potential marker start. When a run
/// matches, the text kept so far is flushed and scanning resumes right after
/// the run; otherwise the `<` is ordinary text and scanning moves past it.
/// Text outside matched runs is copied through unchanged.
fn strip_harmony_from_segment(s: &str) -> String {
    let raw = s.as_bytes();
    let mut cleaned = String::with_capacity(s.len());
    let mut kept_from = 0;
    let mut cursor = 0;

    // `cursor` always sits just after an ASCII `<` or `>` (or at 0), so
    // slicing the `&str` at it can never split a multi-byte character.
    while let Some(offset) = s[cursor..].find('<') {
        let open = cursor + offset;
        match try_match_harmony_run(raw, open) {
            Some(past_run) => {
                cleaned.push_str(&s[kept_from..open]);
                kept_from = past_run;
                cursor = past_run;
            }
            None => cursor = open + 1,
        }
    }

    cleaned.push_str(&s[kept_from..]);
    cleaned
}
/// Matches a marker at `start` and, when possible, extends the match over a
/// `marker label marker` sequence.
///
/// The first marker must match via `try_match_harmony`, otherwise `None` is
/// returned. If it is immediately followed by a non-empty label (ASCII
/// lowercase letters and underscores only) and then a second marker, the
/// returned index is the end of that second marker, so the label between the
/// two markers is part of the run. In every other case the returned index is
/// the end of the first marker alone.
fn try_match_harmony_run(bytes: &[u8], start: usize) -> Option<usize> {
    let first_end = try_match_harmony(bytes, start)?;

    // `first_end` is at most `bytes.len()`, so this slice cannot panic.
    let label_len = bytes[first_end..]
        .iter()
        .take_while(|&&b| b.is_ascii_lowercase() || b == b'_')
        .count();
    let second_start = first_end + label_len;

    let second_end = if label_len > 0 && bytes.get(second_start) == Some(&b'<') {
        try_match_harmony(bytes, second_start)
    } else {
        None
    };

    Some(second_end.unwrap_or(first_end))
}
/// Tries to match a single marker that begins at `bytes[start]`.
///
/// A marker is `<`, an optional `|`, a non-empty name consisting of ASCII
/// lowercase letters and underscores, an optional `|`, and a closing `>`.
/// At least one of the two pipes must be present, so `<|name|>`, `<|name>`
/// and `<name|>` match while a plain `<name>` does not.
///
/// Returns the index one past the closing `>` on success, `None` otherwise.
/// `bytes[start]` is expected to be `<`; this is only checked in debug builds.
fn try_match_harmony(bytes: &[u8], start: usize) -> Option<usize> {
    debug_assert_eq!(bytes[start], b'<');

    let mut cursor = start + 1;

    let leading_pipe = bytes.get(cursor) == Some(&b'|');
    if leading_pipe {
        cursor += 1;
    }

    let name_len = bytes.get(cursor..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&b| b.is_ascii_lowercase() || b == b'_')
            .count()
    });
    if name_len == 0 {
        return None;
    }
    cursor += name_len;

    let trailing_pipe = bytes.get(cursor) == Some(&b'|');
    if trailing_pipe {
        cursor += 1;
    }

    let closed = bytes.get(cursor) == Some(&b'>');
    (closed && (leading_pipe || trailing_pipe)).then_some(cursor + 1)
}
/// Unit tests for `strip_harmony_separators`.
///
/// Stripping removes only the marker bytes themselves; whitespace on either
/// side of a removed marker is left exactly as it was in the input.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_harmony_removes_qwen36_channel_pair() {
        let input = "<|channel>thought<channel|>\nthe actual reply";
        assert_eq!(strip_harmony_separators(input), "\nthe actual reply");
    }

    #[test]
    fn strip_harmony_removes_symmetric_markers() {
        let input = "<|channel|>analysis<|message|>real content<|end|>";
        assert_eq!(strip_harmony_separators(input), "real content");
    }

    #[test]
    fn strip_harmony_handles_underscored_names() {
        let input = "before <|tool_call|> after";
        // Both spaces that surrounded the marker survive.
        assert_eq!(strip_harmony_separators(input), "before  after");
    }

    #[test]
    fn strip_harmony_leaves_html_tags_alone() {
        let input = "<a href=\"x\"><b>bold</b></a>";
        assert_eq!(strip_harmony_separators(input), input);
    }

    #[test]
    fn strip_harmony_leaves_plain_angle_brackets_alone() {
        let input = "if x < y && y > z";
        assert_eq!(strip_harmony_separators(input), input);
    }

    #[test]
    fn strip_harmony_preserves_fenced_code() {
        let input = "<|end|> outside\n```\n<|channel|>thought<|message|>\n```\n<|end|> after";
        let expected = " outside\n```\n<|channel|>thought<|message|>\n```\n after";
        assert_eq!(strip_harmony_separators(input), expected);
    }

    #[test]
    fn strip_harmony_handles_marker_at_start_of_line() {
        let input = "<|end|>";
        assert_eq!(strip_harmony_separators(input), "");
    }

    #[test]
    fn strip_harmony_preserves_multibyte_utf8() {
        let input = "héllo 🦀 <|end|> wörld";
        // Both spaces that surrounded the marker survive.
        assert_eq!(strip_harmony_separators(input), "héllo 🦀  wörld");
    }

    #[test]
    fn strip_harmony_unbalanced_marker_with_uppercase_is_left_alone() {
        let input = "<|Channel|>";
        assert_eq!(strip_harmony_separators(input), input);
    }

    #[test]
    fn strip_harmony_no_op_on_clean_text() {
        let input = "hello world\nthis is fine";
        assert_eq!(strip_harmony_separators(input), input);
    }
}