1use std::ffi::OsString;
5use std::io::{self, Write};
6use std::path::PathBuf;
7
8use clap::Parser;
9use harfrust::{
10 BufferClusterLevel, BufferFlags, Direction, Feature, FontRef, Language, SerializeFlags,
11 ShaperData, ShaperInstance, UnicodeBuffer, Variation,
12};
13
// Command-line options for `hr-shape`, parsed with clap's derive API.
//
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///` doc comments
// into help text, so using them here would change the `--help` output.
#[derive(Clone, Parser)]
#[command(name = "hr-shape", version, about = "Shape text using HarfRust")]
pub struct Args {
    // First positional argument. Used as the font path unless `--font-file` is given;
    // in that case `render` treats its value as the text to shape.
    #[arg(value_name = "FONT-FILE")]
    font_file_pos: Option<PathBuf>,

    // Second positional argument: the text to shape. Only consulted by `render` when
    // the font came from the first positional argument.
    #[arg(value_name = "TEXT")]
    text_pos: Option<String>,

    // `--font-file`: font path; takes precedence over the positional font argument.
    #[arg(long)]
    font_file: Option<PathBuf>,

    // `-y` / `--face-index`: index handed to `FontRef::from_index` (default 0).
    #[arg(short = 'y', long, default_value_t = 0)]
    face_index: u32,

    // `--font-ptem`: forwarded to the shaper builder's `point_size`.
    #[arg(long)]
    font_ptem: Option<f32>,

    // `--variations`: comma-separated list of variation settings applied to the
    // shaper instance.
    #[arg(long, value_delimiter = ',')]
    variations: Vec<Variation>,

    // `--named-instance`: index passed to `ShaperInstance::from_named_instance`;
    // any `--variations` are applied on top of it.
    #[arg(long)]
    named_instance: Option<usize>,

    // `--text`: text to shape, given directly on the command line.
    #[arg(long)]
    text: Option<String>,

    // `--text-file`: file to read the text from; `-` means standard input. Takes
    // precedence over every other text source.
    #[arg(long)]
    text_file: Option<PathBuf>,

    // `-u` / `--unicodes`: list of hex codepoints, converted to a string by
    // `parse_unicodes` at argument-parsing time.
    #[arg(short = 'u', long, value_parser = parse_unicodes)]
    unicodes: Option<String>,

    // `--text-before`: text set as the buffer's pre-context.
    #[arg(long)]
    text_before: Option<String>,

    // `--text-after`: text set as the buffer's post-context.
    #[arg(long)]
    text_after: Option<String>,

    // `--unicodes-before`: pre-context given as hex codepoints; preferred over
    // `--text-before` when both are present.
    #[arg(long, value_parser = parse_unicodes)]
    unicodes_before: Option<String>,

    // `--unicodes-after`: post-context given as hex codepoints; preferred over
    // `--text-after` when both are present.
    #[arg(long, value_parser = parse_unicodes)]
    unicodes_after: Option<String>,

    // `--single-par`: shape the whole text as one unit instead of splitting it on
    // newlines.
    #[arg(long)]
    single_par: bool,

    // `--direction`: explicit buffer direction; left to `guess_segment_properties`
    // when absent.
    #[arg(long)]
    direction: Option<Direction>,

    // `--language`: explicit buffer language.
    #[arg(long)]
    language: Option<Language>,

    // `--script`: explicit buffer script.
    #[arg(long)]
    script: Option<harfrust::Script>,

    // `--features`: comma-separated list of font features passed to `shape`.
    #[arg(long, value_delimiter = ',')]
    features: Vec<Feature>,

    // `--utf8-clusters`: when unset, `render` calls `reset_clusters()` on the buffer;
    // it also selects byte offsets instead of character indices in the
    // `--show-unicode` output.
    #[arg(long)]
    utf8_clusters: bool,

    // `--cluster-level`: "0".."3", mapped to a `BufferClusterLevel` by
    // `parse_cluster` (default "0").
    #[arg(long, value_parser = parse_cluster, default_value = "0")]
    cluster_level: BufferClusterLevel,

    // `--bot`: sets `BufferFlags::BEGINNING_OF_TEXT`.
    #[arg(long)]
    bot: bool,

    // `--eot`: sets `BufferFlags::END_OF_TEXT`.
    #[arg(long)]
    eot: bool,

    // `--preserve-default-ignorables`: sets `BufferFlags::PRESERVE_DEFAULT_IGNORABLES`.
    #[arg(long)]
    preserve_default_ignorables: bool,

    // `--remove-default-ignorables`: sets `BufferFlags::REMOVE_DEFAULT_IGNORABLES`.
    #[arg(long)]
    remove_default_ignorables: bool,

    // `--not-found-variation-selector-glyph`: glyph id forwarded to
    // `set_not_found_variation_selector_glyph` on the buffer.
    #[arg(long)]
    not_found_variation_selector_glyph: Option<u32>,

    // `--unsafe-to-concat`: sets `BufferFlags::PRODUCE_UNSAFE_TO_CONCAT`.
    #[arg(long)]
    unsafe_to_concat: bool,

    // `--safe-to-insert-tatweel`: sets `BufferFlags::PRODUCE_SAFE_TO_INSERT_TATWEEL`.
    #[arg(long)]
    safe_to_insert_tatweel: bool,

    // `--verify`: sets `BufferFlags::VERIFY`.
    #[arg(long)]
    verify: bool,

    // `--show-text`: print each input line, in parentheses, before its glyphs.
    #[arg(long)]
    show_text: bool,

    // `--show-unicode`: print each input line as `<U+XXXX=cluster|...>` before its
    // glyphs.
    #[arg(long)]
    show_unicode: bool,

    // `--show-line-num`: prefix every output line with `<line number>: `.
    #[arg(long)]
    show_line_num: bool,

    // `--verbose`: turns on `show_text`, `show_unicode` and `show_line_num`
    // (see `normalize_args`).
    #[arg(long)]
    verbose: bool,

    // Hidden `-v`: turns on both `verbose` and `ned` (see `normalize_args`).
    #[arg(short = 'v', hide = true)]
    short_v: bool,

    // `--no-glyph-names`: sets `SerializeFlags::NO_GLYPH_NAMES`.
    #[arg(long)]
    no_glyph_names: bool,

    // `--no-positions`: sets `SerializeFlags::NO_POSITIONS`.
    #[arg(long)]
    no_positions: bool,

    // `--no-advances`: sets `SerializeFlags::NO_ADVANCES`.
    #[arg(long)]
    no_advances: bool,

    // `--no-clusters`: sets `SerializeFlags::NO_CLUSTERS`.
    #[arg(long)]
    no_clusters: bool,

    // `--show-extents`: sets `SerializeFlags::GLYPH_EXTENTS`.
    #[arg(long)]
    show_extents: bool,

    // `--show-flags`: sets `SerializeFlags::GLYPH_FLAGS`.
    #[arg(long)]
    show_flags: bool,

    // `--ned`: shorthand that implies both `--no-clusters` and `--no-advances`.
    #[arg(long)]
    ned: bool,

    // `-o` / `--output-file`: write the result to this file instead of stdout.
    #[arg(short = 'o', long)]
    output_file: Option<PathBuf>,

    // `-n` / `--num-iterations`: how many times each line is shaped (default 1);
    // only the last result is serialized.
    #[arg(short = 'n', long, default_value_t = 1)]
    num_iterations: u32,

    // Hidden `--font-funcs`: accepted by the parser but not read anywhere in the
    // code visible here. NOTE(review): presumably kept for command-line
    // compatibility -- confirm.
    #[arg(long, hide = true)]
    font_funcs: Option<String>,
}
193
194pub fn try_main() -> Result<(), String> {
200 let args = Args::parse();
201 run_and_write(args)
202}
203
204pub fn run_and_write(args: Args) -> Result<(), String> {
210 let output_file = args.output_file.clone();
211 let output = render(args)?;
212 write_output(&output, output_file.as_ref())?;
213 Ok(())
214}
215
216pub fn run_from_args<I, T>(args: I) -> Result<String, String>
224where
225 I: IntoIterator<Item = T>,
226 T: Into<OsString> + Clone,
227{
228 let args = Args::try_parse_from(args).map_err(|e| e.to_string())?;
229 let output = render(args.clone())?;
230 if let Some(path) = args.output_file.as_ref() {
231 write_output(&output, Some(path))?;
232 }
233 Ok(output)
234}
235
236pub fn shape(font_path: &str, text: &str, options: &str) -> Result<String, String> {
245 let unicodes: Vec<String> = text
246 .chars()
247 .map(|c| format!("U+{:04X}", c as u32))
248 .collect();
249 let mut args = vec![
250 "hr-shape".to_string(),
251 "--font-file".to_string(),
252 font_path.to_string(),
253 "-u".to_string(),
254 unicodes.join(","),
255 "--single-par".to_string(),
256 ];
257 args.extend(
258 options
259 .split(' ')
260 .filter(|s| !s.is_empty())
261 .map(ToOwned::to_owned),
262 );
263 run_from_args(args)
264}
265
266pub fn render(mut args: Args) -> Result<String, String> {
272 normalize_args(&mut args);
273
274 let mut font_set_as_free_arg = false;
275 let font_path = if let Some(ref path) = args.font_file {
276 path.clone()
277 } else if let Some(ref path) = args.font_file_pos {
278 font_set_as_free_arg = true;
279 path.clone()
280 } else {
281 return Err("Error: font is not set.".to_string());
282 };
283
284 if !font_path.exists() {
285 return Err(format!("Error: '{}' does not exist.", font_path.display()));
286 }
287
288 let font_data = std::fs::read(&font_path)
289 .map_err(|e| format!("Error: cannot read '{}': {e}", font_path.display()))?;
290 let font = FontRef::from_index(&font_data, args.face_index)
291 .map_err(|_| format!("Error: face index {} not found.", args.face_index))?;
292
293 let data = ShaperData::new(&font);
294 let variations = &args.variations;
295 let instance = match args.named_instance {
296 Some(idx) => {
297 let mut inst = ShaperInstance::from_named_instance(&font, idx);
298 if !variations.is_empty() {
299 inst.set_variations(&font, variations);
300 }
301 inst
302 }
303 None => ShaperInstance::from_variations(&font, variations),
304 };
305 let shaper = data
306 .shaper(&font)
307 .instance(Some(&instance))
308 .point_size(args.font_ptem)
309 .build();
310
311 let pre_context = args
312 .unicodes_before
313 .as_deref()
314 .or(args.text_before.as_deref());
315 let post_context = args
316 .unicodes_after
317 .as_deref()
318 .or(args.text_after.as_deref());
319
320 let mut buf_flags = BufferFlags::default();
321 if args.bot {
322 buf_flags |= BufferFlags::BEGINNING_OF_TEXT;
323 }
324 if args.eot {
325 buf_flags |= BufferFlags::END_OF_TEXT;
326 }
327 if args.preserve_default_ignorables {
328 buf_flags |= BufferFlags::PRESERVE_DEFAULT_IGNORABLES;
329 }
330 if args.remove_default_ignorables {
331 buf_flags |= BufferFlags::REMOVE_DEFAULT_IGNORABLES;
332 }
333 if args.unsafe_to_concat {
334 buf_flags |= BufferFlags::PRODUCE_UNSAFE_TO_CONCAT;
335 }
336 if args.safe_to_insert_tatweel {
337 buf_flags |= BufferFlags::PRODUCE_SAFE_TO_INSERT_TATWEEL;
338 }
339 if args.verify {
340 buf_flags |= BufferFlags::VERIFY;
341 }
342
343 let no_clusters = args.no_clusters || args.ned;
344 let format_flags = {
345 let mut f = SerializeFlags::default();
346 if args.no_glyph_names {
347 f |= SerializeFlags::NO_GLYPH_NAMES;
348 }
349 if no_clusters {
350 f |= SerializeFlags::NO_CLUSTERS;
351 }
352 if args.no_positions {
353 f |= SerializeFlags::NO_POSITIONS;
354 }
355 if args.no_advances || args.ned {
356 f |= SerializeFlags::NO_ADVANCES;
357 }
358 if args.show_extents {
359 f |= SerializeFlags::GLYPH_EXTENTS;
360 }
361 if args.show_flags {
362 f |= SerializeFlags::GLYPH_FLAGS;
363 }
364 f.bits()
365 };
366
367 let language = args.language;
368 let features = &args.features;
369
370 let text = if let Some(ref path) = args.text_file {
371 if path == &PathBuf::from("-") {
372 read_stdin()?
373 } else {
374 std::fs::read_to_string(path)
375 .map_err(|e| format!("Error: cannot read '{}': {e}", path.display()))?
376 }
377 } else if font_set_as_free_arg {
378 if let Some(ref text) = args.text_pos {
379 text.clone()
380 } else if let Some(ref text) = args.unicodes {
381 text.clone()
382 } else if let Some(ref text) = args.text {
383 text.clone()
384 } else {
385 read_stdin()?
386 }
387 } else if let Some(ref text) = args.font_file_pos {
388 text.to_string_lossy().to_string()
389 } else if let Some(ref text) = args.unicodes {
390 text.clone()
391 } else if let Some(ref text) = args.text {
392 text.clone()
393 } else {
394 read_stdin()?
395 };
396
397 let lines: Vec<&str> = if args.single_par {
398 vec![&text]
399 } else {
400 text.split('\n').filter(|s| !s.is_empty()).collect()
401 };
402
403 let mut output = Vec::new();
404 for (line_idx, text) in lines.iter().enumerate() {
405 let line_no = line_idx + 1;
406
407 if args.show_text {
408 if args.show_line_num {
409 write!(output, "{line_no}: ").unwrap();
410 }
411 writeln!(output, "({text})").unwrap();
412 }
413
414 if args.show_unicode {
415 if args.show_line_num {
416 write!(output, "{line_no}: ").unwrap();
417 }
418 writeln!(output, "{}", serialize_unicode(text, args.utf8_clusters)).unwrap();
419 }
420
421 let glyph_buffer = {
422 let mut result = None;
423 for _ in 0..args.num_iterations {
424 let mut buffer = UnicodeBuffer::new();
425 buffer.push_str(text);
426
427 if let Some(d) = args.direction {
428 buffer.set_direction(d);
429 }
430 if let Some(ref lang) = language {
431 buffer.set_language(lang.clone());
432 }
433 if let Some(script) = args.script {
434 buffer.set_script(script);
435 }
436
437 buffer.set_cluster_level(args.cluster_level);
438 if !args.utf8_clusters {
439 buffer.reset_clusters();
440 }
441 if let Some(g) = args.not_found_variation_selector_glyph {
442 buffer.set_not_found_variation_selector_glyph(g);
443 }
444
445 buffer.set_flags(buf_flags);
446
447 if let Some(ctx) = pre_context {
448 buffer.set_pre_context(ctx);
449 }
450 if let Some(ctx) = post_context {
451 buffer.set_post_context(ctx);
452 }
453
454 buffer.guess_segment_properties();
455
456 result = Some(shaper.shape(buffer, features));
457 }
458 result.unwrap()
459 };
460
461 if args.show_line_num {
462 write!(output, "{line_no}: ").unwrap();
463 }
464 writeln!(
465 output,
466 "{}",
467 glyph_buffer.serialize(&shaper, SerializeFlags::from_bits_truncate(format_flags))
468 )
469 .unwrap();
470 }
471
472 String::from_utf8(output).map_err(|e| format!("Error: invalid UTF-8 output: {e}"))
473}
474
475fn normalize_args(args: &mut Args) {
476 if args.short_v {
477 args.verbose = true;
478 args.ned = true;
479 }
480 if args.verbose {
481 args.show_text = true;
482 args.show_unicode = true;
483 args.show_line_num = true;
484 }
485}
486
/// Writes `output` to `output_file`, or to standard output when no file is given.
///
/// An existing file is truncated and overwritten.
///
/// # Errors
///
/// Returns a message naming the path when the file cannot be created or written, or
/// a generic message when writing to standard output fails.
fn write_output(output: &str, output_file: Option<&PathBuf>) -> Result<(), String> {
    let bytes = output.as_bytes();
    match output_file {
        Some(path) => {
            let shown = path.display();
            let mut sink = std::fs::File::create(path)
                .map_err(|e| format!("Error: cannot create '{shown}': {e}"))?;
            sink.write_all(bytes)
                .map_err(|e| format!("Error: cannot write '{shown}': {e}"))
        }
        None => {
            let mut stdout = io::stdout().lock();
            stdout
                .write_all(bytes)
                .map_err(|e| format!("Error: writing stdout: {e}"))
        }
    }
}
502
/// Reads all of standard input into a `String`.
///
/// # Errors
///
/// Returns a message when reading fails, which includes input that is not valid
/// UTF-8.
fn read_stdin() -> Result<String, String> {
    let mut contents = String::new();
    match io::Read::read_to_string(&mut io::stdin().lock(), &mut contents) {
        Ok(_) => Ok(contents),
        Err(e) => Err(format!("Error: reading stdin: {e}")),
    }
}
506
/// Converts a list of hexadecimal codepoints into the string they spell.
///
/// Tokens are separated by `,`, space, `;` or tab; surrounding whitespace is trimmed
/// and empty tokens are skipped. Each token may carry one of the prefixes `U+`, `u+`,
/// `0x` or `0X`.
///
/// # Errors
///
/// Returns `'<token>' is not a valid codepoint` for the first token that is not
/// hexadecimal or does not name a Unicode scalar value (for example a surrogate).
fn parse_unicodes(s: &str) -> Result<String, String> {
    const PREFIXES: [&str; 4] = ["U+", "u+", "0x", "0X"];

    s.split(|c: char| matches!(c, ',' | ' ' | ';' | '\t'))
        .map(str::trim)
        .filter(|tok| !tok.is_empty())
        .map(|tok| {
            let digits = PREFIXES
                .iter()
                .find_map(|prefix| tok.strip_prefix(*prefix))
                .unwrap_or(tok);
            u32::from_str_radix(digits, 16)
                .ok()
                .and_then(|value| char::try_from(value).ok())
                .ok_or_else(|| format!("'{tok}' is not a valid codepoint"))
        })
        .collect()
}
528
529fn parse_cluster(s: &str) -> Result<BufferClusterLevel, String> {
530 match s {
531 "0" => Ok(BufferClusterLevel::MonotoneGraphemes),
532 "1" => Ok(BufferClusterLevel::MonotoneCharacters),
533 "2" => Ok(BufferClusterLevel::Characters),
534 "3" => Ok(BufferClusterLevel::Graphemes),
535 _ => Err("invalid cluster level".to_string()),
536 }
537}
538
/// Formats `text` as `<U+XXXX=cluster|U+XXXX=cluster|...>`.
///
/// The cluster value of a character is its UTF-8 byte offset when `utf8_clusters` is
/// true and its character index otherwise. Codepoints are printed in upper-case hex,
/// zero-padded to at least four digits. Empty text yields an empty string, without
/// the angle brackets.
fn serialize_unicode(text: &str, utf8_clusters: bool) -> String {
    if text.is_empty() {
        return String::new();
    }

    let entries: Vec<String> = text
        .char_indices()
        .enumerate()
        .map(|(ordinal, (offset, ch))| {
            let cluster = if utf8_clusters { offset } else { ordinal };
            format!("U+{:04X}={cluster}", ch as u32)
        })
        .collect();

    format!("<{}>", entries.join("|"))
}
554}