seq_runtime/io.rs
1//! I/O Operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//!
5//! # Safety Contract
6//!
7//! **IMPORTANT:** These functions are designed to be called ONLY by compiler-generated code,
8//! not by end users or arbitrary C code. The compiler is responsible for:
9//!
10//! - Ensuring stack has correct types (verified by type checker)
11//! - Passing valid, null-terminated C strings to `push_string`
12//! - Never calling these functions directly from user code
13//!
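//! # String Handling
//!
//! Byte-clean string literals are pushed through `patch_seq_push_string_bytes`, which
//! takes an explicit length and therefore tolerates embedded NULs and arbitrary bytes.
//! `patch_seq_push_string` remains for null-terminated C strings (expected to be valid
//! UTF-8, e.g. identifier-shaped literals). Currently, each push copies the literal's
//! bytes into a freshly allocated string. See `docs/STRING_INTERNING_DESIGN.md` for
//! discussion of future optimizations (interning, static references, etc.).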
20
21use crate::stack::{Stack, pop, push};
22use crate::value::Value;
23use std::ffi::CStr;
24use std::io;
25use std::sync::LazyLock;
26
27/// Coroutine-aware stdout mutex.
28/// Uses may::sync::Mutex which yields the coroutine when contended instead of blocking the OS thread.
29/// By serializing access to stdout, we prevent RefCell borrow panics that occur when multiple
30/// coroutines on the same thread try to access stdout's internal RefCell concurrently.
31static STDOUT_MUTEX: LazyLock<may::sync::Mutex<()>> = LazyLock::new(|| may::sync::Mutex::new(()));
32
/// Smallest exit status accepted by `exit_op` (Unix exit statuses are a single byte).
const EXIT_CODE_MIN: i64 = u8::MIN as i64;
/// Largest exit status accepted by `exit_op` (Unix exit statuses are a single byte).
const EXIT_CODE_MAX: i64 = u8::MAX as i64;
36
37/// Write a string to stdout followed by a newline
38///
39/// Stack effect: ( str -- )
40///
41/// # Safety
42/// Stack must have a String value on top
43///
44/// # Concurrency
45/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
46/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
47/// This prevents RefCell borrow panics when multiple strands write concurrently.
48#[unsafe(no_mangle)]
49pub unsafe extern "C" fn patch_seq_write_line(stack: Stack) -> Stack {
50 assert!(!stack.is_null(), "write_line: stack is empty");
51
52 let (rest, value) = unsafe { pop(stack) };
53
54 match value {
55 Value::String(s) => {
56 // Acquire coroutine-aware mutex (yields if contended, doesn't block)
57 // This serializes access to stdout
58 let _guard = STDOUT_MUTEX.lock().unwrap();
59
60 // Write directly to fd 1 using libc to avoid Rust's std::io::stdout() RefCell.
61 // Rust's standard I/O uses RefCell which panics on concurrent access from
62 // multiple coroutines on the same thread.
63 // Byte-clean: write the underlying bytes directly to fd 1.
64 // libc::write takes a raw pointer + length, so we don't
65 // need a `&str`. Binary response bodies, ANSI escapes,
66 // arbitrary protocol output all flow through unchanged.
67 let bytes = s.as_bytes();
68 let newline = b"\n";
69 unsafe {
70 libc::write(1, bytes.as_ptr() as *const libc::c_void, bytes.len());
71 libc::write(1, newline.as_ptr() as *const libc::c_void, newline.len());
72 }
73
74 rest
75 }
76 _ => panic!("write_line: expected String on stack, got {:?}", value),
77 }
78}
79
80/// Write a string to stdout without a trailing newline
81///
82/// Stack effect: ( str -- )
83///
84/// This is useful for protocols like LSP that require exact byte output
85/// without trailing newlines.
86///
87/// # Safety
88/// Stack must have a String value on top
89///
90/// # Concurrency
91/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
92/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
93#[unsafe(no_mangle)]
94pub unsafe extern "C" fn patch_seq_write(stack: Stack) -> Stack {
95 assert!(!stack.is_null(), "write: stack is empty");
96
97 let (rest, value) = unsafe { pop(stack) };
98
99 match value {
100 Value::String(s) => {
101 let _guard = STDOUT_MUTEX.lock().unwrap();
102
103 // Byte-clean: write the underlying bytes directly to fd 1.
104 let bytes = s.as_bytes();
105 unsafe {
106 libc::write(1, bytes.as_ptr() as *const libc::c_void, bytes.len());
107 }
108
109 rest
110 }
111 _ => panic!("write: expected String on stack, got {:?}", value),
112 }
113}
114
115/// Read a line from stdin
116///
117/// Returns the line and a success flag:
118/// - ( line true ) on success (line includes trailing newline)
119/// - ( "" false ) on I/O error or EOF
120///
121/// Use `string.chomp` to remove trailing newlines if needed.
122///
123/// # Line Ending Normalization
124///
125/// Line endings are normalized to `\n` regardless of platform. Windows-style
126/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
127/// across different operating systems.
128///
129/// Stack effect: ( -- String Bool )
130///
131/// Errors are values, not crashes.
132///
133/// # Safety
134/// Always safe to call
135#[unsafe(no_mangle)]
136pub unsafe extern "C" fn patch_seq_read_line(stack: Stack) -> Stack {
137 use std::io::BufRead;
138
139 let stdin = io::stdin();
140 let mut line = String::new();
141
142 match stdin.lock().read_line(&mut line) {
143 Ok(0) => {
144 // EOF - return empty string and false
145 let stack = unsafe { push(stack, Value::String("".to_string().into())) };
146 unsafe { push(stack, Value::Bool(false)) }
147 }
148 Ok(_) => {
149 // Normalize line endings: \r\n -> \n
150 if line.ends_with("\r\n") {
151 line.pop(); // remove \n
152 line.pop(); // remove \r
153 line.push('\n'); // add back \n
154 }
155 let stack = unsafe { push(stack, Value::String(line.into())) };
156 unsafe { push(stack, Value::Bool(true)) }
157 }
158 Err(_) => {
159 // I/O error - return empty string and false
160 let stack = unsafe { push(stack, Value::String("".to_string().into())) };
161 unsafe { push(stack, Value::Bool(false)) }
162 }
163 }
164}
165
166/// Read a line from stdin with explicit EOF detection
167///
168/// Returns the line and a status flag:
169/// - ( line 1 ) on success (line includes trailing newline)
170/// - ( "" 0 ) at EOF or I/O error
171///
172/// Stack effect: ( -- String Int )
173///
174/// The `+` suffix indicates this returns a result pattern (value + status).
175/// Errors are values, not crashes.
176///
177/// # Line Ending Normalization
178///
179/// Line endings are normalized to `\n` regardless of platform. Windows-style
180/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
181/// across different operating systems.
182///
183/// # Safety
184/// Always safe to call
185#[unsafe(no_mangle)]
186pub unsafe extern "C" fn patch_seq_read_line_plus(stack: Stack) -> Stack {
187 use std::io::BufRead;
188
189 let stdin = io::stdin();
190 let mut line = String::new();
191
192 match stdin.lock().read_line(&mut line) {
193 Ok(0) => {
194 // EOF
195 let stack = unsafe { push(stack, Value::String("".to_string().into())) };
196 unsafe { push(stack, Value::Int(0)) }
197 }
198 Ok(_) => {
199 // Normalize line endings: \r\n -> \n
200 if line.ends_with("\r\n") {
201 line.pop(); // remove \n
202 line.pop(); // remove \r
203 line.push('\n'); // add back \n
204 }
205 let stack = unsafe { push(stack, Value::String(line.into())) };
206 unsafe { push(stack, Value::Int(1)) }
207 }
208 Err(_) => {
209 // I/O error - treat like EOF
210 let stack = unsafe { push(stack, Value::String("".to_string().into())) };
211 unsafe { push(stack, Value::Int(0)) }
212 }
213 }
214}
215
/// Upper bound on the byte count accepted by a single read_n call: 10 MiB.
///
/// The cap guards against accidental or malicious huge allocations. LSP messages are
/// typically well under 1MB, so this leaves generous headroom.
const READ_N_MAX_BYTES: i64 = 10 << 20;
220
221/// Validates and extracts the byte count from a Value for read_n.
222/// Returns Ok(usize) on success, Err(message) on validation failure.
223fn validate_read_n_count(value: &Value) -> Result<usize, String> {
224 match value {
225 Value::Int(n) if *n < 0 => Err(format!(
226 "read_n: byte count must be non-negative, got {}",
227 n
228 )),
229 Value::Int(n) if *n > READ_N_MAX_BYTES => Err(format!(
230 "read_n: byte count {} exceeds maximum allowed ({})",
231 n, READ_N_MAX_BYTES
232 )),
233 Value::Int(n) => Ok(*n as usize),
234 _ => Err(format!("read_n: expected Int on stack, got {:?}", value)),
235 }
236}
237
238/// Read exactly N bytes from stdin
239///
240/// Returns the bytes read and a status flag:
241/// - ( string 1 ) on success (read all N bytes)
242/// - ( string 0 ) at EOF, partial read, or error (string may be shorter than N)
243///
244/// Stack effect: ( Int -- String Int )
245///
246/// Like `io.read-line+`, this returns a result pattern (value + status) to allow
247/// explicit EOF detection. The function name omits the `+` suffix for brevity
248/// since byte-count reads are inherently status-oriented.
249///
250/// Errors are values, not crashes.
251///
252/// This is used for protocols like LSP where message bodies are byte-counted
253/// and don't have trailing newlines.
254///
255/// # UTF-8 Handling
256/// The bytes are interpreted as UTF-8. Invalid UTF-8 sequences are replaced
257/// with the Unicode replacement character (U+FFFD). This is appropriate for
258/// text-based protocols like LSP but may not be suitable for binary data.
259///
260/// # Safety
261/// Stack must have an Int on top. The integer must be non-negative and
262/// not exceed READ_N_MAX_BYTES (10MB).
263#[unsafe(no_mangle)]
264pub unsafe extern "C" fn patch_seq_read_n(stack: Stack) -> Stack {
265 use std::io::Read;
266
267 assert!(!stack.is_null(), "read_n: stack is empty");
268
269 let (stack, value) = unsafe { pop(stack) };
270
271 // Validate input - return error status for invalid input
272 let n = match validate_read_n_count(&value) {
273 Ok(n) => n,
274 Err(_) => {
275 // Invalid input - return empty string and error status
276 let stack = unsafe { push(stack, Value::String("".to_string().into())) };
277 return unsafe { push(stack, Value::Int(0)) };
278 }
279 };
280
281 let stdin = io::stdin();
282 let mut buffer = vec![0u8; n];
283 let mut total_read = 0;
284
285 {
286 let mut handle = stdin.lock();
287 while total_read < n {
288 match handle.read(&mut buffer[total_read..]) {
289 Ok(0) => break, // EOF
290 Ok(bytes_read) => total_read += bytes_read,
291 Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
292 Err(_) => break, // I/O error - stop reading, return what we have
293 }
294 }
295 }
296
297 // Truncate to actual bytes read
298 buffer.truncate(total_read);
299
300 // Convert to String (assuming UTF-8)
301 let s = String::from_utf8_lossy(&buffer).into_owned();
302
303 // Status: 1 if we read all N bytes, 0 otherwise
304 let status = if total_read == n { 1i64 } else { 0i64 };
305
306 let stack = unsafe { push(stack, Value::String(s.into())) };
307 unsafe { push(stack, Value::Int(status)) }
308}
309
310/// Convert an integer to a string
311///
312/// Stack effect: ( Int -- String )
313///
314/// # Safety
315/// Stack must have an Int value on top
316#[unsafe(no_mangle)]
317pub unsafe extern "C" fn patch_seq_int_to_string(stack: Stack) -> Stack {
318 assert!(!stack.is_null(), "int_to_string: stack is empty");
319
320 let (rest, value) = unsafe { pop(stack) };
321
322 match value {
323 Value::Int(n) => unsafe { push(rest, Value::String(n.to_string().into())) },
324 _ => panic!("int_to_string: expected Int on stack, got {:?}", value),
325 }
326}
327
328/// Push a C string literal onto the stack (for compiler-generated code).
329///
330/// Used by codegen paths whose source is always an ASCII identifier
331/// (variant tag comparisons, NULL-FFI fallbacks, etc.) — they have no
332/// embedded NULs, so the C-string convention is fine. Byte-clean
333/// string *literals* go through `patch_seq_push_string_bytes` instead.
334///
335/// In debug builds, this asserts the input is ASCII to catch a future
336/// codegen path that accidentally routes binary data here. In release
337/// the bytes are taken as-is — the comment above is the contract.
338///
339/// Stack effect: ( -- str )
340///
341/// # Safety
342/// The c_str pointer must be valid and null-terminated
343#[unsafe(no_mangle)]
344pub unsafe extern "C" fn patch_seq_push_string(stack: Stack, c_str: *const i8) -> Stack {
345 assert!(!c_str.is_null(), "push_string: null string pointer");
346
347 let bytes = unsafe { CStr::from_ptr(c_str).to_bytes() };
348 debug_assert!(
349 std::str::from_utf8(bytes).is_ok(),
350 "push_string: input must be valid UTF-8 (variant tags, identifier-shaped \
351 literals, FFI fallbacks); arbitrary binary string literals must use \
352 push_string_bytes instead",
353 );
354 let seqstr = crate::seqstring::global_bytes(bytes.to_vec());
355 unsafe { push(stack, Value::String(seqstr)) }
356}
357
358/// Push a byte-clean string literal onto the stack (for compiler-generated
359/// code). Carries an explicit length so embedded NULs and arbitrary bytes
360/// flow through unchanged — this is the codegen target for Seq string
361/// literals after the byte-cleanliness landing.
362///
363/// Stack effect: ( -- str )
364///
365/// # Safety
366/// `ptr` must point to at least `len` valid bytes. `ptr` may not be null
367/// unless `len` is zero.
368#[unsafe(no_mangle)]
369pub unsafe extern "C" fn patch_seq_push_string_bytes(
370 stack: Stack,
371 ptr: *const u8,
372 len: usize,
373) -> Stack {
374 let bytes = if len == 0 {
375 Vec::new()
376 } else {
377 assert!(
378 !ptr.is_null(),
379 "push_string_bytes: null pointer with non-zero length"
380 );
381 unsafe { std::slice::from_raw_parts(ptr, len).to_vec() }
382 };
383 let seqstr = crate::seqstring::global_bytes(bytes);
384 unsafe { push(stack, Value::String(seqstr)) }
385}
386
387/// Push a C string literal onto the stack as a Symbol (for compiler-generated code)
388///
389/// Stack effect: ( -- symbol )
390///
391/// # Safety
392/// The c_str pointer must be valid and null-terminated
393#[unsafe(no_mangle)]
394pub unsafe extern "C" fn patch_seq_push_symbol(stack: Stack, c_str: *const i8) -> Stack {
395 assert!(!c_str.is_null(), "push_symbol: null string pointer");
396
397 let s = unsafe {
398 CStr::from_ptr(c_str)
399 .to_str()
400 .expect("push_symbol: invalid UTF-8 in symbol literal")
401 .to_owned()
402 };
403
404 unsafe { push(stack, Value::Symbol(s.into())) }
405}
406
/// In-memory layout of a statically allocated, interned symbol emitted in LLVM IR.
///
/// Field order and types mirror the LLVM IR aggregate
/// `{ ptr, i64 len, i64 capacity, i8 global }`, hence `#[repr(C)]`.
///
/// # Safety Contract
///
/// Values of this type must ONLY be produced by the compiler as static globals,
/// and every one of the following MUST hold:
/// - `ptr` refers to static (`'static`) UTF-8 string data
/// - `len` equals the byte length of that string
/// - `capacity` is 0, which marks the symbol as interned/static
/// - `global` is 1, which marks the symbol as a static allocation
///
/// Breaking any of these leads to undefined behavior (memory corruption,
/// double-free, or null pointer dereference).
#[repr(C)]
pub struct InternedSymbolData {
    /// Start of the static string bytes.
    ptr: *const u8,
    /// Byte length of the string.
    len: i64,
    /// MUST be 0 for interned symbols.
    capacity: i64,
    /// MUST be 1 for interned symbols.
    global: i8,
}
430
431/// Push an interned symbol onto the stack (Issue #166)
432///
433/// This pushes a compile-time symbol literal that shares static memory.
434/// The SeqString has capacity=0 to mark it as interned (never freed).
435///
436/// Stack effect: ( -- Symbol )
437///
438/// # Safety
439/// The symbol_data pointer must point to a valid static InternedSymbolData structure.
440#[unsafe(no_mangle)]
441pub unsafe extern "C" fn patch_seq_push_interned_symbol(
442 stack: Stack,
443 symbol_data: *const InternedSymbolData,
444) -> Stack {
445 assert!(
446 !symbol_data.is_null(),
447 "push_interned_symbol: null symbol data pointer"
448 );
449
450 let data = unsafe { &*symbol_data };
451
452 // Validate interned symbol invariants - these are safety-critical
453 // and must run in release builds to prevent memory corruption
454 assert!(!data.ptr.is_null(), "Interned symbol data pointer is null");
455 assert_eq!(data.capacity, 0, "Interned symbols must have capacity=0");
456 assert_ne!(data.global, 0, "Interned symbols must have global=1");
457
458 // Create SeqString that points to static data
459 // capacity=0 marks it as interned (Drop will skip deallocation)
460 // Safety: from_raw_parts requires valid ptr/len/capacity, which we trust
461 // from the LLVM-generated static data
462 let seq_str = unsafe {
463 crate::seqstring::SeqString::from_raw_parts(
464 data.ptr,
465 data.len as usize,
466 data.capacity as usize, // 0 for interned
467 data.global != 0, // true for interned
468 )
469 };
470
471 unsafe { push(stack, Value::Symbol(seq_str)) }
472}
473
474/// Push a SeqString value onto the stack
475///
476/// This is used when we already have a SeqString (e.g., from closures).
477/// Unlike push_string which takes a C string, this takes a SeqString by value.
478///
479/// Stack effect: ( -- String )
480///
481/// # Safety
482/// The SeqString must be valid. This is only called from LLVM-generated code, not actual C code.
483#[allow(improper_ctypes_definitions)]
484#[unsafe(no_mangle)]
485pub unsafe extern "C" fn patch_seq_push_seqstring(
486 stack: Stack,
487 seq_str: crate::seqstring::SeqString,
488) -> Stack {
489 unsafe { push(stack, Value::String(seq_str)) }
490}
491
492/// Convert a Symbol to a String
493///
494/// Stack effect: ( Symbol -- String )
495///
496/// # Safety
497/// Stack must have a Symbol on top.
498#[unsafe(no_mangle)]
499pub unsafe extern "C" fn patch_seq_symbol_to_string(stack: Stack) -> Stack {
500 assert!(!stack.is_null(), "symbol_to_string: stack is empty");
501
502 let (rest, value) = unsafe { pop(stack) };
503
504 match value {
505 Value::Symbol(s) => unsafe { push(rest, Value::String(s)) },
506 _ => panic!(
507 "symbol_to_string: expected Symbol on stack, got {:?}",
508 value
509 ),
510 }
511}
512
513/// Convert a String to a Symbol
514///
515/// Stack effect: ( String -- Symbol )
516///
517/// # Safety
518/// Stack must have a String on top.
519#[unsafe(no_mangle)]
520pub unsafe extern "C" fn patch_seq_string_to_symbol(stack: Stack) -> Stack {
521 assert!(!stack.is_null(), "string_to_symbol: stack is empty");
522
523 let (rest, value) = unsafe { pop(stack) };
524
525 match value {
526 Value::String(s) => unsafe { push(rest, Value::Symbol(s)) },
527 _ => panic!(
528 "string_to_symbol: expected String on stack, got {:?}",
529 value
530 ),
531 }
532}
533
534/// Exit the program with a status code
535///
536/// Stack effect: ( exit_code -- )
537///
538/// # Safety
539/// Stack must have an Int on top. Never returns.
540#[unsafe(no_mangle)]
541pub unsafe extern "C" fn patch_seq_exit_op(stack: Stack) -> ! {
542 assert!(!stack.is_null(), "exit_op: stack is empty");
543
544 let (_rest, value) = unsafe { pop(stack) };
545
546 match value {
547 Value::Int(code) => {
548 // Explicitly validate exit code is in Unix-compatible range
549 if !(EXIT_CODE_MIN..=EXIT_CODE_MAX).contains(&code) {
550 panic!(
551 "exit_op: exit code must be in range {}-{}, got {}",
552 EXIT_CODE_MIN, EXIT_CODE_MAX, code
553 );
554 }
555 std::process::exit(code as i32);
556 }
557 _ => panic!("exit_op: expected Int on stack, got {:?}", value),
558 }
559}
560
561// Public re-exports with short names for internal use
562pub use patch_seq_exit_op as exit_op;
563pub use patch_seq_int_to_string as int_to_string;
564pub use patch_seq_push_interned_symbol as push_interned_symbol;
565pub use patch_seq_push_seqstring as push_seqstring;
566pub use patch_seq_push_string as push_string;
567pub use patch_seq_push_symbol as push_symbol;
568pub use patch_seq_read_line as read_line;
569pub use patch_seq_read_line_plus as read_line_plus;
570pub use patch_seq_read_n as read_n;
571pub use patch_seq_string_to_symbol as string_to_symbol;
572pub use patch_seq_symbol_to_string as symbol_to_string;
573pub use patch_seq_write as write;
574pub use patch_seq_write_line as write_line;
575
576#[cfg(test)]
577mod tests;