seq_runtime/io.rs
1//! I/O Operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//!
5//! # Safety Contract
6//!
7//! **IMPORTANT:** These functions are designed to be called ONLY by compiler-generated code,
8//! not by end users or arbitrary C code. The compiler is responsible for:
9//!
10//! - Ensuring stack has correct types (verified by type checker)
11//! - Passing valid, null-terminated C strings to `push_string`
12//! - Never calling these functions directly from user code
13//!
14//! # String Handling
15//!
16//! String literals from the compiler must be valid UTF-8 C strings (null-terminated).
17//! Currently, each string literal is allocated as an owned `String`. See
18//! `docs/STRING_INTERNING_DESIGN.md` for discussion of future optimizations
19//! (interning, static references, etc.).
20
21use crate::stack::{Stack, pop, push};
22use crate::value::Value;
23use std::ffi::CStr;
24use std::io;
25use std::sync::LazyLock;
26
/// Process-wide lock taken by the stdout writers in this module
/// (`patch_seq_write_line` and `patch_seq_write`) before they touch fd 1.
///
/// This is a `may::sync::Mutex`, which yields the coroutine when contended
/// instead of blocking the OS thread. Serializing stdout access this way
/// keeps output from concurrent strands from interleaving, and it was
/// introduced to prevent the RefCell borrow panics that occur when multiple
/// coroutines on the same thread use Rust's standard stdout handle.
///
/// The mutex protects no data (`()`); only holding the guard matters. It is
/// created lazily on first use via `LazyLock`.
static STDOUT_MUTEX: LazyLock<may::sync::Mutex<()>> = LazyLock::new(|| may::sync::Mutex::new(()));

/// Smallest exit code accepted by `patch_seq_exit_op` (lower bound of the
/// Unix-compatible exit status range).
const EXIT_CODE_MIN: i64 = 0;
/// Largest exit code accepted by `patch_seq_exit_op` (upper bound of the
/// Unix-compatible exit status range).
const EXIT_CODE_MAX: i64 = 255;
36
37/// Write a string to stdout followed by a newline
38///
39/// Stack effect: ( str -- )
40///
41/// # Safety
42/// Stack must have a String value on top
43///
44/// # Concurrency
45/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
46/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
47/// This prevents RefCell borrow panics when multiple strands write concurrently.
48#[unsafe(no_mangle)]
49pub unsafe extern "C" fn patch_seq_write_line(stack: Stack) -> Stack {
50 assert!(!stack.is_null(), "write_line: stack is empty");
51
52 let (rest, value) = unsafe { pop(stack) };
53
54 match value {
55 Value::String(s) => {
56 // Acquire coroutine-aware mutex (yields if contended, doesn't block)
57 // This serializes access to stdout
58 let _guard = STDOUT_MUTEX.lock().unwrap();
59
60 // Write directly to fd 1 using libc to avoid Rust's std::io::stdout() RefCell.
61 // Rust's standard I/O uses RefCell which panics on concurrent access from
62 // multiple coroutines on the same thread.
63 // Byte-clean: write the underlying bytes directly to fd 1.
64 // libc::write takes a raw pointer + length, so we don't
65 // need a `&str`. Binary response bodies, ANSI escapes,
66 // arbitrary protocol output all flow through unchanged.
67 let bytes = s.as_bytes();
68 let newline = b"\n";
69 unsafe {
70 libc::write(1, bytes.as_ptr() as *const libc::c_void, bytes.len());
71 libc::write(1, newline.as_ptr() as *const libc::c_void, newline.len());
72 }
73
74 rest
75 }
76 _ => panic!("write_line: expected String on stack, got {:?}", value),
77 }
78}
79
80/// Write a string to stdout without a trailing newline
81///
82/// Stack effect: ( str -- )
83///
84/// This is useful for protocols like LSP that require exact byte output
85/// without trailing newlines.
86///
87/// # Safety
88/// Stack must have a String value on top
89///
90/// # Concurrency
91/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
92/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
93#[unsafe(no_mangle)]
94pub unsafe extern "C" fn patch_seq_write(stack: Stack) -> Stack {
95 assert!(!stack.is_null(), "write: stack is empty");
96
97 let (rest, value) = unsafe { pop(stack) };
98
99 match value {
100 Value::String(s) => {
101 let _guard = STDOUT_MUTEX.lock().unwrap();
102
103 // Byte-clean: write the underlying bytes directly to fd 1.
104 let bytes = s.as_bytes();
105 unsafe {
106 libc::write(1, bytes.as_ptr() as *const libc::c_void, bytes.len());
107 }
108
109 rest
110 }
111 _ => panic!("write: expected String on stack, got {:?}", value),
112 }
113}
114
115/// Read a line from stdin
116///
117/// Returns the line and a success flag:
118/// - ( line true ) on success (line includes trailing newline)
119/// - ( "" false ) on I/O error or EOF
120///
121/// Use `string.chomp` to remove trailing newlines if needed.
122///
123/// # Line Ending Normalization
124///
125/// Line endings are normalized to `\n` regardless of platform. Windows-style
126/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
127/// across different operating systems.
128///
129/// Stack effect: ( -- String Bool )
130///
131/// Errors are values, not crashes.
132///
133/// # Safety
134/// Always safe to call
135#[unsafe(no_mangle)]
136pub unsafe extern "C" fn patch_seq_read_line(stack: Stack) -> Stack {
137 use std::io::BufRead;
138
139 let stdin = io::stdin();
140 let mut line = String::new();
141
142 match stdin.lock().read_line(&mut line) {
143 Ok(0) => {
144 // EOF - return empty string and false
145 let stack = unsafe { push(stack, Value::String("".to_string().into())) };
146 unsafe { push(stack, Value::Bool(false)) }
147 }
148 Ok(_) => {
149 // Normalize line endings: \r\n -> \n
150 if line.ends_with("\r\n") {
151 line.pop(); // remove \n
152 line.pop(); // remove \r
153 line.push('\n'); // add back \n
154 }
155 let stack = unsafe { push(stack, Value::String(line.into())) };
156 unsafe { push(stack, Value::Bool(true)) }
157 }
158 Err(_) => {
159 // I/O error - return empty string and false
160 let stack = unsafe { push(stack, Value::String("".to_string().into())) };
161 unsafe { push(stack, Value::Bool(false)) }
162 }
163 }
164}
165
166/// Maximum bytes allowed for a single read_n call (10MB)
167/// This prevents accidental or malicious massive memory allocations.
168/// LSP messages are typically < 1MB, so 10MB provides generous headroom.
169const READ_N_MAX_BYTES: i64 = 10 * 1024 * 1024;
170
171/// Validates and extracts the byte count from a Value for read_n.
172/// Returns Ok(usize) on success, Err(message) on validation failure.
173fn validate_read_n_count(value: &Value) -> Result<usize, String> {
174 match value {
175 Value::Int(n) if *n < 0 => Err(format!(
176 "read_n: byte count must be non-negative, got {}",
177 n
178 )),
179 Value::Int(n) if *n > READ_N_MAX_BYTES => Err(format!(
180 "read_n: byte count {} exceeds maximum allowed ({})",
181 n, READ_N_MAX_BYTES
182 )),
183 Value::Int(n) => Ok(*n as usize),
184 _ => Err(format!("read_n: expected Int on stack, got {:?}", value)),
185 }
186}
187
188/// Read exactly N bytes from stdin
189///
190/// Returns the bytes read and a status flag:
191/// - ( string 1 ) on success (read all N bytes)
192/// - ( string 0 ) at EOF, partial read, or error (string may be shorter than N)
193///
194/// Stack effect: ( Int -- String Int )
195///
196/// Like `io.read-line`, this returns a result pattern (value + status) to allow
197/// explicit EOF detection. The function name omits the `+` suffix for brevity
198/// since byte-count reads are inherently status-oriented.
199///
200/// Errors are values, not crashes.
201///
202/// This is used for protocols like LSP where message bodies are byte-counted
203/// and don't have trailing newlines.
204///
205/// # UTF-8 Handling
206/// The bytes are interpreted as UTF-8. Invalid UTF-8 sequences are replaced
207/// with the Unicode replacement character (U+FFFD). This is appropriate for
208/// text-based protocols like LSP but may not be suitable for binary data.
209///
210/// # Safety
211/// Stack must have an Int on top. The integer must be non-negative and
212/// not exceed READ_N_MAX_BYTES (10MB).
213#[unsafe(no_mangle)]
214pub unsafe extern "C" fn patch_seq_read_n(stack: Stack) -> Stack {
215 use std::io::Read;
216
217 assert!(!stack.is_null(), "read_n: stack is empty");
218
219 let (stack, value) = unsafe { pop(stack) };
220
221 // Validate input - return error status for invalid input
222 let n = match validate_read_n_count(&value) {
223 Ok(n) => n,
224 Err(_) => {
225 // Invalid input - return empty string and error status
226 let stack = unsafe { push(stack, Value::String("".to_string().into())) };
227 return unsafe { push(stack, Value::Int(0)) };
228 }
229 };
230
231 let stdin = io::stdin();
232 let mut buffer = vec![0u8; n];
233 let mut total_read = 0;
234
235 {
236 let mut handle = stdin.lock();
237 while total_read < n {
238 match handle.read(&mut buffer[total_read..]) {
239 Ok(0) => break, // EOF
240 Ok(bytes_read) => total_read += bytes_read,
241 Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
242 Err(_) => break, // I/O error - stop reading, return what we have
243 }
244 }
245 }
246
247 // Truncate to actual bytes read
248 buffer.truncate(total_read);
249
250 // Convert to String (assuming UTF-8)
251 let s = String::from_utf8_lossy(&buffer).into_owned();
252
253 // Status: 1 if we read all N bytes, 0 otherwise
254 let status = if total_read == n { 1i64 } else { 0i64 };
255
256 let stack = unsafe { push(stack, Value::String(s.into())) };
257 unsafe { push(stack, Value::Int(status)) }
258}
259
260/// Convert an integer to a string
261///
262/// Stack effect: ( Int -- String )
263///
264/// # Safety
265/// Stack must have an Int value on top
266#[unsafe(no_mangle)]
267pub unsafe extern "C" fn patch_seq_int_to_string(stack: Stack) -> Stack {
268 assert!(!stack.is_null(), "int_to_string: stack is empty");
269
270 let (rest, value) = unsafe { pop(stack) };
271
272 match value {
273 Value::Int(n) => unsafe { push(rest, Value::String(n.to_string().into())) },
274 _ => panic!("int_to_string: expected Int on stack, got {:?}", value),
275 }
276}
277
278/// Push a C string literal onto the stack (for compiler-generated code).
279///
280/// Used by codegen paths whose source is always an ASCII identifier
281/// (variant tag comparisons, NULL-FFI fallbacks, etc.) — they have no
282/// embedded NULs, so the C-string convention is fine. Byte-clean
283/// string *literals* go through `patch_seq_push_string_bytes` instead.
284///
285/// In debug builds, this asserts the input is ASCII to catch a future
286/// codegen path that accidentally routes binary data here. In release
287/// the bytes are taken as-is — the comment above is the contract.
288///
289/// Stack effect: ( -- str )
290///
291/// # Safety
292/// The c_str pointer must be valid and null-terminated
293#[unsafe(no_mangle)]
294pub unsafe extern "C" fn patch_seq_push_string(stack: Stack, c_str: *const i8) -> Stack {
295 assert!(!c_str.is_null(), "push_string: null string pointer");
296
297 let bytes = unsafe { CStr::from_ptr(c_str).to_bytes() };
298 debug_assert!(
299 std::str::from_utf8(bytes).is_ok(),
300 "push_string: input must be valid UTF-8 (variant tags, identifier-shaped \
301 literals, FFI fallbacks); arbitrary binary string literals must use \
302 push_string_bytes instead",
303 );
304 let seqstr = crate::seqstring::global_bytes(bytes.to_vec());
305 unsafe { push(stack, Value::String(seqstr)) }
306}
307
308/// Push a byte-clean string literal onto the stack (for compiler-generated
309/// code). Carries an explicit length so embedded NULs and arbitrary bytes
310/// flow through unchanged — this is the codegen target for Seq string
311/// literals after the byte-cleanliness landing.
312///
313/// Stack effect: ( -- str )
314///
315/// # Safety
316/// `ptr` must point to at least `len` valid bytes. `ptr` may not be null
317/// unless `len` is zero.
318#[unsafe(no_mangle)]
319pub unsafe extern "C" fn patch_seq_push_string_bytes(
320 stack: Stack,
321 ptr: *const u8,
322 len: usize,
323) -> Stack {
324 let bytes = if len == 0 {
325 Vec::new()
326 } else {
327 assert!(
328 !ptr.is_null(),
329 "push_string_bytes: null pointer with non-zero length"
330 );
331 unsafe { std::slice::from_raw_parts(ptr, len).to_vec() }
332 };
333 let seqstr = crate::seqstring::global_bytes(bytes);
334 unsafe { push(stack, Value::String(seqstr)) }
335}
336
337/// Push a C string literal onto the stack as a Symbol (for compiler-generated code)
338///
339/// Stack effect: ( -- symbol )
340///
341/// # Safety
342/// The c_str pointer must be valid and null-terminated
343#[unsafe(no_mangle)]
344pub unsafe extern "C" fn patch_seq_push_symbol(stack: Stack, c_str: *const i8) -> Stack {
345 assert!(!c_str.is_null(), "push_symbol: null string pointer");
346
347 let s = unsafe {
348 CStr::from_ptr(c_str)
349 .to_str()
350 .expect("push_symbol: invalid UTF-8 in symbol literal")
351 .to_owned()
352 };
353
354 unsafe { push(stack, Value::Symbol(s.into())) }
355}
356
/// Layout of static interned symbol data from LLVM IR
///
/// Matches the LLVM IR structure:
/// `{ ptr, i64 len, i64 capacity, i8 global }`
///
/// `#[repr(C)]` fixes the field order so the Rust view agrees with the
/// compiler-emitted global; do not reorder or retype fields without changing
/// the codegen side as well.
///
/// # Safety Contract
///
/// This struct must ONLY be constructed by the compiler in static globals.
/// Invariants that MUST hold:
/// - `ptr` points to valid static UTF-8 string data with lifetime `'static`
/// - `len` matches the actual byte length of the string
/// - `capacity` MUST be 0 (marks symbol as interned/static)
/// - `global` MUST be 1 (marks symbol as static allocation)
///
/// Violating these invariants causes undefined behavior (memory corruption,
/// double-free, or null pointer dereference).
#[repr(C)]
pub struct InternedSymbolData {
    /// Start of the symbol's bytes; `patch_seq_push_interned_symbol` asserts
    /// this is non-null.
    ptr: *const u8,
    /// Byte length of the symbol text.
    len: i64,
    /// MUST be 0 for interned symbols (asserted at push time).
    capacity: i64,
    /// MUST be 1 for interned symbols; the push-time check only requires
    /// a non-zero value.
    global: i8,
}
380
381/// Push an interned symbol onto the stack (Issue #166)
382///
383/// This pushes a compile-time symbol literal that shares static memory.
384/// The SeqString has capacity=0 to mark it as interned (never freed).
385///
386/// Stack effect: ( -- Symbol )
387///
388/// # Safety
389/// The symbol_data pointer must point to a valid static InternedSymbolData structure.
390#[unsafe(no_mangle)]
391pub unsafe extern "C" fn patch_seq_push_interned_symbol(
392 stack: Stack,
393 symbol_data: *const InternedSymbolData,
394) -> Stack {
395 assert!(
396 !symbol_data.is_null(),
397 "push_interned_symbol: null symbol data pointer"
398 );
399
400 let data = unsafe { &*symbol_data };
401
402 // Validate interned symbol invariants - these are safety-critical
403 // and must run in release builds to prevent memory corruption
404 assert!(!data.ptr.is_null(), "Interned symbol data pointer is null");
405 assert_eq!(data.capacity, 0, "Interned symbols must have capacity=0");
406 assert_ne!(data.global, 0, "Interned symbols must have global=1");
407
408 // Create SeqString that points to static data
409 // capacity=0 marks it as interned (Drop will skip deallocation)
410 // Safety: from_raw_parts requires valid ptr/len/capacity, which we trust
411 // from the LLVM-generated static data
412 let seq_str = unsafe {
413 crate::seqstring::SeqString::from_raw_parts(
414 data.ptr,
415 data.len as usize,
416 data.capacity as usize, // 0 for interned
417 data.global != 0, // true for interned
418 )
419 };
420
421 unsafe { push(stack, Value::Symbol(seq_str)) }
422}
423
424/// Push a SeqString value onto the stack
425///
426/// This is used when we already have a SeqString (e.g., from closures).
427/// Unlike push_string which takes a C string, this takes a SeqString by value.
428///
429/// Stack effect: ( -- String )
430///
431/// # Safety
432/// The SeqString must be valid. This is only called from LLVM-generated code, not actual C code.
433#[allow(improper_ctypes_definitions)]
434#[unsafe(no_mangle)]
435pub unsafe extern "C" fn patch_seq_push_seqstring(
436 stack: Stack,
437 seq_str: crate::seqstring::SeqString,
438) -> Stack {
439 unsafe { push(stack, Value::String(seq_str)) }
440}
441
442/// Convert a Symbol to a String
443///
444/// Stack effect: ( Symbol -- String )
445///
446/// # Safety
447/// Stack must have a Symbol on top.
448#[unsafe(no_mangle)]
449pub unsafe extern "C" fn patch_seq_symbol_to_string(stack: Stack) -> Stack {
450 assert!(!stack.is_null(), "symbol_to_string: stack is empty");
451
452 let (rest, value) = unsafe { pop(stack) };
453
454 match value {
455 Value::Symbol(s) => unsafe { push(rest, Value::String(s)) },
456 _ => panic!(
457 "symbol_to_string: expected Symbol on stack, got {:?}",
458 value
459 ),
460 }
461}
462
463/// Convert a String to a Symbol
464///
465/// Stack effect: ( String -- Symbol )
466///
467/// # Safety
468/// Stack must have a String on top.
469#[unsafe(no_mangle)]
470pub unsafe extern "C" fn patch_seq_string_to_symbol(stack: Stack) -> Stack {
471 assert!(!stack.is_null(), "string_to_symbol: stack is empty");
472
473 let (rest, value) = unsafe { pop(stack) };
474
475 match value {
476 Value::String(s) => unsafe { push(rest, Value::Symbol(s)) },
477 _ => panic!(
478 "string_to_symbol: expected String on stack, got {:?}",
479 value
480 ),
481 }
482}
483
484/// Exit the program with a status code
485///
486/// Stack effect: ( exit_code -- )
487///
488/// # Safety
489/// Stack must have an Int on top. Never returns.
490#[unsafe(no_mangle)]
491pub unsafe extern "C" fn patch_seq_exit_op(stack: Stack) -> ! {
492 assert!(!stack.is_null(), "exit_op: stack is empty");
493
494 let (_rest, value) = unsafe { pop(stack) };
495
496 match value {
497 Value::Int(code) => {
498 // Explicitly validate exit code is in Unix-compatible range
499 if !(EXIT_CODE_MIN..=EXIT_CODE_MAX).contains(&code) {
500 panic!(
501 "exit_op: exit code must be in range {}-{}, got {}",
502 EXIT_CODE_MIN, EXIT_CODE_MAX, code
503 );
504 }
505 std::process::exit(code as i32);
506 }
507 _ => panic!("exit_op: expected Int on stack, got {:?}", value),
508 }
509}
510
511// Public re-exports with short names for internal use
512pub use patch_seq_exit_op as exit_op;
513pub use patch_seq_int_to_string as int_to_string;
514pub use patch_seq_push_interned_symbol as push_interned_symbol;
515pub use patch_seq_push_seqstring as push_seqstring;
516pub use patch_seq_push_string as push_string;
517pub use patch_seq_push_symbol as push_symbol;
518pub use patch_seq_read_line as read_line;
519pub use patch_seq_read_n as read_n;
520pub use patch_seq_string_to_symbol as string_to_symbol;
521pub use patch_seq_symbol_to_string as symbol_to_string;
522pub use patch_seq_write as write;
523pub use patch_seq_write_line as write_line;
524
525#[cfg(test)]
526mod tests;