Skip to main content

seq_runtime/
io.rs

1//! I/O Operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//!
5//! # Safety Contract
6//!
7//! **IMPORTANT:** These functions are designed to be called ONLY by compiler-generated code,
8//! not by end users or arbitrary C code. The compiler is responsible for:
9//!
10//! - Ensuring stack has correct types (verified by type checker)
11//! - Passing valid, null-terminated C strings to `push_string`
12//! - Never calling these functions directly from user code
13//!
14//! # String Handling
15//!
16//! String literals from the compiler must be valid UTF-8 C strings (null-terminated).
17//! Currently, each string literal is allocated as an owned `String`. See
18//! `docs/STRING_INTERNING_DESIGN.md` for discussion of future optimizations
19//! (interning, static references, etc.).
20
21use crate::stack::{Stack, pop, push};
22use crate::value::Value;
23use std::ffi::CStr;
24use std::io;
25use std::sync::LazyLock;
26
/// Coroutine-aware stdout mutex.
///
/// Uses `may::sync::Mutex`, which yields the coroutine when contended instead of
/// blocking the OS thread. The mutex guards no data (`()`); it exists purely to
/// serialize the stdout writes performed by `patch_seq_write_line` and
/// `patch_seq_write`, both of which hold it for the duration of their writes.
///
/// By serializing access to stdout, we prevent RefCell borrow panics that occur when
/// multiple coroutines on the same thread try to access stdout's internal RefCell
/// concurrently.
///
/// Initialized lazily on first use via `LazyLock`.
static STDOUT_MUTEX: LazyLock<may::sync::Mutex<()>> = LazyLock::new(|| may::sync::Mutex::new(()));
32
/// Valid exit code range for Unix compatibility.
///
/// Lower bound (inclusive) of the exit codes accepted by `patch_seq_exit_op`.
const EXIT_CODE_MIN: i64 = 0;
/// Upper bound (inclusive) of the exit codes accepted by `patch_seq_exit_op`.
const EXIT_CODE_MAX: i64 = 255;
36
37/// Write a string to stdout followed by a newline
38///
39/// Stack effect: ( str -- )
40///
41/// # Safety
42/// Stack must have a String value on top
43///
44/// # Concurrency
45/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
46/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
47/// This prevents RefCell borrow panics when multiple strands write concurrently.
48#[unsafe(no_mangle)]
49pub unsafe extern "C" fn patch_seq_write_line(stack: Stack) -> Stack {
50    assert!(!stack.is_null(), "write_line: stack is empty");
51
52    let (rest, value) = unsafe { pop(stack) };
53
54    match value {
55        Value::String(s) => {
56            // Acquire coroutine-aware mutex (yields if contended, doesn't block)
57            // This serializes access to stdout
58            let _guard = STDOUT_MUTEX.lock().unwrap();
59
60            // Write directly to fd 1 using libc to avoid Rust's std::io::stdout() RefCell.
61            // Rust's standard I/O uses RefCell which panics on concurrent access from
62            // multiple coroutines on the same thread.
63            // Byte-clean: write the underlying bytes directly to fd 1.
64            // libc::write takes a raw pointer + length, so we don't
65            // need a `&str`. Binary response bodies, ANSI escapes,
66            // arbitrary protocol output all flow through unchanged.
67            let bytes = s.as_bytes();
68            let newline = b"\n";
69            unsafe {
70                libc::write(1, bytes.as_ptr() as *const libc::c_void, bytes.len());
71                libc::write(1, newline.as_ptr() as *const libc::c_void, newline.len());
72            }
73
74            rest
75        }
76        _ => panic!("write_line: expected String on stack, got {:?}", value),
77    }
78}
79
80/// Write a string to stdout without a trailing newline
81///
82/// Stack effect: ( str -- )
83///
84/// This is useful for protocols like LSP that require exact byte output
85/// without trailing newlines.
86///
87/// # Safety
88/// Stack must have a String value on top
89///
90/// # Concurrency
91/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
92/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
93#[unsafe(no_mangle)]
94pub unsafe extern "C" fn patch_seq_write(stack: Stack) -> Stack {
95    assert!(!stack.is_null(), "write: stack is empty");
96
97    let (rest, value) = unsafe { pop(stack) };
98
99    match value {
100        Value::String(s) => {
101            let _guard = STDOUT_MUTEX.lock().unwrap();
102
103            // Byte-clean: write the underlying bytes directly to fd 1.
104            let bytes = s.as_bytes();
105            unsafe {
106                libc::write(1, bytes.as_ptr() as *const libc::c_void, bytes.len());
107            }
108
109            rest
110        }
111        _ => panic!("write: expected String on stack, got {:?}", value),
112    }
113}
114
115/// Read a line from stdin
116///
117/// Returns the line and a success flag:
118/// - ( line true ) on success (line includes trailing newline)
119/// - ( "" false ) on I/O error or EOF
120///
121/// Use `string.chomp` to remove trailing newlines if needed.
122///
123/// # Line Ending Normalization
124///
125/// Line endings are normalized to `\n` regardless of platform. Windows-style
126/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
127/// across different operating systems.
128///
129/// Stack effect: ( -- String Bool )
130///
131/// Errors are values, not crashes.
132///
133/// # Safety
134/// Always safe to call
135#[unsafe(no_mangle)]
136pub unsafe extern "C" fn patch_seq_read_line(stack: Stack) -> Stack {
137    use std::io::BufRead;
138
139    let stdin = io::stdin();
140    let mut line = String::new();
141
142    match stdin.lock().read_line(&mut line) {
143        Ok(0) => {
144            // EOF - return empty string and false
145            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
146            unsafe { push(stack, Value::Bool(false)) }
147        }
148        Ok(_) => {
149            // Normalize line endings: \r\n -> \n
150            if line.ends_with("\r\n") {
151                line.pop(); // remove \n
152                line.pop(); // remove \r
153                line.push('\n'); // add back \n
154            }
155            let stack = unsafe { push(stack, Value::String(line.into())) };
156            unsafe { push(stack, Value::Bool(true)) }
157        }
158        Err(_) => {
159            // I/O error - return empty string and false
160            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
161            unsafe { push(stack, Value::Bool(false)) }
162        }
163    }
164}
165
/// Maximum bytes allowed for a single read_n call (10MB)
/// This prevents accidental or malicious massive memory allocations.
/// LSP messages are typically < 1MB, so 10MB provides generous headroom.
///
/// Enforced by `validate_read_n_count`, which rejects any count strictly
/// greater than this value (a count equal to the limit is accepted).
const READ_N_MAX_BYTES: i64 = 10 * 1024 * 1024;
170
171/// Validates and extracts the byte count from a Value for read_n.
172/// Returns Ok(usize) on success, Err(message) on validation failure.
173fn validate_read_n_count(value: &Value) -> Result<usize, String> {
174    match value {
175        Value::Int(n) if *n < 0 => Err(format!(
176            "read_n: byte count must be non-negative, got {}",
177            n
178        )),
179        Value::Int(n) if *n > READ_N_MAX_BYTES => Err(format!(
180            "read_n: byte count {} exceeds maximum allowed ({})",
181            n, READ_N_MAX_BYTES
182        )),
183        Value::Int(n) => Ok(*n as usize),
184        _ => Err(format!("read_n: expected Int on stack, got {:?}", value)),
185    }
186}
187
188/// Read exactly N bytes from stdin
189///
190/// Returns the bytes read and a status flag:
191/// - ( string 1 ) on success (read all N bytes)
192/// - ( string 0 ) at EOF, partial read, or error (string may be shorter than N)
193///
194/// Stack effect: ( Int -- String Int )
195///
196/// Like `io.read-line`, this returns a result pattern (value + status) to allow
197/// explicit EOF detection. The function name omits the `+` suffix for brevity
198/// since byte-count reads are inherently status-oriented.
199///
200/// Errors are values, not crashes.
201///
202/// This is used for protocols like LSP where message bodies are byte-counted
203/// and don't have trailing newlines.
204///
205/// # UTF-8 Handling
206/// The bytes are interpreted as UTF-8. Invalid UTF-8 sequences are replaced
207/// with the Unicode replacement character (U+FFFD). This is appropriate for
208/// text-based protocols like LSP but may not be suitable for binary data.
209///
210/// # Safety
211/// Stack must have an Int on top. The integer must be non-negative and
212/// not exceed READ_N_MAX_BYTES (10MB).
213#[unsafe(no_mangle)]
214pub unsafe extern "C" fn patch_seq_read_n(stack: Stack) -> Stack {
215    use std::io::Read;
216
217    assert!(!stack.is_null(), "read_n: stack is empty");
218
219    let (stack, value) = unsafe { pop(stack) };
220
221    // Validate input - return error status for invalid input
222    let n = match validate_read_n_count(&value) {
223        Ok(n) => n,
224        Err(_) => {
225            // Invalid input - return empty string and error status
226            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
227            return unsafe { push(stack, Value::Int(0)) };
228        }
229    };
230
231    let stdin = io::stdin();
232    let mut buffer = vec![0u8; n];
233    let mut total_read = 0;
234
235    {
236        let mut handle = stdin.lock();
237        while total_read < n {
238            match handle.read(&mut buffer[total_read..]) {
239                Ok(0) => break, // EOF
240                Ok(bytes_read) => total_read += bytes_read,
241                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
242                Err(_) => break, // I/O error - stop reading, return what we have
243            }
244        }
245    }
246
247    // Truncate to actual bytes read
248    buffer.truncate(total_read);
249
250    // Convert to String (assuming UTF-8)
251    let s = String::from_utf8_lossy(&buffer).into_owned();
252
253    // Status: 1 if we read all N bytes, 0 otherwise
254    let status = if total_read == n { 1i64 } else { 0i64 };
255
256    let stack = unsafe { push(stack, Value::String(s.into())) };
257    unsafe { push(stack, Value::Int(status)) }
258}
259
260/// Convert an integer to a string
261///
262/// Stack effect: ( Int -- String )
263///
264/// # Safety
265/// Stack must have an Int value on top
266#[unsafe(no_mangle)]
267pub unsafe extern "C" fn patch_seq_int_to_string(stack: Stack) -> Stack {
268    assert!(!stack.is_null(), "int_to_string: stack is empty");
269
270    let (rest, value) = unsafe { pop(stack) };
271
272    match value {
273        Value::Int(n) => unsafe { push(rest, Value::String(n.to_string().into())) },
274        _ => panic!("int_to_string: expected Int on stack, got {:?}", value),
275    }
276}
277
278/// Push a C string literal onto the stack (for compiler-generated code).
279///
280/// Used by codegen paths whose source is always an ASCII identifier
281/// (variant tag comparisons, NULL-FFI fallbacks, etc.) — they have no
282/// embedded NULs, so the C-string convention is fine. Byte-clean
283/// string *literals* go through `patch_seq_push_string_bytes` instead.
284///
285/// In debug builds, this asserts the input is ASCII to catch a future
286/// codegen path that accidentally routes binary data here. In release
287/// the bytes are taken as-is — the comment above is the contract.
288///
289/// Stack effect: ( -- str )
290///
291/// # Safety
292/// The c_str pointer must be valid and null-terminated
293#[unsafe(no_mangle)]
294pub unsafe extern "C" fn patch_seq_push_string(stack: Stack, c_str: *const i8) -> Stack {
295    assert!(!c_str.is_null(), "push_string: null string pointer");
296
297    let bytes = unsafe { CStr::from_ptr(c_str).to_bytes() };
298    debug_assert!(
299        std::str::from_utf8(bytes).is_ok(),
300        "push_string: input must be valid UTF-8 (variant tags, identifier-shaped \
301         literals, FFI fallbacks); arbitrary binary string literals must use \
302         push_string_bytes instead",
303    );
304    let seqstr = crate::seqstring::global_bytes(bytes.to_vec());
305    unsafe { push(stack, Value::String(seqstr)) }
306}
307
308/// Push a byte-clean string literal onto the stack (for compiler-generated
309/// code). Carries an explicit length so embedded NULs and arbitrary bytes
310/// flow through unchanged — this is the codegen target for Seq string
311/// literals after the byte-cleanliness landing.
312///
313/// Stack effect: ( -- str )
314///
315/// # Safety
316/// `ptr` must point to at least `len` valid bytes. `ptr` may not be null
317/// unless `len` is zero.
318#[unsafe(no_mangle)]
319pub unsafe extern "C" fn patch_seq_push_string_bytes(
320    stack: Stack,
321    ptr: *const u8,
322    len: usize,
323) -> Stack {
324    let bytes = if len == 0 {
325        Vec::new()
326    } else {
327        assert!(
328            !ptr.is_null(),
329            "push_string_bytes: null pointer with non-zero length"
330        );
331        unsafe { std::slice::from_raw_parts(ptr, len).to_vec() }
332    };
333    let seqstr = crate::seqstring::global_bytes(bytes);
334    unsafe { push(stack, Value::String(seqstr)) }
335}
336
337/// Push a C string literal onto the stack as a Symbol (for compiler-generated code)
338///
339/// Stack effect: ( -- symbol )
340///
341/// # Safety
342/// The c_str pointer must be valid and null-terminated
343#[unsafe(no_mangle)]
344pub unsafe extern "C" fn patch_seq_push_symbol(stack: Stack, c_str: *const i8) -> Stack {
345    assert!(!c_str.is_null(), "push_symbol: null string pointer");
346
347    let s = unsafe {
348        CStr::from_ptr(c_str)
349            .to_str()
350            .expect("push_symbol: invalid UTF-8 in symbol literal")
351            .to_owned()
352    };
353
354    unsafe { push(stack, Value::Symbol(s.into())) }
355}
356
/// Layout of static interned symbol data from LLVM IR
///
/// Matches the LLVM IR structure:
/// `{ ptr, i64 len, i64 capacity, i8 global }`
///
/// `#[repr(C)]` fixes the field order and layout so it lines up with that
/// IR structure; do not reorder or retype the fields.
///
/// # Safety Contract
///
/// This struct must ONLY be constructed by the compiler in static globals.
/// Invariants that MUST hold:
/// - `ptr` points to valid static UTF-8 string data with lifetime `'static`
/// - `len` matches the actual byte length of the string
/// - `capacity` MUST be 0 (marks symbol as interned/static)
/// - `global` MUST be 1 (marks symbol as static allocation)
///
/// Violating these invariants causes undefined behavior (memory corruption,
/// double-free, or null pointer dereference).
#[repr(C)]
pub struct InternedSymbolData {
    // Start of the static string bytes; must be non-null.
    ptr: *const u8,
    // Byte length of the string at `ptr`.
    len: i64,
    capacity: i64, // MUST be 0 for interned symbols
    global: i8,    // MUST be 1 for interned symbols
}
380
381/// Push an interned symbol onto the stack (Issue #166)
382///
383/// This pushes a compile-time symbol literal that shares static memory.
384/// The SeqString has capacity=0 to mark it as interned (never freed).
385///
386/// Stack effect: ( -- Symbol )
387///
388/// # Safety
389/// The symbol_data pointer must point to a valid static InternedSymbolData structure.
390#[unsafe(no_mangle)]
391pub unsafe extern "C" fn patch_seq_push_interned_symbol(
392    stack: Stack,
393    symbol_data: *const InternedSymbolData,
394) -> Stack {
395    assert!(
396        !symbol_data.is_null(),
397        "push_interned_symbol: null symbol data pointer"
398    );
399
400    let data = unsafe { &*symbol_data };
401
402    // Validate interned symbol invariants - these are safety-critical
403    // and must run in release builds to prevent memory corruption
404    assert!(!data.ptr.is_null(), "Interned symbol data pointer is null");
405    assert_eq!(data.capacity, 0, "Interned symbols must have capacity=0");
406    assert_ne!(data.global, 0, "Interned symbols must have global=1");
407
408    // Create SeqString that points to static data
409    // capacity=0 marks it as interned (Drop will skip deallocation)
410    // Safety: from_raw_parts requires valid ptr/len/capacity, which we trust
411    // from the LLVM-generated static data
412    let seq_str = unsafe {
413        crate::seqstring::SeqString::from_raw_parts(
414            data.ptr,
415            data.len as usize,
416            data.capacity as usize, // 0 for interned
417            data.global != 0,       // true for interned
418        )
419    };
420
421    unsafe { push(stack, Value::Symbol(seq_str)) }
422}
423
424/// Push a SeqString value onto the stack
425///
426/// This is used when we already have a SeqString (e.g., from closures).
427/// Unlike push_string which takes a C string, this takes a SeqString by value.
428///
429/// Stack effect: ( -- String )
430///
431/// # Safety
432/// The SeqString must be valid. This is only called from LLVM-generated code, not actual C code.
433#[allow(improper_ctypes_definitions)]
434#[unsafe(no_mangle)]
435pub unsafe extern "C" fn patch_seq_push_seqstring(
436    stack: Stack,
437    seq_str: crate::seqstring::SeqString,
438) -> Stack {
439    unsafe { push(stack, Value::String(seq_str)) }
440}
441
442/// Convert a Symbol to a String
443///
444/// Stack effect: ( Symbol -- String )
445///
446/// # Safety
447/// Stack must have a Symbol on top.
448#[unsafe(no_mangle)]
449pub unsafe extern "C" fn patch_seq_symbol_to_string(stack: Stack) -> Stack {
450    assert!(!stack.is_null(), "symbol_to_string: stack is empty");
451
452    let (rest, value) = unsafe { pop(stack) };
453
454    match value {
455        Value::Symbol(s) => unsafe { push(rest, Value::String(s)) },
456        _ => panic!(
457            "symbol_to_string: expected Symbol on stack, got {:?}",
458            value
459        ),
460    }
461}
462
463/// Convert a String to a Symbol
464///
465/// Stack effect: ( String -- Symbol )
466///
467/// # Safety
468/// Stack must have a String on top.
469#[unsafe(no_mangle)]
470pub unsafe extern "C" fn patch_seq_string_to_symbol(stack: Stack) -> Stack {
471    assert!(!stack.is_null(), "string_to_symbol: stack is empty");
472
473    let (rest, value) = unsafe { pop(stack) };
474
475    match value {
476        Value::String(s) => unsafe { push(rest, Value::Symbol(s)) },
477        _ => panic!(
478            "string_to_symbol: expected String on stack, got {:?}",
479            value
480        ),
481    }
482}
483
484/// Exit the program with a status code
485///
486/// Stack effect: ( exit_code -- )
487///
488/// # Safety
489/// Stack must have an Int on top. Never returns.
490#[unsafe(no_mangle)]
491pub unsafe extern "C" fn patch_seq_exit_op(stack: Stack) -> ! {
492    assert!(!stack.is_null(), "exit_op: stack is empty");
493
494    let (_rest, value) = unsafe { pop(stack) };
495
496    match value {
497        Value::Int(code) => {
498            // Explicitly validate exit code is in Unix-compatible range
499            if !(EXIT_CODE_MIN..=EXIT_CODE_MAX).contains(&code) {
500                panic!(
501                    "exit_op: exit code must be in range {}-{}, got {}",
502                    EXIT_CODE_MIN, EXIT_CODE_MAX, code
503                );
504            }
505            std::process::exit(code as i32);
506        }
507        _ => panic!("exit_op: expected Int on stack, got {:?}", value),
508    }
509}
510
511// Public re-exports with short names for internal use
512pub use patch_seq_exit_op as exit_op;
513pub use patch_seq_int_to_string as int_to_string;
514pub use patch_seq_push_interned_symbol as push_interned_symbol;
515pub use patch_seq_push_seqstring as push_seqstring;
516pub use patch_seq_push_string as push_string;
517pub use patch_seq_push_symbol as push_symbol;
518pub use patch_seq_read_line as read_line;
519pub use patch_seq_read_n as read_n;
520pub use patch_seq_string_to_symbol as string_to_symbol;
521pub use patch_seq_symbol_to_string as symbol_to_string;
522pub use patch_seq_write as write;
523pub use patch_seq_write_line as write_line;
524
525#[cfg(test)]
526mod tests;