Skip to main content

seq_runtime/
io.rs

1//! I/O Operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//!
5//! # Safety Contract
6//!
7//! **IMPORTANT:** These functions are designed to be called ONLY by compiler-generated code,
8//! not by end users or arbitrary C code. The compiler is responsible for:
9//!
10//! - Ensuring stack has correct types (verified by type checker)
11//! - Passing valid, null-terminated C strings to `push_string`
12//! - Never calling these functions directly from user code
13//!
14//! # String Handling
15//!
16//! String literals from the compiler must be valid UTF-8 C strings (null-terminated).
17//! Currently, each string literal is allocated as an owned `String`. See
18//! `docs/STRING_INTERNING_DESIGN.md` for discussion of future optimizations
19//! (interning, static references, etc.).
20
21use crate::stack::{Stack, pop, push};
22use crate::value::Value;
23use std::ffi::CStr;
24use std::io;
25use std::sync::LazyLock;
26
/// Coroutine-aware stdout mutex.
///
/// Uses `may::sync::Mutex`, which yields the coroutine when contended instead of
/// blocking the OS thread. The write entry points in this module hold the guard
/// for the whole duration of their output, so the pieces of a single call (for
/// example a line and its trailing newline) are emitted without another strand's
/// output landing in between.
///
/// Historical note: the lock was introduced to prevent RefCell borrow panics that
/// occur when multiple coroutines on the same thread access Rust's stdout handle
/// concurrently.
static STDOUT_MUTEX: LazyLock<may::sync::Mutex<()>> = LazyLock::new(|| may::sync::Mutex::new(()));

/// Smallest exit code accepted by `patch_seq_exit_op` (valid range for Unix compatibility).
const EXIT_CODE_MIN: i64 = 0;
/// Largest exit code accepted by `patch_seq_exit_op` (valid range for Unix compatibility).
const EXIT_CODE_MAX: i64 = 255;
36
37/// Write a string to stdout followed by a newline
38///
39/// Stack effect: ( str -- )
40///
41/// # Safety
42/// Stack must have a String value on top
43///
44/// # Concurrency
45/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
46/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
47/// This prevents RefCell borrow panics when multiple strands write concurrently.
48#[unsafe(no_mangle)]
49pub unsafe extern "C" fn patch_seq_write_line(stack: Stack) -> Stack {
50    assert!(!stack.is_null(), "write_line: stack is empty");
51
52    let (rest, value) = unsafe { pop(stack) };
53
54    match value {
55        Value::String(s) => {
56            // Acquire coroutine-aware mutex (yields if contended, doesn't block)
57            // This serializes access to stdout
58            let _guard = STDOUT_MUTEX.lock().unwrap();
59
60            // Write directly to fd 1 using libc to avoid Rust's std::io::stdout() RefCell.
61            // Rust's standard I/O uses RefCell which panics on concurrent access from
62            // multiple coroutines on the same thread.
63            // Byte-clean: write the underlying bytes directly to fd 1.
64            // libc::write takes a raw pointer + length, so we don't
65            // need a `&str`. Binary response bodies, ANSI escapes,
66            // arbitrary protocol output all flow through unchanged.
67            let bytes = s.as_bytes();
68            let newline = b"\n";
69            unsafe {
70                libc::write(1, bytes.as_ptr() as *const libc::c_void, bytes.len());
71                libc::write(1, newline.as_ptr() as *const libc::c_void, newline.len());
72            }
73
74            rest
75        }
76        _ => panic!("write_line: expected String on stack, got {:?}", value),
77    }
78}
79
80/// Write a string to stdout without a trailing newline
81///
82/// Stack effect: ( str -- )
83///
84/// This is useful for protocols like LSP that require exact byte output
85/// without trailing newlines.
86///
87/// # Safety
88/// Stack must have a String value on top
89///
90/// # Concurrency
91/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
92/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
93#[unsafe(no_mangle)]
94pub unsafe extern "C" fn patch_seq_write(stack: Stack) -> Stack {
95    assert!(!stack.is_null(), "write: stack is empty");
96
97    let (rest, value) = unsafe { pop(stack) };
98
99    match value {
100        Value::String(s) => {
101            let _guard = STDOUT_MUTEX.lock().unwrap();
102
103            // Byte-clean: write the underlying bytes directly to fd 1.
104            let bytes = s.as_bytes();
105            unsafe {
106                libc::write(1, bytes.as_ptr() as *const libc::c_void, bytes.len());
107            }
108
109            rest
110        }
111        _ => panic!("write: expected String on stack, got {:?}", value),
112    }
113}
114
115/// Read a line from stdin
116///
117/// Returns the line and a success flag:
118/// - ( line true ) on success (line includes trailing newline)
119/// - ( "" false ) on I/O error or EOF
120///
121/// Use `string.chomp` to remove trailing newlines if needed.
122///
123/// # Line Ending Normalization
124///
125/// Line endings are normalized to `\n` regardless of platform. Windows-style
126/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
127/// across different operating systems.
128///
129/// Stack effect: ( -- String Bool )
130///
131/// Errors are values, not crashes.
132///
133/// # Safety
134/// Always safe to call
135#[unsafe(no_mangle)]
136pub unsafe extern "C" fn patch_seq_read_line(stack: Stack) -> Stack {
137    use std::io::BufRead;
138
139    let stdin = io::stdin();
140    let mut line = String::new();
141
142    match stdin.lock().read_line(&mut line) {
143        Ok(0) => {
144            // EOF - return empty string and false
145            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
146            unsafe { push(stack, Value::Bool(false)) }
147        }
148        Ok(_) => {
149            // Normalize line endings: \r\n -> \n
150            if line.ends_with("\r\n") {
151                line.pop(); // remove \n
152                line.pop(); // remove \r
153                line.push('\n'); // add back \n
154            }
155            let stack = unsafe { push(stack, Value::String(line.into())) };
156            unsafe { push(stack, Value::Bool(true)) }
157        }
158        Err(_) => {
159            // I/O error - return empty string and false
160            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
161            unsafe { push(stack, Value::Bool(false)) }
162        }
163    }
164}
165
166/// Read a line from stdin with explicit EOF detection
167///
168/// Returns the line and a status flag:
169/// - ( line 1 ) on success (line includes trailing newline)
170/// - ( "" 0 ) at EOF or I/O error
171///
172/// Stack effect: ( -- String Int )
173///
174/// The `+` suffix indicates this returns a result pattern (value + status).
175/// Errors are values, not crashes.
176///
177/// # Line Ending Normalization
178///
179/// Line endings are normalized to `\n` regardless of platform. Windows-style
180/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
181/// across different operating systems.
182///
183/// # Safety
184/// Always safe to call
185#[unsafe(no_mangle)]
186pub unsafe extern "C" fn patch_seq_read_line_plus(stack: Stack) -> Stack {
187    use std::io::BufRead;
188
189    let stdin = io::stdin();
190    let mut line = String::new();
191
192    match stdin.lock().read_line(&mut line) {
193        Ok(0) => {
194            // EOF
195            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
196            unsafe { push(stack, Value::Int(0)) }
197        }
198        Ok(_) => {
199            // Normalize line endings: \r\n -> \n
200            if line.ends_with("\r\n") {
201                line.pop(); // remove \n
202                line.pop(); // remove \r
203                line.push('\n'); // add back \n
204            }
205            let stack = unsafe { push(stack, Value::String(line.into())) };
206            unsafe { push(stack, Value::Int(1)) }
207        }
208        Err(_) => {
209            // I/O error - treat like EOF
210            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
211            unsafe { push(stack, Value::Int(0)) }
212        }
213    }
214}
215
216/// Maximum bytes allowed for a single read_n call (10MB)
217/// This prevents accidental or malicious massive memory allocations.
218/// LSP messages are typically < 1MB, so 10MB provides generous headroom.
219const READ_N_MAX_BYTES: i64 = 10 * 1024 * 1024;
220
221/// Validates and extracts the byte count from a Value for read_n.
222/// Returns Ok(usize) on success, Err(message) on validation failure.
223fn validate_read_n_count(value: &Value) -> Result<usize, String> {
224    match value {
225        Value::Int(n) if *n < 0 => Err(format!(
226            "read_n: byte count must be non-negative, got {}",
227            n
228        )),
229        Value::Int(n) if *n > READ_N_MAX_BYTES => Err(format!(
230            "read_n: byte count {} exceeds maximum allowed ({})",
231            n, READ_N_MAX_BYTES
232        )),
233        Value::Int(n) => Ok(*n as usize),
234        _ => Err(format!("read_n: expected Int on stack, got {:?}", value)),
235    }
236}
237
238/// Read exactly N bytes from stdin
239///
240/// Returns the bytes read and a status flag:
241/// - ( string 1 ) on success (read all N bytes)
242/// - ( string 0 ) at EOF, partial read, or error (string may be shorter than N)
243///
244/// Stack effect: ( Int -- String Int )
245///
246/// Like `io.read-line+`, this returns a result pattern (value + status) to allow
247/// explicit EOF detection. The function name omits the `+` suffix for brevity
248/// since byte-count reads are inherently status-oriented.
249///
250/// Errors are values, not crashes.
251///
252/// This is used for protocols like LSP where message bodies are byte-counted
253/// and don't have trailing newlines.
254///
255/// # UTF-8 Handling
256/// The bytes are interpreted as UTF-8. Invalid UTF-8 sequences are replaced
257/// with the Unicode replacement character (U+FFFD). This is appropriate for
258/// text-based protocols like LSP but may not be suitable for binary data.
259///
260/// # Safety
261/// Stack must have an Int on top. The integer must be non-negative and
262/// not exceed READ_N_MAX_BYTES (10MB).
263#[unsafe(no_mangle)]
264pub unsafe extern "C" fn patch_seq_read_n(stack: Stack) -> Stack {
265    use std::io::Read;
266
267    assert!(!stack.is_null(), "read_n: stack is empty");
268
269    let (stack, value) = unsafe { pop(stack) };
270
271    // Validate input - return error status for invalid input
272    let n = match validate_read_n_count(&value) {
273        Ok(n) => n,
274        Err(_) => {
275            // Invalid input - return empty string and error status
276            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
277            return unsafe { push(stack, Value::Int(0)) };
278        }
279    };
280
281    let stdin = io::stdin();
282    let mut buffer = vec![0u8; n];
283    let mut total_read = 0;
284
285    {
286        let mut handle = stdin.lock();
287        while total_read < n {
288            match handle.read(&mut buffer[total_read..]) {
289                Ok(0) => break, // EOF
290                Ok(bytes_read) => total_read += bytes_read,
291                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
292                Err(_) => break, // I/O error - stop reading, return what we have
293            }
294        }
295    }
296
297    // Truncate to actual bytes read
298    buffer.truncate(total_read);
299
300    // Convert to String (assuming UTF-8)
301    let s = String::from_utf8_lossy(&buffer).into_owned();
302
303    // Status: 1 if we read all N bytes, 0 otherwise
304    let status = if total_read == n { 1i64 } else { 0i64 };
305
306    let stack = unsafe { push(stack, Value::String(s.into())) };
307    unsafe { push(stack, Value::Int(status)) }
308}
309
310/// Convert an integer to a string
311///
312/// Stack effect: ( Int -- String )
313///
314/// # Safety
315/// Stack must have an Int value on top
316#[unsafe(no_mangle)]
317pub unsafe extern "C" fn patch_seq_int_to_string(stack: Stack) -> Stack {
318    assert!(!stack.is_null(), "int_to_string: stack is empty");
319
320    let (rest, value) = unsafe { pop(stack) };
321
322    match value {
323        Value::Int(n) => unsafe { push(rest, Value::String(n.to_string().into())) },
324        _ => panic!("int_to_string: expected Int on stack, got {:?}", value),
325    }
326}
327
328/// Push a C string literal onto the stack (for compiler-generated code).
329///
330/// Used by codegen paths whose source is always an ASCII identifier
331/// (variant tag comparisons, NULL-FFI fallbacks, etc.) — they have no
332/// embedded NULs, so the C-string convention is fine. Byte-clean
333/// string *literals* go through `patch_seq_push_string_bytes` instead.
334///
335/// In debug builds, this asserts the input is ASCII to catch a future
336/// codegen path that accidentally routes binary data here. In release
337/// the bytes are taken as-is — the comment above is the contract.
338///
339/// Stack effect: ( -- str )
340///
341/// # Safety
342/// The c_str pointer must be valid and null-terminated
343#[unsafe(no_mangle)]
344pub unsafe extern "C" fn patch_seq_push_string(stack: Stack, c_str: *const i8) -> Stack {
345    assert!(!c_str.is_null(), "push_string: null string pointer");
346
347    let bytes = unsafe { CStr::from_ptr(c_str).to_bytes() };
348    debug_assert!(
349        std::str::from_utf8(bytes).is_ok(),
350        "push_string: input must be valid UTF-8 (variant tags, identifier-shaped \
351         literals, FFI fallbacks); arbitrary binary string literals must use \
352         push_string_bytes instead",
353    );
354    let seqstr = crate::seqstring::global_bytes(bytes.to_vec());
355    unsafe { push(stack, Value::String(seqstr)) }
356}
357
358/// Push a byte-clean string literal onto the stack (for compiler-generated
359/// code). Carries an explicit length so embedded NULs and arbitrary bytes
360/// flow through unchanged — this is the codegen target for Seq string
361/// literals after the byte-cleanliness landing.
362///
363/// Stack effect: ( -- str )
364///
365/// # Safety
366/// `ptr` must point to at least `len` valid bytes. `ptr` may not be null
367/// unless `len` is zero.
368#[unsafe(no_mangle)]
369pub unsafe extern "C" fn patch_seq_push_string_bytes(
370    stack: Stack,
371    ptr: *const u8,
372    len: usize,
373) -> Stack {
374    let bytes = if len == 0 {
375        Vec::new()
376    } else {
377        assert!(
378            !ptr.is_null(),
379            "push_string_bytes: null pointer with non-zero length"
380        );
381        unsafe { std::slice::from_raw_parts(ptr, len).to_vec() }
382    };
383    let seqstr = crate::seqstring::global_bytes(bytes);
384    unsafe { push(stack, Value::String(seqstr)) }
385}
386
387/// Push a C string literal onto the stack as a Symbol (for compiler-generated code)
388///
389/// Stack effect: ( -- symbol )
390///
391/// # Safety
392/// The c_str pointer must be valid and null-terminated
393#[unsafe(no_mangle)]
394pub unsafe extern "C" fn patch_seq_push_symbol(stack: Stack, c_str: *const i8) -> Stack {
395    assert!(!c_str.is_null(), "push_symbol: null string pointer");
396
397    let s = unsafe {
398        CStr::from_ptr(c_str)
399            .to_str()
400            .expect("push_symbol: invalid UTF-8 in symbol literal")
401            .to_owned()
402    };
403
404    unsafe { push(stack, Value::Symbol(s.into())) }
405}
406
/// Layout of static interned symbol data from LLVM IR
///
/// Matches the LLVM IR structure:
/// `{ ptr, i64 len, i64 capacity, i8 global }`
///
/// `#[repr(C)]` fixes the field order and layout so the Rust view agrees with
/// the compiler-emitted global. Fields are private: the runtime only reads
/// them in `patch_seq_push_interned_symbol`.
///
/// # Safety Contract
///
/// This struct must ONLY be constructed by the compiler in static globals.
/// Invariants that MUST hold:
/// - `ptr` points to valid static UTF-8 string data with lifetime `'static`
/// - `len` matches the actual byte length of the string
/// - `capacity` MUST be 0 (marks symbol as interned/static)
/// - `global` MUST be 1 (marks symbol as static allocation)
///
/// Violating these invariants causes undefined behavior (memory corruption,
/// double-free, or null pointer dereference).
#[repr(C)]
pub struct InternedSymbolData {
    /// Start of the symbol's bytes.
    ptr: *const u8,
    /// Byte length of the symbol text (signed to match the IR's `i64`).
    len: i64,
    capacity: i64, // MUST be 0 for interned symbols
    global: i8,    // MUST be 1 for interned symbols
}
430
431/// Push an interned symbol onto the stack (Issue #166)
432///
433/// This pushes a compile-time symbol literal that shares static memory.
434/// The SeqString has capacity=0 to mark it as interned (never freed).
435///
436/// Stack effect: ( -- Symbol )
437///
438/// # Safety
439/// The symbol_data pointer must point to a valid static InternedSymbolData structure.
440#[unsafe(no_mangle)]
441pub unsafe extern "C" fn patch_seq_push_interned_symbol(
442    stack: Stack,
443    symbol_data: *const InternedSymbolData,
444) -> Stack {
445    assert!(
446        !symbol_data.is_null(),
447        "push_interned_symbol: null symbol data pointer"
448    );
449
450    let data = unsafe { &*symbol_data };
451
452    // Validate interned symbol invariants - these are safety-critical
453    // and must run in release builds to prevent memory corruption
454    assert!(!data.ptr.is_null(), "Interned symbol data pointer is null");
455    assert_eq!(data.capacity, 0, "Interned symbols must have capacity=0");
456    assert_ne!(data.global, 0, "Interned symbols must have global=1");
457
458    // Create SeqString that points to static data
459    // capacity=0 marks it as interned (Drop will skip deallocation)
460    // Safety: from_raw_parts requires valid ptr/len/capacity, which we trust
461    // from the LLVM-generated static data
462    let seq_str = unsafe {
463        crate::seqstring::SeqString::from_raw_parts(
464            data.ptr,
465            data.len as usize,
466            data.capacity as usize, // 0 for interned
467            data.global != 0,       // true for interned
468        )
469    };
470
471    unsafe { push(stack, Value::Symbol(seq_str)) }
472}
473
474/// Push a SeqString value onto the stack
475///
476/// This is used when we already have a SeqString (e.g., from closures).
477/// Unlike push_string which takes a C string, this takes a SeqString by value.
478///
479/// Stack effect: ( -- String )
480///
481/// # Safety
482/// The SeqString must be valid. This is only called from LLVM-generated code, not actual C code.
483#[allow(improper_ctypes_definitions)]
484#[unsafe(no_mangle)]
485pub unsafe extern "C" fn patch_seq_push_seqstring(
486    stack: Stack,
487    seq_str: crate::seqstring::SeqString,
488) -> Stack {
489    unsafe { push(stack, Value::String(seq_str)) }
490}
491
492/// Convert a Symbol to a String
493///
494/// Stack effect: ( Symbol -- String )
495///
496/// # Safety
497/// Stack must have a Symbol on top.
498#[unsafe(no_mangle)]
499pub unsafe extern "C" fn patch_seq_symbol_to_string(stack: Stack) -> Stack {
500    assert!(!stack.is_null(), "symbol_to_string: stack is empty");
501
502    let (rest, value) = unsafe { pop(stack) };
503
504    match value {
505        Value::Symbol(s) => unsafe { push(rest, Value::String(s)) },
506        _ => panic!(
507            "symbol_to_string: expected Symbol on stack, got {:?}",
508            value
509        ),
510    }
511}
512
513/// Convert a String to a Symbol
514///
515/// Stack effect: ( String -- Symbol )
516///
517/// # Safety
518/// Stack must have a String on top.
519#[unsafe(no_mangle)]
520pub unsafe extern "C" fn patch_seq_string_to_symbol(stack: Stack) -> Stack {
521    assert!(!stack.is_null(), "string_to_symbol: stack is empty");
522
523    let (rest, value) = unsafe { pop(stack) };
524
525    match value {
526        Value::String(s) => unsafe { push(rest, Value::Symbol(s)) },
527        _ => panic!(
528            "string_to_symbol: expected String on stack, got {:?}",
529            value
530        ),
531    }
532}
533
534/// Exit the program with a status code
535///
536/// Stack effect: ( exit_code -- )
537///
538/// # Safety
539/// Stack must have an Int on top. Never returns.
540#[unsafe(no_mangle)]
541pub unsafe extern "C" fn patch_seq_exit_op(stack: Stack) -> ! {
542    assert!(!stack.is_null(), "exit_op: stack is empty");
543
544    let (_rest, value) = unsafe { pop(stack) };
545
546    match value {
547        Value::Int(code) => {
548            // Explicitly validate exit code is in Unix-compatible range
549            if !(EXIT_CODE_MIN..=EXIT_CODE_MAX).contains(&code) {
550                panic!(
551                    "exit_op: exit code must be in range {}-{}, got {}",
552                    EXIT_CODE_MIN, EXIT_CODE_MAX, code
553                );
554            }
555            std::process::exit(code as i32);
556        }
557        _ => panic!("exit_op: expected Int on stack, got {:?}", value),
558    }
559}
560
561// Public re-exports with short names for internal use
562pub use patch_seq_exit_op as exit_op;
563pub use patch_seq_int_to_string as int_to_string;
564pub use patch_seq_push_interned_symbol as push_interned_symbol;
565pub use patch_seq_push_seqstring as push_seqstring;
566pub use patch_seq_push_string as push_string;
567pub use patch_seq_push_symbol as push_symbol;
568pub use patch_seq_read_line as read_line;
569pub use patch_seq_read_line_plus as read_line_plus;
570pub use patch_seq_read_n as read_n;
571pub use patch_seq_string_to_symbol as string_to_symbol;
572pub use patch_seq_symbol_to_string as symbol_to_string;
573pub use patch_seq_write as write;
574pub use patch_seq_write_line as write_line;
575
576#[cfg(test)]
577mod tests;