seq_runtime/
io.rs

1//! I/O Operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//!
5//! # Safety Contract
6//!
7//! **IMPORTANT:** These functions are designed to be called ONLY by compiler-generated code,
8//! not by end users or arbitrary C code. The compiler is responsible for:
9//!
10//! - Ensuring stack has correct types (verified by type checker)
11//! - Passing valid, null-terminated C strings to `push_string`
12//! - Never calling these functions directly from user code
13//!
14//! # String Handling
15//!
16//! String literals from the compiler must be valid UTF-8 C strings (null-terminated).
17//! Currently, each string literal is allocated as an owned `String`. See
18//! `docs/STRING_INTERNING_DESIGN.md` for discussion of future optimizations
19//! (interning, static references, etc.).
20
21use crate::stack::{Stack, pop, push};
22use crate::value::Value;
23use std::ffi::CStr;
24use std::io;
25use std::sync::LazyLock;
26
/// Coroutine-aware stdout mutex.
///
/// Held by `patch_seq_write_line` and `patch_seq_write` for the duration of their
/// writes to file descriptor 1, so output from different strands is serialized
/// (for example, a line and its trailing newline are emitted under one guard).
///
/// Uses may::sync::Mutex which yields the coroutine when contended instead of blocking the OS thread.
/// By serializing access to stdout, we prevent RefCell borrow panics that occur when multiple
/// coroutines on the same thread try to access stdout's internal RefCell concurrently.
///
/// The mutex protects no data of its own (`()` payload); it is created lazily on first use.
static STDOUT_MUTEX: LazyLock<may::sync::Mutex<()>> = LazyLock::new(|| may::sync::Mutex::new(()));
32
/// Lowest exit code accepted by `patch_seq_exit_op` (inclusive).
///
/// Together with `EXIT_CODE_MAX` this defines the valid exit code range for
/// Unix compatibility.
const EXIT_CODE_MIN: i64 = 0;
/// Highest exit code accepted by `patch_seq_exit_op` (inclusive).
const EXIT_CODE_MAX: i64 = 255;
36
37/// Write a string to stdout followed by a newline
38///
39/// Stack effect: ( str -- )
40///
41/// # Safety
42/// Stack must have a String value on top
43///
44/// # Concurrency
45/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
46/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
47/// This prevents RefCell borrow panics when multiple strands write concurrently.
48#[unsafe(no_mangle)]
49pub unsafe extern "C" fn patch_seq_write_line(stack: Stack) -> Stack {
50    assert!(!stack.is_null(), "write_line: stack is empty");
51
52    let (rest, value) = unsafe { pop(stack) };
53
54    match value {
55        Value::String(s) => {
56            // Acquire coroutine-aware mutex (yields if contended, doesn't block)
57            // This serializes access to stdout
58            let _guard = STDOUT_MUTEX.lock().unwrap();
59
60            // Write directly to fd 1 using libc to avoid Rust's std::io::stdout() RefCell.
61            // Rust's standard I/O uses RefCell which panics on concurrent access from
62            // multiple coroutines on the same thread.
63            let str_slice = s.as_str();
64            let newline = b"\n";
65            unsafe {
66                libc::write(
67                    1,
68                    str_slice.as_ptr() as *const libc::c_void,
69                    str_slice.len(),
70                );
71                libc::write(1, newline.as_ptr() as *const libc::c_void, newline.len());
72            }
73
74            rest
75        }
76        _ => panic!("write_line: expected String on stack, got {:?}", value),
77    }
78}
79
80/// Write a string to stdout without a trailing newline
81///
82/// Stack effect: ( str -- )
83///
84/// This is useful for protocols like LSP that require exact byte output
85/// without trailing newlines.
86///
87/// # Safety
88/// Stack must have a String value on top
89///
90/// # Concurrency
91/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
92/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
93#[unsafe(no_mangle)]
94pub unsafe extern "C" fn patch_seq_write(stack: Stack) -> Stack {
95    assert!(!stack.is_null(), "write: stack is empty");
96
97    let (rest, value) = unsafe { pop(stack) };
98
99    match value {
100        Value::String(s) => {
101            let _guard = STDOUT_MUTEX.lock().unwrap();
102
103            let str_slice = s.as_str();
104            unsafe {
105                libc::write(
106                    1,
107                    str_slice.as_ptr() as *const libc::c_void,
108                    str_slice.len(),
109                );
110            }
111
112            rest
113        }
114        _ => panic!("write: expected String on stack, got {:?}", value),
115    }
116}
117
118/// Read a line from stdin
119///
120/// Returns the line and a success flag:
121/// - ( line true ) on success (line includes trailing newline)
122/// - ( "" false ) on I/O error or EOF
123///
124/// Use `string.chomp` to remove trailing newlines if needed.
125///
126/// # Line Ending Normalization
127///
128/// Line endings are normalized to `\n` regardless of platform. Windows-style
129/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
130/// across different operating systems.
131///
132/// Stack effect: ( -- String Bool )
133///
134/// Errors are values, not crashes.
135///
136/// # Safety
137/// Always safe to call
138#[unsafe(no_mangle)]
139pub unsafe extern "C" fn patch_seq_read_line(stack: Stack) -> Stack {
140    use std::io::BufRead;
141
142    let stdin = io::stdin();
143    let mut line = String::new();
144
145    match stdin.lock().read_line(&mut line) {
146        Ok(0) => {
147            // EOF - return empty string and false
148            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
149            unsafe { push(stack, Value::Bool(false)) }
150        }
151        Ok(_) => {
152            // Normalize line endings: \r\n -> \n
153            if line.ends_with("\r\n") {
154                line.pop(); // remove \n
155                line.pop(); // remove \r
156                line.push('\n'); // add back \n
157            }
158            let stack = unsafe { push(stack, Value::String(line.into())) };
159            unsafe { push(stack, Value::Bool(true)) }
160        }
161        Err(_) => {
162            // I/O error - return empty string and false
163            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
164            unsafe { push(stack, Value::Bool(false)) }
165        }
166    }
167}
168
169/// Read a line from stdin with explicit EOF detection
170///
171/// Returns the line and a status flag:
172/// - ( line 1 ) on success (line includes trailing newline)
173/// - ( "" 0 ) at EOF or I/O error
174///
175/// Stack effect: ( -- String Int )
176///
177/// The `+` suffix indicates this returns a result pattern (value + status).
178/// Errors are values, not crashes.
179///
180/// # Line Ending Normalization
181///
182/// Line endings are normalized to `\n` regardless of platform. Windows-style
183/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
184/// across different operating systems.
185///
186/// # Safety
187/// Always safe to call
188#[unsafe(no_mangle)]
189pub unsafe extern "C" fn patch_seq_read_line_plus(stack: Stack) -> Stack {
190    use std::io::BufRead;
191
192    let stdin = io::stdin();
193    let mut line = String::new();
194
195    match stdin.lock().read_line(&mut line) {
196        Ok(0) => {
197            // EOF
198            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
199            unsafe { push(stack, Value::Int(0)) }
200        }
201        Ok(_) => {
202            // Normalize line endings: \r\n -> \n
203            if line.ends_with("\r\n") {
204                line.pop(); // remove \n
205                line.pop(); // remove \r
206                line.push('\n'); // add back \n
207            }
208            let stack = unsafe { push(stack, Value::String(line.into())) };
209            unsafe { push(stack, Value::Int(1)) }
210        }
211        Err(_) => {
212            // I/O error - treat like EOF
213            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
214            unsafe { push(stack, Value::Int(0)) }
215        }
216    }
217}
218
/// Maximum bytes allowed for a single read_n call (10MB)
/// This prevents accidental or malicious massive memory allocations.
/// LSP messages are typically < 1MB, so 10MB provides generous headroom.
///
/// The bound is inclusive: `validate_read_n_count` rejects only counts strictly
/// greater than this value. It is an `i64` so it can be compared directly with
/// the payload of `Value::Int`.
const READ_N_MAX_BYTES: i64 = 10 * 1024 * 1024;
223
224/// Validates and extracts the byte count from a Value for read_n.
225/// Returns Ok(usize) on success, Err(message) on validation failure.
226fn validate_read_n_count(value: &Value) -> Result<usize, String> {
227    match value {
228        Value::Int(n) if *n < 0 => Err(format!(
229            "read_n: byte count must be non-negative, got {}",
230            n
231        )),
232        Value::Int(n) if *n > READ_N_MAX_BYTES => Err(format!(
233            "read_n: byte count {} exceeds maximum allowed ({})",
234            n, READ_N_MAX_BYTES
235        )),
236        Value::Int(n) => Ok(*n as usize),
237        _ => Err(format!("read_n: expected Int on stack, got {:?}", value)),
238    }
239}
240
241/// Read exactly N bytes from stdin
242///
243/// Returns the bytes read and a status flag:
244/// - ( string 1 ) on success (read all N bytes)
245/// - ( string 0 ) at EOF, partial read, or error (string may be shorter than N)
246///
247/// Stack effect: ( Int -- String Int )
248///
249/// Like `io.read-line+`, this returns a result pattern (value + status) to allow
250/// explicit EOF detection. The function name omits the `+` suffix for brevity
251/// since byte-count reads are inherently status-oriented.
252///
253/// Errors are values, not crashes.
254///
255/// This is used for protocols like LSP where message bodies are byte-counted
256/// and don't have trailing newlines.
257///
258/// # UTF-8 Handling
259/// The bytes are interpreted as UTF-8. Invalid UTF-8 sequences are replaced
260/// with the Unicode replacement character (U+FFFD). This is appropriate for
261/// text-based protocols like LSP but may not be suitable for binary data.
262///
263/// # Safety
264/// Stack must have an Int on top. The integer must be non-negative and
265/// not exceed READ_N_MAX_BYTES (10MB).
266#[unsafe(no_mangle)]
267pub unsafe extern "C" fn patch_seq_read_n(stack: Stack) -> Stack {
268    use std::io::Read;
269
270    assert!(!stack.is_null(), "read_n: stack is empty");
271
272    let (stack, value) = unsafe { pop(stack) };
273
274    // Validate input - return error status for invalid input
275    let n = match validate_read_n_count(&value) {
276        Ok(n) => n,
277        Err(_) => {
278            // Invalid input - return empty string and error status
279            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
280            return unsafe { push(stack, Value::Int(0)) };
281        }
282    };
283
284    let stdin = io::stdin();
285    let mut buffer = vec![0u8; n];
286    let mut total_read = 0;
287
288    {
289        let mut handle = stdin.lock();
290        while total_read < n {
291            match handle.read(&mut buffer[total_read..]) {
292                Ok(0) => break, // EOF
293                Ok(bytes_read) => total_read += bytes_read,
294                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
295                Err(_) => break, // I/O error - stop reading, return what we have
296            }
297        }
298    }
299
300    // Truncate to actual bytes read
301    buffer.truncate(total_read);
302
303    // Convert to String (assuming UTF-8)
304    let s = String::from_utf8_lossy(&buffer).into_owned();
305
306    // Status: 1 if we read all N bytes, 0 otherwise
307    let status = if total_read == n { 1i64 } else { 0i64 };
308
309    let stack = unsafe { push(stack, Value::String(s.into())) };
310    unsafe { push(stack, Value::Int(status)) }
311}
312
313/// Convert an integer to a string
314///
315/// Stack effect: ( Int -- String )
316///
317/// # Safety
318/// Stack must have an Int value on top
319#[unsafe(no_mangle)]
320pub unsafe extern "C" fn patch_seq_int_to_string(stack: Stack) -> Stack {
321    assert!(!stack.is_null(), "int_to_string: stack is empty");
322
323    let (rest, value) = unsafe { pop(stack) };
324
325    match value {
326        Value::Int(n) => unsafe { push(rest, Value::String(n.to_string().into())) },
327        _ => panic!("int_to_string: expected Int on stack, got {:?}", value),
328    }
329}
330
331/// Push a C string literal onto the stack (for compiler-generated code)
332///
333/// Stack effect: ( -- str )
334///
335/// # Safety
336/// The c_str pointer must be valid and null-terminated
337#[unsafe(no_mangle)]
338pub unsafe extern "C" fn patch_seq_push_string(stack: Stack, c_str: *const i8) -> Stack {
339    assert!(!c_str.is_null(), "push_string: null string pointer");
340
341    let s = unsafe {
342        CStr::from_ptr(c_str)
343            .to_str()
344            .expect("push_string: invalid UTF-8 in string literal")
345            .to_owned()
346    };
347
348    unsafe { push(stack, Value::String(s.into())) }
349}
350
351/// Push a C string literal onto the stack as a Symbol (for compiler-generated code)
352///
353/// Stack effect: ( -- symbol )
354///
355/// # Safety
356/// The c_str pointer must be valid and null-terminated
357#[unsafe(no_mangle)]
358pub unsafe extern "C" fn patch_seq_push_symbol(stack: Stack, c_str: *const i8) -> Stack {
359    assert!(!c_str.is_null(), "push_symbol: null string pointer");
360
361    let s = unsafe {
362        CStr::from_ptr(c_str)
363            .to_str()
364            .expect("push_symbol: invalid UTF-8 in symbol literal")
365            .to_owned()
366    };
367
368    unsafe { push(stack, Value::Symbol(s.into())) }
369}
370
/// Layout of static interned symbol data from LLVM IR
///
/// Matches the LLVM IR structure:
/// `{ ptr, i64 len, i64 capacity, i8 global }`
///
/// `#[repr(C)]` fixes the field order and layout so that it lines up with the
/// compiler-emitted global. Instances are read by
/// `patch_seq_push_interned_symbol`, which forwards the four fields to
/// `SeqString::from_raw_parts`.
///
/// # Safety Contract
///
/// This struct must ONLY be constructed by the compiler in static globals.
/// Invariants that MUST hold:
/// - `ptr` points to valid static UTF-8 string data with lifetime `'static`
/// - `len` matches the actual byte length of the string
/// - `capacity` MUST be 0 (marks symbol as interned/static)
/// - `global` MUST be 1 (marks symbol as static allocation)
///
/// Violating these invariants causes undefined behavior (memory corruption,
/// double-free, or null pointer dereference).
#[repr(C)]
pub struct InternedSymbolData {
    /// Start of the symbol's UTF-8 bytes; must not be null.
    ptr: *const u8,
    /// Length of the symbol text in bytes.
    len: i64,
    /// MUST be 0 for interned symbols.
    capacity: i64,
    /// MUST be 1 for interned symbols (the runtime check only requires non-zero).
    global: i8,
}
394
395/// Push an interned symbol onto the stack (Issue #166)
396///
397/// This pushes a compile-time symbol literal that shares static memory.
398/// The SeqString has capacity=0 to mark it as interned (never freed).
399///
400/// Stack effect: ( -- Symbol )
401///
402/// # Safety
403/// The symbol_data pointer must point to a valid static InternedSymbolData structure.
404#[unsafe(no_mangle)]
405pub unsafe extern "C" fn patch_seq_push_interned_symbol(
406    stack: Stack,
407    symbol_data: *const InternedSymbolData,
408) -> Stack {
409    assert!(
410        !symbol_data.is_null(),
411        "push_interned_symbol: null symbol data pointer"
412    );
413
414    let data = unsafe { &*symbol_data };
415
416    // Validate interned symbol invariants - these are safety-critical
417    // and must run in release builds to prevent memory corruption
418    assert!(!data.ptr.is_null(), "Interned symbol data pointer is null");
419    assert_eq!(data.capacity, 0, "Interned symbols must have capacity=0");
420    assert_ne!(data.global, 0, "Interned symbols must have global=1");
421
422    // Create SeqString that points to static data
423    // capacity=0 marks it as interned (Drop will skip deallocation)
424    // Safety: from_raw_parts requires valid ptr/len/capacity, which we trust
425    // from the LLVM-generated static data
426    let seq_str = unsafe {
427        crate::seqstring::SeqString::from_raw_parts(
428            data.ptr,
429            data.len as usize,
430            data.capacity as usize, // 0 for interned
431            data.global != 0,       // true for interned
432        )
433    };
434
435    unsafe { push(stack, Value::Symbol(seq_str)) }
436}
437
438/// Push a SeqString value onto the stack
439///
440/// This is used when we already have a SeqString (e.g., from closures).
441/// Unlike push_string which takes a C string, this takes a SeqString by value.
442///
443/// Stack effect: ( -- String )
444///
445/// # Safety
446/// The SeqString must be valid. This is only called from LLVM-generated code, not actual C code.
447#[allow(improper_ctypes_definitions)]
448#[unsafe(no_mangle)]
449pub unsafe extern "C" fn patch_seq_push_seqstring(
450    stack: Stack,
451    seq_str: crate::seqstring::SeqString,
452) -> Stack {
453    unsafe { push(stack, Value::String(seq_str)) }
454}
455
456/// Convert a Symbol to a String
457///
458/// Stack effect: ( Symbol -- String )
459///
460/// # Safety
461/// Stack must have a Symbol on top.
462#[unsafe(no_mangle)]
463pub unsafe extern "C" fn patch_seq_symbol_to_string(stack: Stack) -> Stack {
464    assert!(!stack.is_null(), "symbol_to_string: stack is empty");
465
466    let (rest, value) = unsafe { pop(stack) };
467
468    match value {
469        Value::Symbol(s) => unsafe { push(rest, Value::String(s)) },
470        _ => panic!(
471            "symbol_to_string: expected Symbol on stack, got {:?}",
472            value
473        ),
474    }
475}
476
477/// Convert a String to a Symbol
478///
479/// Stack effect: ( String -- Symbol )
480///
481/// # Safety
482/// Stack must have a String on top.
483#[unsafe(no_mangle)]
484pub unsafe extern "C" fn patch_seq_string_to_symbol(stack: Stack) -> Stack {
485    assert!(!stack.is_null(), "string_to_symbol: stack is empty");
486
487    let (rest, value) = unsafe { pop(stack) };
488
489    match value {
490        Value::String(s) => unsafe { push(rest, Value::Symbol(s)) },
491        _ => panic!(
492            "string_to_symbol: expected String on stack, got {:?}",
493            value
494        ),
495    }
496}
497
498/// Exit the program with a status code
499///
500/// Stack effect: ( exit_code -- )
501///
502/// # Safety
503/// Stack must have an Int on top. Never returns.
504#[unsafe(no_mangle)]
505pub unsafe extern "C" fn patch_seq_exit_op(stack: Stack) -> ! {
506    assert!(!stack.is_null(), "exit_op: stack is empty");
507
508    let (_rest, value) = unsafe { pop(stack) };
509
510    match value {
511        Value::Int(code) => {
512            // Explicitly validate exit code is in Unix-compatible range
513            if !(EXIT_CODE_MIN..=EXIT_CODE_MAX).contains(&code) {
514                panic!(
515                    "exit_op: exit code must be in range {}-{}, got {}",
516                    EXIT_CODE_MIN, EXIT_CODE_MAX, code
517                );
518            }
519            std::process::exit(code as i32);
520        }
521        _ => panic!("exit_op: expected Int on stack, got {:?}", value),
522    }
523}
524
525// Public re-exports with short names for internal use
526pub use patch_seq_exit_op as exit_op;
527pub use patch_seq_int_to_string as int_to_string;
528pub use patch_seq_push_interned_symbol as push_interned_symbol;
529pub use patch_seq_push_seqstring as push_seqstring;
530pub use patch_seq_push_string as push_string;
531pub use patch_seq_push_symbol as push_symbol;
532pub use patch_seq_read_line as read_line;
533pub use patch_seq_read_line_plus as read_line_plus;
534pub use patch_seq_read_n as read_n;
535pub use patch_seq_string_to_symbol as string_to_symbol;
536pub use patch_seq_symbol_to_string as symbol_to_string;
537pub use patch_seq_write as write;
538pub use patch_seq_write_line as write_line;
539
/// Unit tests for the stack-level I/O operations and for `read_n` input validation.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::Value;
    use std::ffi::CString;

    /// Smoke test: writing a line must not panic.
    #[test]
    fn test_write_line() {
        unsafe {
            let stack = crate::stack::alloc_test_stack();
            let stack = push(stack, Value::String("Hello, World!".into()));
            let _stack = write_line(stack);
        }
    }

    /// Smoke test: writing without a newline must not panic.
    #[test]
    fn test_write() {
        unsafe {
            let stack = crate::stack::alloc_test_stack();
            let stack = push(stack, Value::String("no newline".into()));
            let _stack = write(stack);
        }
    }

    #[test]
    fn test_push_string() {
        unsafe {
            let stack = crate::stack::alloc_test_stack();
            let test_str = CString::new("Test").unwrap();
            let stack = push_string(stack, test_str.as_ptr());

            let (_stack, value) = pop(stack);
            assert_eq!(value, Value::String("Test".into()));
        }
    }

    #[test]
    fn test_empty_string() {
        unsafe {
            // Empty string should be handled correctly
            let stack = crate::stack::alloc_test_stack();
            let empty_str = CString::new("").unwrap();
            let stack = push_string(stack, empty_str.as_ptr());

            // Continue with the stack returned by `pop`; the pre-pop pointer is
            // stale once its top value has been taken.
            let (stack, value) = pop(stack);
            assert_eq!(value, Value::String("".into()));

            // Write empty string should work without panic
            let stack = push(stack, Value::String("".into()));
            let _stack = write_line(stack);
        }
    }

    #[test]
    fn test_unicode_strings() {
        unsafe {
            // Test that Unicode strings are handled correctly
            let stack = crate::stack::alloc_test_stack();
            let unicode_str = CString::new("Hello, 世界! 🌍").unwrap();
            let stack = push_string(stack, unicode_str.as_ptr());

            let (_stack, value) = pop(stack);
            assert_eq!(value, Value::String("Hello, 世界! 🌍".into()));
        }
    }

    // =========================================================================
    // read_n validation tests
    // =========================================================================

    #[test]
    fn test_read_n_valid_input() {
        assert_eq!(super::validate_read_n_count(&Value::Int(0)), Ok(0));
        assert_eq!(super::validate_read_n_count(&Value::Int(100)), Ok(100));
        assert_eq!(
            super::validate_read_n_count(&Value::Int(1024 * 1024)), // 1MB
            Ok(1024 * 1024)
        );
    }

    #[test]
    fn test_read_n_negative_input() {
        let result = super::validate_read_n_count(&Value::Int(-1));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("must be non-negative"));
    }

    #[test]
    fn test_read_n_large_negative_input() {
        let result = super::validate_read_n_count(&Value::Int(i64::MIN));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("must be non-negative"));
    }

    #[test]
    fn test_read_n_exceeds_max_bytes() {
        let result = super::validate_read_n_count(&Value::Int(super::READ_N_MAX_BYTES + 1));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("exceeds maximum allowed"));
    }

    #[test]
    fn test_read_n_at_max_bytes_ok() {
        // Exactly at the limit should be OK
        let result = super::validate_read_n_count(&Value::Int(super::READ_N_MAX_BYTES));
        assert_eq!(result, Ok(super::READ_N_MAX_BYTES as usize));
    }

    #[test]
    fn test_read_n_wrong_type_string() {
        let result = super::validate_read_n_count(&Value::String("not an int".into()));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("expected Int"));
    }

    #[test]
    fn test_read_n_wrong_type_bool() {
        let result = super::validate_read_n_count(&Value::Bool(true));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("expected Int"));
    }
}