seq_runtime/
io.rs

1//! I/O Operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//!
5//! # Safety Contract
6//!
7//! **IMPORTANT:** These functions are designed to be called ONLY by compiler-generated code,
8//! not by end users or arbitrary C code. The compiler is responsible for:
9//!
10//! - Ensuring stack has correct types (verified by type checker)
11//! - Passing valid, null-terminated C strings to `push_string`
12//! - Never calling these functions directly from user code
13//!
14//! # String Handling
15//!
16//! String literals from the compiler must be valid UTF-8 C strings (null-terminated).
17//! Currently, each string literal is allocated as an owned `String`. See
18//! `docs/STRING_INTERNING_DESIGN.md` for discussion of future optimizations
19//! (interning, static references, etc.).
20
21use crate::stack::{Stack, pop, push};
22use crate::value::Value;
23use std::ffi::CStr;
24use std::io;
25use std::sync::LazyLock;
26
/// Coroutine-aware stdout mutex.
/// Uses may::sync::Mutex which yields the coroutine when contended instead of blocking the OS thread.
/// By serializing access to stdout, we prevent RefCell borrow panics that occur when multiple
/// coroutines on the same thread try to access stdout's internal RefCell concurrently.
///
/// The mutex guards `()`: it protects no data and exists purely so that holding its guard
/// marks the critical section around a stdout write. It is created lazily on first use.
static STDOUT_MUTEX: LazyLock<may::sync::Mutex<()>> = LazyLock::new(|| may::sync::Mutex::new(()));
32
/// Smallest exit status accepted by `exit_op` (Unix exit statuses are a single byte).
const EXIT_CODE_MIN: i64 = u8::MIN as i64;
/// Largest exit status accepted by `exit_op` (Unix exit statuses are a single byte).
const EXIT_CODE_MAX: i64 = u8::MAX as i64;
36
37/// Write a string to stdout followed by a newline
38///
39/// Stack effect: ( str -- )
40///
41/// # Safety
42/// Stack must have a String value on top
43///
44/// # Concurrency
45/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
46/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
47/// This prevents RefCell borrow panics when multiple strands write concurrently.
48#[unsafe(no_mangle)]
49pub unsafe extern "C" fn patch_seq_write_line(stack: Stack) -> Stack {
50    assert!(!stack.is_null(), "write_line: stack is empty");
51
52    let (rest, value) = unsafe { pop(stack) };
53
54    match value {
55        Value::String(s) => {
56            // Acquire coroutine-aware mutex (yields if contended, doesn't block)
57            // This serializes access to stdout
58            let _guard = STDOUT_MUTEX.lock().unwrap();
59
60            // Write directly to fd 1 using libc to avoid Rust's std::io::stdout() RefCell.
61            // Rust's standard I/O uses RefCell which panics on concurrent access from
62            // multiple coroutines on the same thread.
63            let str_slice = s.as_str();
64            let newline = b"\n";
65            unsafe {
66                libc::write(
67                    1,
68                    str_slice.as_ptr() as *const libc::c_void,
69                    str_slice.len(),
70                );
71                libc::write(1, newline.as_ptr() as *const libc::c_void, newline.len());
72            }
73
74            rest
75        }
76        _ => panic!("write_line: expected String on stack, got {:?}", value),
77    }
78}
79
80/// Read a line from stdin
81///
82/// Returns the line and a success flag:
83/// - ( line true ) on success (line includes trailing newline)
84/// - ( "" false ) on I/O error or EOF
85///
86/// Use `string.chomp` to remove trailing newlines if needed.
87///
88/// # Line Ending Normalization
89///
90/// Line endings are normalized to `\n` regardless of platform. Windows-style
91/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
92/// across different operating systems.
93///
94/// Stack effect: ( -- String Bool )
95///
96/// Errors are values, not crashes.
97///
98/// # Safety
99/// Always safe to call
100#[unsafe(no_mangle)]
101pub unsafe extern "C" fn patch_seq_read_line(stack: Stack) -> Stack {
102    use std::io::BufRead;
103
104    let stdin = io::stdin();
105    let mut line = String::new();
106
107    match stdin.lock().read_line(&mut line) {
108        Ok(0) => {
109            // EOF - return empty string and false
110            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
111            unsafe { push(stack, Value::Bool(false)) }
112        }
113        Ok(_) => {
114            // Normalize line endings: \r\n -> \n
115            if line.ends_with("\r\n") {
116                line.pop(); // remove \n
117                line.pop(); // remove \r
118                line.push('\n'); // add back \n
119            }
120            let stack = unsafe { push(stack, Value::String(line.into())) };
121            unsafe { push(stack, Value::Bool(true)) }
122        }
123        Err(_) => {
124            // I/O error - return empty string and false
125            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
126            unsafe { push(stack, Value::Bool(false)) }
127        }
128    }
129}
130
131/// Read a line from stdin with explicit EOF detection
132///
133/// Returns the line and a status flag:
134/// - ( line 1 ) on success (line includes trailing newline)
135/// - ( "" 0 ) at EOF or I/O error
136///
137/// Stack effect: ( -- String Int )
138///
139/// The `+` suffix indicates this returns a result pattern (value + status).
140/// Errors are values, not crashes.
141///
142/// # Line Ending Normalization
143///
144/// Line endings are normalized to `\n` regardless of platform. Windows-style
145/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
146/// across different operating systems.
147///
148/// # Safety
149/// Always safe to call
150#[unsafe(no_mangle)]
151pub unsafe extern "C" fn patch_seq_read_line_plus(stack: Stack) -> Stack {
152    use std::io::BufRead;
153
154    let stdin = io::stdin();
155    let mut line = String::new();
156
157    match stdin.lock().read_line(&mut line) {
158        Ok(0) => {
159            // EOF
160            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
161            unsafe { push(stack, Value::Int(0)) }
162        }
163        Ok(_) => {
164            // Normalize line endings: \r\n -> \n
165            if line.ends_with("\r\n") {
166                line.pop(); // remove \n
167                line.pop(); // remove \r
168                line.push('\n'); // add back \n
169            }
170            let stack = unsafe { push(stack, Value::String(line.into())) };
171            unsafe { push(stack, Value::Int(1)) }
172        }
173        Err(_) => {
174            // I/O error - treat like EOF
175            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
176            unsafe { push(stack, Value::Int(0)) }
177        }
178    }
179}
180
/// Upper bound, in bytes, on what one read_n call may request (10MB).
/// Capping the request guards against accidental or malicious huge allocations;
/// LSP messages are typically < 1MB, so the cap leaves generous headroom.
const READ_N_MAX_BYTES: i64 = 10 * (1 << 20);
185
186/// Validates and extracts the byte count from a Value for read_n.
187/// Returns Ok(usize) on success, Err(message) on validation failure.
188fn validate_read_n_count(value: &Value) -> Result<usize, String> {
189    match value {
190        Value::Int(n) if *n < 0 => Err(format!(
191            "read_n: byte count must be non-negative, got {}",
192            n
193        )),
194        Value::Int(n) if *n > READ_N_MAX_BYTES => Err(format!(
195            "read_n: byte count {} exceeds maximum allowed ({})",
196            n, READ_N_MAX_BYTES
197        )),
198        Value::Int(n) => Ok(*n as usize),
199        _ => Err(format!("read_n: expected Int on stack, got {:?}", value)),
200    }
201}
202
203/// Read exactly N bytes from stdin
204///
205/// Returns the bytes read and a status flag:
206/// - ( string 1 ) on success (read all N bytes)
207/// - ( string 0 ) at EOF, partial read, or error (string may be shorter than N)
208///
209/// Stack effect: ( Int -- String Int )
210///
211/// Like `io.read-line+`, this returns a result pattern (value + status) to allow
212/// explicit EOF detection. The function name omits the `+` suffix for brevity
213/// since byte-count reads are inherently status-oriented.
214///
215/// Errors are values, not crashes.
216///
217/// This is used for protocols like LSP where message bodies are byte-counted
218/// and don't have trailing newlines.
219///
220/// # UTF-8 Handling
221/// The bytes are interpreted as UTF-8. Invalid UTF-8 sequences are replaced
222/// with the Unicode replacement character (U+FFFD). This is appropriate for
223/// text-based protocols like LSP but may not be suitable for binary data.
224///
225/// # Safety
226/// Stack must have an Int on top. The integer must be non-negative and
227/// not exceed READ_N_MAX_BYTES (10MB).
228#[unsafe(no_mangle)]
229pub unsafe extern "C" fn patch_seq_read_n(stack: Stack) -> Stack {
230    use std::io::Read;
231
232    assert!(!stack.is_null(), "read_n: stack is empty");
233
234    let (stack, value) = unsafe { pop(stack) };
235
236    // Validate input - return error status for invalid input
237    let n = match validate_read_n_count(&value) {
238        Ok(n) => n,
239        Err(_) => {
240            // Invalid input - return empty string and error status
241            let stack = unsafe { push(stack, Value::String("".to_string().into())) };
242            return unsafe { push(stack, Value::Int(0)) };
243        }
244    };
245
246    let stdin = io::stdin();
247    let mut buffer = vec![0u8; n];
248    let mut total_read = 0;
249
250    {
251        let mut handle = stdin.lock();
252        while total_read < n {
253            match handle.read(&mut buffer[total_read..]) {
254                Ok(0) => break, // EOF
255                Ok(bytes_read) => total_read += bytes_read,
256                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
257                Err(_) => break, // I/O error - stop reading, return what we have
258            }
259        }
260    }
261
262    // Truncate to actual bytes read
263    buffer.truncate(total_read);
264
265    // Convert to String (assuming UTF-8)
266    let s = String::from_utf8_lossy(&buffer).into_owned();
267
268    // Status: 1 if we read all N bytes, 0 otherwise
269    let status = if total_read == n { 1i64 } else { 0i64 };
270
271    let stack = unsafe { push(stack, Value::String(s.into())) };
272    unsafe { push(stack, Value::Int(status)) }
273}
274
275/// Convert an integer to a string
276///
277/// Stack effect: ( Int -- String )
278///
279/// # Safety
280/// Stack must have an Int value on top
281#[unsafe(no_mangle)]
282pub unsafe extern "C" fn patch_seq_int_to_string(stack: Stack) -> Stack {
283    assert!(!stack.is_null(), "int_to_string: stack is empty");
284
285    let (rest, value) = unsafe { pop(stack) };
286
287    match value {
288        Value::Int(n) => unsafe { push(rest, Value::String(n.to_string().into())) },
289        _ => panic!("int_to_string: expected Int on stack, got {:?}", value),
290    }
291}
292
293/// Push a C string literal onto the stack (for compiler-generated code)
294///
295/// Stack effect: ( -- str )
296///
297/// # Safety
298/// The c_str pointer must be valid and null-terminated
299#[unsafe(no_mangle)]
300pub unsafe extern "C" fn patch_seq_push_string(stack: Stack, c_str: *const i8) -> Stack {
301    assert!(!c_str.is_null(), "push_string: null string pointer");
302
303    let s = unsafe {
304        CStr::from_ptr(c_str)
305            .to_str()
306            .expect("push_string: invalid UTF-8 in string literal")
307            .to_owned()
308    };
309
310    unsafe { push(stack, Value::String(s.into())) }
311}
312
/// Push a SeqString value onto the stack
///
/// This is used when we already have a SeqString (e.g., from closures).
/// Unlike push_string which takes a C string, this takes a SeqString by value.
/// The value is wrapped in `Value::String` and pushed as-is; no copy or
/// validation is performed here.
///
/// Stack effect: ( -- String )
///
/// # Safety
/// The SeqString must be valid. This is only called from LLVM-generated code, not actual C code.
// NOTE(review): the lint below is presumably silenced because `SeqString` is not an
// FFI-safe type while the function is still `extern "C"` — confirm against its definition.
#[allow(improper_ctypes_definitions)]
#[unsafe(no_mangle)]
pub unsafe extern "C" fn patch_seq_push_seqstring(
    stack: Stack,
    seq_str: crate::seqstring::SeqString,
) -> Stack {
    unsafe { push(stack, Value::String(seq_str)) }
}
330
331/// Exit the program with a status code
332///
333/// Stack effect: ( exit_code -- )
334///
335/// # Safety
336/// Stack must have an Int on top. Never returns.
337#[unsafe(no_mangle)]
338pub unsafe extern "C" fn patch_seq_exit_op(stack: Stack) -> ! {
339    assert!(!stack.is_null(), "exit_op: stack is empty");
340
341    let (_rest, value) = unsafe { pop(stack) };
342
343    match value {
344        Value::Int(code) => {
345            // Explicitly validate exit code is in Unix-compatible range
346            if !(EXIT_CODE_MIN..=EXIT_CODE_MAX).contains(&code) {
347                panic!(
348                    "exit_op: exit code must be in range {}-{}, got {}",
349                    EXIT_CODE_MIN, EXIT_CODE_MAX, code
350                );
351            }
352            std::process::exit(code as i32);
353        }
354        _ => panic!("exit_op: expected Int on stack, got {:?}", value),
355    }
356}
357
358// Public re-exports with short names for internal use
359pub use patch_seq_exit_op as exit_op;
360pub use patch_seq_int_to_string as int_to_string;
361pub use patch_seq_push_seqstring as push_seqstring;
362pub use patch_seq_push_string as push_string;
363pub use patch_seq_read_line as read_line;
364pub use patch_seq_read_line_plus as read_line_plus;
365pub use patch_seq_read_n as read_n;
366pub use patch_seq_write_line as write_line;
367
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::Value;
    use std::ffi::CString;

    /// Smoke test: writing a String value must not panic.
    #[test]
    fn test_write_line() {
        unsafe {
            let stack = crate::stack::alloc_test_stack();
            let stack = push(stack, Value::String("Hello, World!".into()));
            let _stack = write_line(stack);
        }
    }

    /// A C string literal is pushed as an equal `Value::String`.
    #[test]
    fn test_push_string() {
        unsafe {
            let stack = crate::stack::alloc_test_stack();
            let test_str = CString::new("Test").unwrap();
            let stack = push_string(stack, test_str.as_ptr());

            let (_stack, value) = pop(stack);
            assert_eq!(value, Value::String("Test".into()));
        }
    }

    /// Empty strings survive push_string and can be written without panicking.
    #[test]
    fn test_empty_string() {
        unsafe {
            // Empty string should be handled correctly
            let stack = crate::stack::alloc_test_stack();
            let empty_str = CString::new("").unwrap();
            let stack = push_string(stack, empty_str.as_ptr());

            // Keep the stack returned by `pop`: the pre-pop pointer is stale
            // once its top value has been taken.
            let (stack, value) = pop(stack);
            assert_eq!(value, Value::String("".into()));

            // Write empty string should work without panic
            let stack = push(stack, Value::String("".into()));
            let _stack = write_line(stack);
        }
    }

    /// Multi-byte UTF-8 (CJK and emoji) round-trips through push_string.
    #[test]
    fn test_unicode_strings() {
        unsafe {
            // Test that Unicode strings are handled correctly
            let stack = crate::stack::alloc_test_stack();
            let unicode_str = CString::new("Hello, 世界! 🌍").unwrap();
            let stack = push_string(stack, unicode_str.as_ptr());

            let (_stack, value) = pop(stack);
            assert_eq!(value, Value::String("Hello, 世界! 🌍".into()));
        }
    }

    // =========================================================================
    // read_n validation tests
    // =========================================================================

    #[test]
    fn test_read_n_valid_input() {
        assert_eq!(super::validate_read_n_count(&Value::Int(0)), Ok(0));
        assert_eq!(super::validate_read_n_count(&Value::Int(100)), Ok(100));
        assert_eq!(
            super::validate_read_n_count(&Value::Int(1024 * 1024)), // 1MB
            Ok(1024 * 1024)
        );
    }

    #[test]
    fn test_read_n_negative_input() {
        let result = super::validate_read_n_count(&Value::Int(-1));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("must be non-negative"));
    }

    #[test]
    fn test_read_n_large_negative_input() {
        let result = super::validate_read_n_count(&Value::Int(i64::MIN));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("must be non-negative"));
    }

    #[test]
    fn test_read_n_exceeds_max_bytes() {
        let result = super::validate_read_n_count(&Value::Int(super::READ_N_MAX_BYTES + 1));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("exceeds maximum allowed"));
    }

    #[test]
    fn test_read_n_at_max_bytes_ok() {
        // Exactly at the limit should be OK
        let result = super::validate_read_n_count(&Value::Int(super::READ_N_MAX_BYTES));
        assert_eq!(result, Ok(super::READ_N_MAX_BYTES as usize));
    }

    #[test]
    fn test_read_n_wrong_type_string() {
        let result = super::validate_read_n_count(&Value::String("not an int".into()));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("expected Int"));
    }

    #[test]
    fn test_read_n_wrong_type_bool() {
        let result = super::validate_read_n_count(&Value::Bool(true));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("expected Int"));
    }
}