seq_runtime/
io.rs

1//! I/O Operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//!
5//! # Safety Contract
6//!
7//! **IMPORTANT:** These functions are designed to be called ONLY by compiler-generated code,
8//! not by end users or arbitrary C code. The compiler is responsible for:
9//!
10//! - Ensuring stack has correct types (verified by type checker)
11//! - Passing valid, null-terminated C strings to `push_string`
12//! - Never calling these functions directly from user code
13//!
14//! # String Handling
15//!
16//! String literals from the compiler must be valid UTF-8 C strings (null-terminated).
17//! Currently, each string literal is allocated as an owned `String`. See
18//! `docs/STRING_INTERNING_DESIGN.md` for discussion of future optimizations
19//! (interning, static references, etc.).
20
21use crate::stack::{Stack, pop, push};
22use crate::value::Value;
23use std::ffi::CStr;
24use std::io;
25use std::sync::LazyLock;
26
27/// Coroutine-aware stdout mutex.
28/// Uses may::sync::Mutex which yields the coroutine when contended instead of blocking the OS thread.
29/// By serializing access to stdout, we prevent RefCell borrow panics that occur when multiple
30/// coroutines on the same thread try to access stdout's internal RefCell concurrently.
31static STDOUT_MUTEX: LazyLock<may::sync::Mutex<()>> = LazyLock::new(|| may::sync::Mutex::new(()));
32
/// Lowest exit status accepted by `exit_op` (Unix-compatible range).
const EXIT_CODE_MIN: i64 = 0;
/// Highest exit status accepted by `exit_op`: the largest value that fits in one byte (255).
const EXIT_CODE_MAX: i64 = u8::MAX as i64;
36
37/// Write a string to stdout followed by a newline
38///
39/// Stack effect: ( str -- )
40///
41/// # Safety
42/// Stack must have a String value on top
43///
44/// # Concurrency
45/// Uses may::sync::Mutex to serialize stdout writes from multiple strands.
46/// When the mutex is contended, the strand yields to the scheduler (doesn't block the OS thread).
47/// This prevents RefCell borrow panics when multiple strands write concurrently.
48#[unsafe(no_mangle)]
49pub unsafe extern "C" fn patch_seq_write_line(stack: Stack) -> Stack {
50    assert!(!stack.is_null(), "write_line: stack is empty");
51
52    let (rest, value) = unsafe { pop(stack) };
53
54    match value {
55        Value::String(s) => {
56            // Acquire coroutine-aware mutex (yields if contended, doesn't block)
57            // This serializes access to stdout
58            let _guard = STDOUT_MUTEX.lock().unwrap();
59
60            // Write directly to fd 1 using libc to avoid Rust's std::io::stdout() RefCell.
61            // Rust's standard I/O uses RefCell which panics on concurrent access from
62            // multiple coroutines on the same thread.
63            let str_slice = s.as_str();
64            let newline = b"\n";
65            unsafe {
66                libc::write(
67                    1,
68                    str_slice.as_ptr() as *const libc::c_void,
69                    str_slice.len(),
70                );
71                libc::write(1, newline.as_ptr() as *const libc::c_void, newline.len());
72            }
73
74            rest
75        }
76        _ => panic!("write_line: expected String on stack, got {:?}", value),
77    }
78}
79
80/// Read a line from stdin
81///
82/// Returns the line including trailing newline.
83/// Returns empty string "" at EOF.
84/// Use `string-chomp` to remove trailing newlines if needed.
85///
86/// # Line Ending Normalization
87///
88/// Line endings are normalized to `\n` regardless of platform. Windows-style
89/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
90/// across different operating systems.
91///
92/// Stack effect: ( -- str )
93///
94/// # Safety
95/// Always safe to call
96#[unsafe(no_mangle)]
97pub unsafe extern "C" fn patch_seq_read_line(stack: Stack) -> Stack {
98    use std::io::BufRead;
99
100    let stdin = io::stdin();
101    let mut line = String::new();
102
103    stdin
104        .lock()
105        .read_line(&mut line)
106        .expect("read_line: failed to read from stdin (I/O error or EOF)");
107
108    // Normalize line endings: \r\n -> \n
109    if line.ends_with("\r\n") {
110        line.pop(); // remove \n
111        line.pop(); // remove \r
112        line.push('\n'); // add back \n
113    }
114
115    unsafe { push(stack, Value::String(line.into())) }
116}
117
118/// Read a line from stdin with explicit EOF detection
119///
120/// Returns the line and a status flag:
121/// - ( line 1 ) on success (line includes trailing newline)
122/// - ( "" 0 ) at EOF
123///
124/// Stack effect: ( -- String Int )
125///
126/// The `+` suffix indicates this returns a result pattern (value + status).
127///
128/// # Line Ending Normalization
129///
130/// Line endings are normalized to `\n` regardless of platform. Windows-style
131/// `\r\n` endings are converted to `\n`. This ensures consistent behavior
132/// across different operating systems.
133///
134/// # Safety
135/// Always safe to call
136#[unsafe(no_mangle)]
137pub unsafe extern "C" fn patch_seq_read_line_plus(stack: Stack) -> Stack {
138    use std::io::BufRead;
139
140    let stdin = io::stdin();
141    let mut line = String::new();
142
143    let bytes_read = stdin
144        .lock()
145        .read_line(&mut line)
146        .expect("read_line_safe: failed to read from stdin");
147
148    // Normalize line endings: \r\n -> \n
149    if line.ends_with("\r\n") {
150        line.pop(); // remove \n
151        line.pop(); // remove \r
152        line.push('\n'); // add back \n
153    }
154
155    // bytes_read == 0 means EOF
156    let status = if bytes_read > 0 { 1i64 } else { 0i64 };
157
158    let stack = unsafe { push(stack, Value::String(line.into())) };
159    unsafe { push(stack, Value::Int(status)) }
160}
161
/// Upper bound on the byte count accepted by a single read_n call: 10 MiB.
/// The cap guards against accidental or malicious huge allocations.
/// LSP messages are typically < 1MB, so this leaves generous headroom.
const READ_N_MAX_BYTES: i64 = 10 * (1 << 20); // 1 << 20 bytes == 1 MiB
166
167/// Validates and extracts the byte count from a Value for read_n.
168/// Returns Ok(usize) on success, Err(message) on validation failure.
169fn validate_read_n_count(value: &Value) -> Result<usize, String> {
170    match value {
171        Value::Int(n) if *n < 0 => Err(format!(
172            "read_n: byte count must be non-negative, got {}",
173            n
174        )),
175        Value::Int(n) if *n > READ_N_MAX_BYTES => Err(format!(
176            "read_n: byte count {} exceeds maximum allowed ({})",
177            n, READ_N_MAX_BYTES
178        )),
179        Value::Int(n) => Ok(*n as usize),
180        _ => Err(format!("read_n: expected Int on stack, got {:?}", value)),
181    }
182}
183
184/// Read exactly N bytes from stdin
185///
186/// Returns the bytes read and a status flag:
187/// - ( string 1 ) on success (read all N bytes)
188/// - ( string 0 ) at EOF or partial read (string may be shorter than N)
189///
190/// Stack effect: ( Int -- String Int )
191///
192/// Like `io.read-line+`, this returns a result pattern (value + status) to allow
193/// explicit EOF detection. The function name omits the `+` suffix for brevity
194/// since byte-count reads are inherently status-oriented.
195///
196/// This is used for protocols like LSP where message bodies are byte-counted
197/// and don't have trailing newlines.
198///
199/// # UTF-8 Handling
200/// The bytes are interpreted as UTF-8. Invalid UTF-8 sequences are replaced
201/// with the Unicode replacement character (U+FFFD). This is appropriate for
202/// text-based protocols like LSP but may not be suitable for binary data.
203///
204/// # Safety
205/// Stack must have an Int on top. The integer must be non-negative and
206/// not exceed READ_N_MAX_BYTES (10MB).
207#[unsafe(no_mangle)]
208pub unsafe extern "C" fn patch_seq_read_n(stack: Stack) -> Stack {
209    use std::io::Read;
210
211    assert!(!stack.is_null(), "read_n: stack is empty");
212
213    let (stack, value) = unsafe { pop(stack) };
214    let n = validate_read_n_count(&value).unwrap_or_else(|e| panic!("{}", e));
215
216    let stdin = io::stdin();
217    let mut buffer = vec![0u8; n];
218    let mut total_read = 0;
219
220    {
221        let mut handle = stdin.lock();
222        while total_read < n {
223            match handle.read(&mut buffer[total_read..]) {
224                Ok(0) => break, // EOF
225                Ok(bytes_read) => total_read += bytes_read,
226                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
227                Err(e) => panic!("read_n: failed to read from stdin: {}", e),
228            }
229        }
230    }
231
232    // Truncate to actual bytes read
233    buffer.truncate(total_read);
234
235    // Convert to String (assuming UTF-8)
236    let s = String::from_utf8_lossy(&buffer).into_owned();
237
238    // Status: 1 if we read all N bytes, 0 otherwise
239    let status = if total_read == n { 1i64 } else { 0i64 };
240
241    let stack = unsafe { push(stack, Value::String(s.into())) };
242    unsafe { push(stack, Value::Int(status)) }
243}
244
245/// Convert an integer to a string
246///
247/// Stack effect: ( Int -- String )
248///
249/// # Safety
250/// Stack must have an Int value on top
251#[unsafe(no_mangle)]
252pub unsafe extern "C" fn patch_seq_int_to_string(stack: Stack) -> Stack {
253    assert!(!stack.is_null(), "int_to_string: stack is empty");
254
255    let (rest, value) = unsafe { pop(stack) };
256
257    match value {
258        Value::Int(n) => unsafe { push(rest, Value::String(n.to_string().into())) },
259        _ => panic!("int_to_string: expected Int on stack, got {:?}", value),
260    }
261}
262
263/// Push a C string literal onto the stack (for compiler-generated code)
264///
265/// Stack effect: ( -- str )
266///
267/// # Safety
268/// The c_str pointer must be valid and null-terminated
269#[unsafe(no_mangle)]
270pub unsafe extern "C" fn patch_seq_push_string(stack: Stack, c_str: *const i8) -> Stack {
271    assert!(!c_str.is_null(), "push_string: null string pointer");
272
273    let s = unsafe {
274        CStr::from_ptr(c_str)
275            .to_str()
276            .expect("push_string: invalid UTF-8 in string literal")
277            .to_owned()
278    };
279
280    unsafe { push(stack, Value::String(s.into())) }
281}
282
283/// Push a SeqString value onto the stack
284///
285/// This is used when we already have a SeqString (e.g., from closures).
286/// Unlike push_string which takes a C string, this takes a SeqString by value.
287///
288/// Stack effect: ( -- String )
289///
290/// # Safety
291/// The SeqString must be valid. This is only called from LLVM-generated code, not actual C code.
292#[allow(improper_ctypes_definitions)]
293#[unsafe(no_mangle)]
294pub unsafe extern "C" fn patch_seq_push_seqstring(
295    stack: Stack,
296    seq_str: crate::seqstring::SeqString,
297) -> Stack {
298    unsafe { push(stack, Value::String(seq_str)) }
299}
300
301/// Exit the program with a status code
302///
303/// Stack effect: ( exit_code -- )
304///
305/// # Safety
306/// Stack must have an Int on top. Never returns.
307#[unsafe(no_mangle)]
308pub unsafe extern "C" fn patch_seq_exit_op(stack: Stack) -> ! {
309    assert!(!stack.is_null(), "exit_op: stack is empty");
310
311    let (_rest, value) = unsafe { pop(stack) };
312
313    match value {
314        Value::Int(code) => {
315            // Explicitly validate exit code is in Unix-compatible range
316            if !(EXIT_CODE_MIN..=EXIT_CODE_MAX).contains(&code) {
317                panic!(
318                    "exit_op: exit code must be in range {}-{}, got {}",
319                    EXIT_CODE_MIN, EXIT_CODE_MAX, code
320                );
321            }
322            std::process::exit(code as i32);
323        }
324        _ => panic!("exit_op: expected Int on stack, got {:?}", value),
325    }
326}
327
328// Public re-exports with short names for internal use
329pub use patch_seq_exit_op as exit_op;
330pub use patch_seq_int_to_string as int_to_string;
331pub use patch_seq_push_seqstring as push_seqstring;
332pub use patch_seq_push_string as push_string;
333pub use patch_seq_read_line as read_line;
334pub use patch_seq_read_line_plus as read_line_plus;
335pub use patch_seq_read_n as read_n;
336pub use patch_seq_write_line as write_line;
337
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::Value;
    use std::ffi::CString;

    /// Writing a String from the stack must not panic.
    #[test]
    fn test_write_line() {
        unsafe {
            let stack = crate::stack::alloc_test_stack();
            let stack = push(stack, Value::String("Hello, World!".into()));
            let _stack = write_line(stack);
        }
    }

    /// A C string literal ends up on the stack as an equal String value.
    #[test]
    fn test_push_string() {
        unsafe {
            let stack = crate::stack::alloc_test_stack();
            let test_str = CString::new("Test").unwrap();
            let stack = push_string(stack, test_str.as_ptr());

            let (_stack, value) = pop(stack);
            assert_eq!(value, Value::String("Test".into()));
        }
    }

    /// The empty string round-trips through push_string and can be written.
    #[test]
    fn test_empty_string() {
        unsafe {
            // Empty string should be handled correctly
            let stack = crate::stack::alloc_test_stack();
            let empty_str = CString::new("").unwrap();
            let stack = push_string(stack, empty_str.as_ptr());

            // Keep the stack returned by `pop`: the handle passed in still
            // refers to the entry that was just removed.
            let (stack, value) = pop(stack);
            assert_eq!(value, Value::String("".into()));

            // Write empty string should work without panic
            let stack = push(stack, Value::String("".into()));
            let _stack = write_line(stack);
        }
    }

    /// Multi-byte UTF-8 (CJK and emoji) survives push_string unchanged.
    #[test]
    fn test_unicode_strings() {
        unsafe {
            // Test that Unicode strings are handled correctly
            let stack = crate::stack::alloc_test_stack();
            let unicode_str = CString::new("Hello, 世界! 🌍").unwrap();
            let stack = push_string(stack, unicode_str.as_ptr());

            let (_stack, value) = pop(stack);
            assert_eq!(value, Value::String("Hello, 世界! 🌍".into()));
        }
    }

    // =========================================================================
    // read_n validation tests
    // =========================================================================

    /// Counts inside the permitted range are returned unchanged.
    #[test]
    fn test_read_n_valid_input() {
        assert_eq!(super::validate_read_n_count(&Value::Int(0)), Ok(0));
        assert_eq!(super::validate_read_n_count(&Value::Int(100)), Ok(100));
        assert_eq!(
            super::validate_read_n_count(&Value::Int(1024 * 1024)), // 1MB
            Ok(1024 * 1024)
        );
    }

    /// A negative count is rejected with the "non-negative" message.
    #[test]
    fn test_read_n_negative_input() {
        let result = super::validate_read_n_count(&Value::Int(-1));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("must be non-negative"));
    }

    /// The most negative i64 is rejected the same way as any negative count.
    #[test]
    fn test_read_n_large_negative_input() {
        let result = super::validate_read_n_count(&Value::Int(i64::MIN));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("must be non-negative"));
    }

    /// One byte over the limit is rejected.
    #[test]
    fn test_read_n_exceeds_max_bytes() {
        let result = super::validate_read_n_count(&Value::Int(super::READ_N_MAX_BYTES + 1));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("exceeds maximum allowed"));
    }

    /// The limit itself is inclusive.
    #[test]
    fn test_read_n_at_max_bytes_ok() {
        // Exactly at the limit should be OK
        let result = super::validate_read_n_count(&Value::Int(super::READ_N_MAX_BYTES));
        assert_eq!(result, Ok(super::READ_N_MAX_BYTES as usize));
    }

    /// A String where an Int is expected is a type error.
    #[test]
    fn test_read_n_wrong_type_string() {
        let result = super::validate_read_n_count(&Value::String("not an int".into()));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("expected Int"));
    }

    /// A Bool where an Int is expected is a type error.
    #[test]
    fn test_read_n_wrong_type_bool() {
        let result = super::validate_read_n_count(&Value::Bool(true));
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("expected Int"));
    }
}