runmat_runtime/builtins/io/http/
webread.rs

1//! MATLAB-compatible `webread` builtin for HTTP/HTTPS downloads.
2
3use std::collections::VecDeque;
4use std::time::Duration;
5
6use reqwest::blocking::{Client, RequestBuilder};
7use reqwest::header::{HeaderName, HeaderValue, CONTENT_TYPE};
8use reqwest::Url;
9use runmat_builtins::{CellArray, CharArray, StructValue, Tensor, Value};
10use runmat_macros::runtime_builtin;
11
12use crate::builtins::common::spec::{
13    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
14    ReductionNaN, ResidencyPolicy, ShapeRequirements,
15};
16use crate::builtins::io::json::jsondecode::decode_json_text;
17use crate::gather_if_needed;
18#[cfg(feature = "doc_export")]
19use crate::register_builtin_doc_text;
20use crate::{register_builtin_fusion_spec, register_builtin_gpu_spec};
21
/// Request timeout, in seconds, used when the caller does not supply a `Timeout` option.
const DEFAULT_TIMEOUT_SECONDS: f64 = 60.0;
/// `User-Agent` value sent when no non-blank `UserAgent` option is supplied.
const DEFAULT_USER_AGENT: &str = "RunMat webread/0.0";
24
25#[cfg(feature = "doc_export")]
26#[allow(clippy::too_many_lines)]
27pub const DOC_MD: &str = r#"---
28title: "webread"
29category: "io/http"
30keywords: ["webread", "http get", "rest client", "json", "https", "api"]
31summary: "Download web content (JSON, text, or binary) over HTTP/HTTPS."
32references:
33  - https://www.mathworks.com/help/matlab/ref/webread.html
34gpu_support:
35  elementwise: false
36  reduction: false
37  precisions: []
38  broadcasting: "none"
39  notes: "webread always gathers gpuArray inputs and executes on the CPU."
40fusion:
41  elementwise: false
42  reduction: false
43  max_inputs: 1
44  constants: "inline"
45requires_feature: null
46tested:
47  unit: "builtins::io::http::webread::tests"
48  integration:
49    - "builtins::io::http::webread::tests::webread_fetches_json_response"
50    - "builtins::io::http::webread::tests::webread_fetches_text_response"
51    - "builtins::io::http::webread::tests::webread_fetches_binary_payload"
52    - "builtins::io::http::webread::tests::webread_appends_query_parameters"
53    - "builtins::io::http::webread::tests::webread_struct_argument_supports_options_and_query"
54    - "builtins::io::http::webread::tests::webread_headerfields_struct_applies_custom_headers"
55    - "builtins::io::http::webread::tests::webread_queryparameters_option_struct"
56---
57
58# What does the `webread` function do in MATLAB / RunMat?
59`webread` issues an HTTP or HTTPS request and returns the response body as a MATLAB-compatible
60value. Textual payloads become character vectors, JSON responses are decoded into structs, cells,
61and numeric arrays, while binary payloads return numeric vectors of bytes.
62
63## How does the `webread` function behave in MATLAB / RunMat?
64- Accepts URLs supplied as character vectors or string scalars; the URL must be absolute.
65- Optional `weboptions`-style fields (either through a struct argument or name-value pairs) control
66  content decoding (`ContentType`), request timeout (`Timeout`), headers (`HeaderFields`), and
67  authentication (`Username`/`Password`). The builtin currently supports the default `GET`
68  request method; use `webwrite` for POST/PUT uploads.
69- Additional name-value pairs that do not match an option are appended to the query string using
70  percent-encoding. A leading struct or cell array argument can also supply query parameters.
71- `ContentType 'auto'` (default) inspects the `Content-Type` response header to choose between JSON,
72  text, or binary decoding. Explicit `ContentType 'json'`, `'text'`, or `'binary'` override the
73  detection logic.
74- JSON responses are parsed with the same rules as `jsondecode`, producing doubles, logicals,
75  strings, structs, and cell arrays that match MATLAB semantics.
- Text responses are decoded to characters using the charset declared in the `Content-Type`
  response header, falling back to UTF-8 when none is declared. Binary payloads return `1×N`
  double arrays whose entries store byte values in the range 0–255.
79- HTTP errors (non-2xx status codes), timeouts, TLS failures, and parsing problems raise descriptive
80  MATLAB-style errors.
81
82## `webread` Function GPU Execution Behaviour
83`webread` runs entirely on the CPU. Any `gpuArray` inputs (for example, query parameter values)
84are gathered to host memory before building the HTTP request. Results are produced on the host, and
85fusion graphs terminate at this builtin via `ResidencyPolicy::GatherImmediately`.
86
87## Examples of using the `webread` function in MATLAB / RunMat
88
89### Reading JSON data from a REST API
90```matlab
91opts = weboptions("ContentType", "json", "Timeout", 15);
92weather = webread("https://api.example.com/weather", opts, "city", "Reykjavik");
93disp(weather.temperatureC);
94```
95Expected output:
96```matlab
97    2.3
98```
99
100### Downloading plain text as a character vector
101```matlab
102html = webread("https://example.com/index.txt", "Timeout", 5);
103extract = html(1:200);
104```
105`extract` contains the first 200 characters as a `1×200` char vector.
106
107### Retrieving binary payloads such as images
108```matlab
109bytes = webread("https://example.com/logo.png", "ContentType", "binary");
110filewrite("logo.png", uint8(bytes));
111```
112The PNG file is written locally after converting the returned bytes to `uint8`.
113
114### Supplying custom headers and credentials
115```matlab
116headers = struct("Accept", "application/json", "X-Client", "RunMat");
117data = webread("https://api.example.com/me", ...
118               "Username", "ada", "Password", "secret", ...
119               "HeaderFields", headers, ...
120               "ContentType", "json");
121```
122Custom headers are merged into the request, and HTTP basic authentication credentials are attached.
123
124### Passing query parameters as a struct
125```matlab
126query = struct("limit", 25, "sort", "name");
127response = webread("https://api.example.com/resources", query, "ContentType", "json");
128```
129`query` is promoted into the URL query string before the request is sent.
130
131## GPU residency in RunMat (Do I need `gpuArray`?)
132No. `webread` gathers any GPU-resident values before contacting the network and produces host
133results. Keeping inputs on the GPU offers no benefit because HTTP/TLS stacks operate on the CPU.
134
135## FAQ
136
1371. **Can `webread` decode JSON automatically?**  
138   Yes. When the server reports a JSON `Content-Type` header (for example `application/json`
139   or `application/vnd.api+json`) the builtin decodes it using the same rules as `jsondecode`.
140   Override the behaviour with `"ContentType","text"` or `"ContentType","binary"` when needed.
141
1422. **How do I control request timeouts?**  
143   Supply `"Timeout", seconds` as a name-value pair or in an options struct. The default timeout
144   is 60 seconds. Timeouts raise `webread: request to <url> timed out`.
145
1463. **What headers can I set?**  
147   Use `"HeaderFields", struct(...)` or a `cell` array of name/value pairs. Header names must be
148   valid HTTP tokens. The builtin automatically sets a RunMat-specific `User-Agent` string unless
   you override it with `"UserAgent", "..."`.
150
1514. **Does `webread` follow redirects?**  
152   Yes. The underlying HTTP client follows redirects up to the platform default limit while
153   preserving headers and authentication.
154
1555. **How do I provide credentials?**  
156   Use `"Username", "user", "Password", "pass"` for HTTP basic authentication. Supplying a password
157   without a username raises an error.
158
1596. **Can I send POST or PUT requests?**  
160   `webread` is designed for read-only requests and currently supports the default `GET` method.
161   Use `webwrite` (planned) for requests that include bodies or mutate server state.
162
1637. **How are binary responses represented?**  
164   Binary payloads return `1×N` double arrays whose elements are byte values. Convert them to the
165   desired integer type (for example `uint8`) before further processing.
166
1678. **What happens when the server returns an error status?**  
168   Non-success HTTP status codes raise `webread: request to … failed with HTTP status XYZ`. Inspect
169   the remote server logs or response headers for additional diagnostics.
170
1719. **Does `webread` support compressed responses?**  
172   Yes. The builtin enables gzip / deflate content decoding through the HTTP client automatically.
173
17410. **Can I pass query parameters as GPU arrays?**  
175    Yes. Inputs wrapped in `gpuArray` are gathered before assembling the query string.
176
177## See Also
178[webwrite](./webwrite), [weboptions](./weboptions), [jsondecode](../json/jsondecode), [websave](../filetext/filewrite)
179"#;
180
/// GPU metadata for `webread`, registered below through `register_builtin_gpu_spec!`.
///
/// Declares a custom `"http-get"` operation with no supported precisions, no provider hooks,
/// and a `GatherImmediately` residency policy.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "webread",
    op_kind: GpuOpKind::Custom("http-get"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "HTTP requests always execute on the CPU; gpuArray inputs are gathered eagerly.",
};

register_builtin_gpu_spec!(GPU_SPEC);
197
/// Fusion metadata for `webread`, registered below through `register_builtin_fusion_spec!`.
///
/// Declares neither an elementwise nor a reduction form and accepts inputs of any shape.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "webread",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "webread performs network I/O and terminates fusion graphs.",
};

register_builtin_fusion_spec!(FUSION_SPEC);
209
// Registers the Markdown help text for `webread`; compiled only with the `doc_export` feature.
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("webread", DOC_MD);
212
213#[runtime_builtin(
214    name = "webread",
215    category = "io/http",
216    summary = "Download web content (JSON, text, or binary) over HTTP/HTTPS.",
217    keywords = "webread,http get,rest client,json,api",
218    accel = "sink"
219)]
220fn webread_builtin(url: Value, rest: Vec<Value>) -> Result<Value, String> {
221    let gathered_url = gather_if_needed(&url).map_err(|e| format!("webread: {e}"))?;
222    let gathered_args = gather_arguments(rest)?;
223    let url_text = expect_string_scalar(
224        &gathered_url,
225        "webread: URL must be a character vector or string scalar",
226    )?;
227    if url_text.trim().is_empty() {
228        return Err("webread: URL must not be empty".to_string());
229    }
230    let (options, query_params) = parse_arguments(gathered_args)?;
231    execute_request(&url_text, options, &query_params)
232}
233
234fn gather_arguments(values: Vec<Value>) -> Result<Vec<Value>, String> {
235    let mut out = Vec::with_capacity(values.len());
236    for value in values {
237        out.push(gather_if_needed(&value).map_err(|e| format!("webread: {e}"))?);
238    }
239    Ok(out)
240}
241
242fn parse_arguments(args: Vec<Value>) -> Result<(WebReadOptions, Vec<(String, String)>), String> {
243    let mut queue: VecDeque<Value> = args.into();
244    let mut options = WebReadOptions::default();
245    let mut query_params = Vec::new();
246
247    if matches!(queue.front(), Some(Value::Struct(_))) {
248        if let Some(Value::Struct(struct_value)) = queue.pop_front() {
249            process_struct_fields(&struct_value, &mut options, &mut query_params)?;
250        }
251    } else if matches!(queue.front(), Some(Value::Cell(_))) {
252        if let Some(Value::Cell(cell)) = queue.pop_front() {
253            append_query_from_cell(&cell, &mut query_params)?
254        }
255    }
256
257    while let Some(name_value) = queue.pop_front() {
258        let name = expect_string_scalar(
259            &name_value,
260            "webread: parameter names must be character vectors or string scalars",
261        )?;
262        let value = queue
263            .pop_front()
264            .ok_or_else(|| "webread: missing value for name-value argument".to_string())?;
265        process_name_value_pair(&name, &value, &mut options, &mut query_params)?;
266    }
267
268    Ok((options, query_params))
269}
270
271fn process_struct_fields(
272    struct_value: &StructValue,
273    options: &mut WebReadOptions,
274    query_params: &mut Vec<(String, String)>,
275) -> Result<(), String> {
276    for (key, value) in &struct_value.fields {
277        process_name_value_pair(key, value, options, query_params)?;
278    }
279    Ok(())
280}
281
282fn process_name_value_pair(
283    name: &str,
284    value: &Value,
285    options: &mut WebReadOptions,
286    query_params: &mut Vec<(String, String)>,
287) -> Result<(), String> {
288    let lower = name.to_ascii_lowercase();
289    match lower.as_str() {
290        "contenttype" => {
291            options.content_type = parse_content_type(value)?;
292            Ok(())
293        }
294        "timeout" => {
295            options.timeout = parse_timeout(value)?;
296            Ok(())
297        }
298        "headerfields" => {
299            let headers = parse_header_fields(value)?;
300            options.headers.extend(headers);
301            Ok(())
302        }
303        "useragent" => {
304            options.user_agent = Some(expect_string_scalar(
305                value,
306                "webread: UserAgent must be a character vector or string scalar",
307            )?);
308            Ok(())
309        }
310        "username" => {
311            options.username = Some(expect_string_scalar(
312                value,
313                "webread: Username must be a character vector or string scalar",
314            )?);
315            Ok(())
316        }
317        "password" => {
318            options.password = Some(expect_string_scalar(
319                value,
320                "webread: Password must be a character vector or string scalar",
321            )?);
322            Ok(())
323        }
324        "requestmethod" => {
325            options.method = parse_request_method(value)?;
326            Ok(())
327        }
328        "mediatype" => {
329            // weboptions exposes MediaType for webwrite; accept and ignore for webread.
330            expect_string_scalar(
331                value,
332                "webread: MediaType must be a character vector or string scalar",
333            )?;
334            Ok(())
335        }
336        "queryparameters" => append_query_from_value(value, query_params),
337        _ => {
338            let param_value = value_to_query_string(value, name)?;
339            query_params.push((name.to_string(), param_value));
340            Ok(())
341        }
342    }
343}
344
345fn append_query_from_value(
346    value: &Value,
347    query_params: &mut Vec<(String, String)>,
348) -> Result<(), String> {
349    match value {
350        Value::Struct(struct_value) => {
351            for (key, val) in &struct_value.fields {
352                let text = value_to_query_string(val, key)?;
353                query_params.push((key.clone(), text));
354            }
355            Ok(())
356        }
357        Value::Cell(cell) => append_query_from_cell(cell, query_params),
358        _ => Err("webread: QueryParameters must be a struct or cell array".to_string()),
359    }
360}
361
362fn append_query_from_cell(
363    cell: &CellArray,
364    query_params: &mut Vec<(String, String)>,
365) -> Result<(), String> {
366    if cell.cols != 2 {
367        return Err("webread: cell array of query parameters must have two columns".to_string());
368    }
369    for row in 0..cell.rows {
370        let name_value = cell.get(row, 0).map_err(|e| format!("webread: {e}"))?;
371        let value_value = cell.get(row, 1).map_err(|e| format!("webread: {e}"))?;
372        let name = expect_string_scalar(
373            &name_value,
374            "webread: query parameter names must be text scalars",
375        )?;
376        let text = value_to_query_string(&value_value, &name)?;
377        query_params.push((name, text));
378    }
379    Ok(())
380}
381
382fn execute_request(
383    url_text: &str,
384    options: WebReadOptions,
385    query_params: &[(String, String)],
386) -> Result<Value, String> {
387    let username_present = options
388        .username
389        .as_ref()
390        .map(|s| !s.is_empty())
391        .unwrap_or(false);
392    let password_present = options
393        .password
394        .as_ref()
395        .map(|s| !s.is_empty())
396        .unwrap_or(false);
397    if password_present && !username_present {
398        return Err("webread: Password requires a Username option".to_string());
399    }
400
401    let mut url =
402        Url::parse(url_text).map_err(|err| format!("webread: invalid URL '{url_text}': {err}"))?;
403    if !query_params.is_empty() {
404        {
405            let mut pairs = url.query_pairs_mut();
406            for (name, value) in query_params {
407                pairs.append_pair(name, value);
408            }
409        }
410    }
411    let url_display = url.to_string();
412
413    let user_agent = options
414        .user_agent
415        .as_deref()
416        .filter(|ua| !ua.trim().is_empty())
417        .unwrap_or(DEFAULT_USER_AGENT);
418
419    let client = Client::builder()
420        .timeout(options.timeout)
421        .user_agent(user_agent)
422        .build()
423        .map_err(|err| format!("webread: failed to build HTTP client ({err})"))?;
424
425    let mut builder = match options.method {
426        HttpMethod::Get => client.get(url.clone()),
427    };
428    builder = apply_headers(builder, &options.headers)?;
429    if let Some(username) = &options.username {
430        if !username.is_empty() {
431            let password = options.password.as_ref().filter(|p| !p.is_empty()).cloned();
432            builder = builder.basic_auth(username.clone(), password);
433        }
434    }
435
436    let response = builder
437        .send()
438        .map_err(|err| request_error("request", &url_display, err))?;
439    let status = response.status();
440    if !status.is_success() {
441        return Err(format!(
442            "webread: request to {} failed with HTTP status {}",
443            url_display, status
444        ));
445    }
446
447    let header_content_type = response
448        .headers()
449        .get(CONTENT_TYPE)
450        .and_then(|value| value.to_str().ok())
451        .map(|s| s.to_string());
452    let resolved = options.resolve_content_type(header_content_type.as_deref());
453
454    match resolved {
455        ResolvedContentType::Json => {
456            let body = response
457                .text()
458                .map_err(|err| request_error("read response body", &url_display, err))?;
459            match decode_json_text(&body) {
460                Ok(value) => Ok(value),
461                Err(err) => Err(map_json_error(err)),
462            }
463        }
464        ResolvedContentType::Text => {
465            let body = response
466                .text()
467                .map_err(|err| request_error("read response body", &url_display, err))?;
468            let array = CharArray::new_row(&body);
469            Ok(Value::CharArray(array))
470        }
471        ResolvedContentType::Binary => {
472            let bytes = response
473                .bytes()
474                .map_err(|err| request_error("read response body", &url_display, err))?;
475            let data: Vec<f64> = bytes.iter().map(|b| f64::from(*b)).collect();
476            let cols = bytes.len();
477            let tensor =
478                Tensor::new(data, vec![1, cols]).map_err(|err| format!("webread: {err}"))?;
479            Ok(Value::Tensor(tensor))
480        }
481    }
482}
483
484fn request_error(action: &str, url: &str, err: reqwest::Error) -> String {
485    if err.is_timeout() {
486        format!("webread: {action} to {url} timed out")
487    } else if err.is_connect() {
488        format!("webread: unable to connect to {url}: {err}")
489    } else if err.is_status() {
490        format!("webread: HTTP error for {url}: {err}")
491    } else {
492        format!("webread: failed to {action} {url}: {err}")
493    }
494}
495
/// Rewrites a JSON decoding error as a `webread` error.
///
/// A leading `jsondecode: ` prefix is removed (once) so the message is not attributed to
/// two builtins; the remaining text is kept verbatim.
fn map_json_error(err: String) -> String {
    let detail = err.strip_prefix("jsondecode: ").unwrap_or(&err);
    format!("webread: failed to parse JSON response ({detail})")
}
503
504fn apply_headers(
505    mut builder: RequestBuilder,
506    headers: &[(String, String)],
507) -> Result<RequestBuilder, String> {
508    for (name, value) in headers {
509        if name.trim().is_empty() {
510            return Err("webread: header names must not be empty".to_string());
511        }
512        let header_name = HeaderName::from_bytes(name.as_bytes())
513            .map_err(|_| format!("webread: invalid header name '{name}'"))?;
514        let header_value = HeaderValue::from_str(value)
515            .map_err(|_| format!("webread: invalid header value for '{name}'"))?;
516        builder = builder.header(header_name, header_value);
517    }
518    Ok(builder)
519}
520
521fn parse_header_fields(value: &Value) -> Result<Vec<(String, String)>, String> {
522    match value {
523        Value::Struct(struct_value) => {
524            let mut headers = Vec::with_capacity(struct_value.fields.len());
525            for (key, val) in &struct_value.fields {
526                let header_value = expect_string_scalar(
527                    val,
528                    "webread: header values must be character vectors or string scalars",
529                )?;
530                headers.push((key.clone(), header_value));
531            }
532            Ok(headers)
533        }
534        Value::Cell(cell) => {
535            if cell.cols != 2 {
536                return Err(
537                    "webread: HeaderFields cell array must have exactly two columns".to_string(),
538                );
539            }
540            let mut headers = Vec::with_capacity(cell.rows);
541            for row in 0..cell.rows {
542                let name = cell.get(row, 0).map_err(|e| format!("webread: {e}"))?;
543                let value = cell.get(row, 1).map_err(|e| format!("webread: {e}"))?;
544                let header_name = expect_string_scalar(
545                    &name,
546                    "webread: header names must be character vectors or string scalars",
547                )?;
548                if header_name.trim().is_empty() {
549                    return Err("webread: header names must not be empty".to_string());
550                }
551                let header_value = expect_string_scalar(
552                    &value,
553                    "webread: header values must be character vectors or string scalars",
554                )?;
555                headers.push((header_name, header_value));
556            }
557            Ok(headers)
558        }
559        _ => Err(
560            "webread: HeaderFields must be provided as a struct or cell array of name/value pairs"
561                .to_string(),
562        ),
563    }
564}
565
566fn parse_content_type(value: &Value) -> Result<ContentTypeHint, String> {
567    let text = expect_string_scalar(
568        value,
569        "webread: ContentType must be a character vector or string scalar",
570    )?;
571    match text.trim().to_ascii_lowercase().as_str() {
572        "auto" => Ok(ContentTypeHint::Auto),
573        "json" => Ok(ContentTypeHint::Json),
574        "text" | "char" | "string" => Ok(ContentTypeHint::Text),
575        "binary" | "octet-stream" | "raw" => Ok(ContentTypeHint::Binary),
576        other => Err(format!(
577            "webread: unsupported ContentType '{}'; use 'auto', 'json', 'text', or 'binary'",
578            other
579        )),
580    }
581}
582
583fn parse_timeout(value: &Value) -> Result<Duration, String> {
584    let seconds = numeric_scalar(value, "webread: Timeout must be a finite, positive scalar")?;
585    if !seconds.is_finite() || seconds <= 0.0 {
586        return Err("webread: Timeout must be a finite, positive scalar".to_string());
587    }
588    Ok(Duration::from_secs_f64(seconds))
589}
590
591fn parse_request_method(value: &Value) -> Result<HttpMethod, String> {
592    let text = expect_string_scalar(
593        value,
594        "webread: RequestMethod must be a character vector or string scalar",
595    )?;
596    let lower = text.trim().to_ascii_lowercase();
597    match lower.as_str() {
598        "get" | "auto" => Ok(HttpMethod::Get),
599        other => Err(format!(
600            "webread: RequestMethod '{}' is not supported; expected 'auto' or 'get'",
601            other
602        )),
603    }
604}
605
606fn numeric_scalar(value: &Value, context: &str) -> Result<f64, String> {
607    match value {
608        Value::Num(n) => Ok(*n),
609        Value::Int(i) => Ok(i.to_f64()),
610        Value::Tensor(tensor) => {
611            if tensor.data.len() == 1 {
612                Ok(tensor.data[0])
613            } else {
614                Err(context.to_string())
615            }
616        }
617        _ => Err(context.to_string()),
618    }
619}
620
621fn expect_string_scalar(value: &Value, context: &str) -> Result<String, String> {
622    match value {
623        Value::String(s) => Ok(s.clone()),
624        Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
625        Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
626        _ => Err(context.to_string()),
627    }
628}
629
630fn value_to_query_string(value: &Value, name: &str) -> Result<String, String> {
631    match value {
632        Value::String(s) => Ok(s.clone()),
633        Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
634        Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
635        Value::Num(n) => Ok(format!("{}", n)),
636        Value::Int(i) => Ok(i.to_i64().to_string()),
637        Value::Bool(b) => Ok(if *b { "true".into() } else { "false".into() }),
638        Value::Tensor(tensor) => {
639            if tensor.data.len() == 1 {
640                Ok(format!("{}", tensor.data[0]))
641            } else {
642                Err(format!(
643                    "webread: query parameter '{}' must be scalar",
644                    name
645                ))
646            }
647        }
648        Value::LogicalArray(array) => {
649            if array.len() == 1 {
650                Ok(if array.data[0] != 0 {
651                    "true".into()
652                } else {
653                    "false".into()
654                })
655            } else {
656                Err(format!(
657                    "webread: query parameter '{}' must be scalar",
658                    name
659                ))
660            }
661        }
662        _ => Err(format!(
663            "webread: unsupported value type for query parameter '{}'",
664            name
665        )),
666    }
667}
668
/// Decoding requested by the caller through the `ContentType` option.
#[derive(Clone, Copy, Debug)]
enum ContentTypeHint {
    /// Choose the decoding from the response's `Content-Type` header.
    Auto,
    /// Always decode the body as text.
    Text,
    /// Always decode the body as JSON.
    Json,
    /// Always return the raw bytes.
    Binary,
}
676
/// Concrete decoding applied to a response body once any `Auto` hint has been resolved.
#[derive(Clone, Copy, Debug)]
enum ResolvedContentType {
    /// Body is returned as a character row vector.
    Text,
    /// Body is decoded with `decode_json_text`.
    Json,
    /// Body is returned as a `1xN` tensor of byte values.
    Binary,
}
683
/// HTTP request methods that `webread` can issue; only `GET` is available.
#[derive(Clone, Copy, Debug)]
enum HttpMethod {
    /// Plain `GET` request.
    Get,
}
688
/// Request settings collected from the arguments passed to `webread`.
#[derive(Clone, Debug)]
struct WebReadOptions {
    /// Decoding requested through `ContentType`; `Auto` defers to the response header.
    content_type: ContentTypeHint,
    /// Timeout handed to the HTTP client builder.
    timeout: Duration,
    /// Extra request headers as `(name, value)` pairs, in the order they were supplied.
    headers: Vec<(String, String)>,
    /// Caller-supplied `User-Agent`; `None` or a blank string selects the default.
    user_agent: Option<String>,
    /// Basic-auth user name; an empty string is treated as absent.
    username: Option<String>,
    /// Basic-auth password; an empty string is treated as absent.
    password: Option<String>,
    /// HTTP method used for the request.
    method: HttpMethod,
}
699
impl Default for WebReadOptions {
    /// Defaults: automatic content-type detection, a `DEFAULT_TIMEOUT_SECONDS` timeout, no
    /// extra headers, no user-agent override, no credentials, and the `GET` method.
    fn default() -> Self {
        Self {
            content_type: ContentTypeHint::Auto,
            timeout: Duration::from_secs_f64(DEFAULT_TIMEOUT_SECONDS),
            headers: Vec::new(),
            user_agent: None,
            username: None,
            password: None,
            method: HttpMethod::Get,
        }
    }
}
713
714impl WebReadOptions {
715    fn resolve_content_type(&self, header: Option<&str>) -> ResolvedContentType {
716        match self.content_type {
717            ContentTypeHint::Json => ResolvedContentType::Json,
718            ContentTypeHint::Text => ResolvedContentType::Text,
719            ContentTypeHint::Binary => ResolvedContentType::Binary,
720            ContentTypeHint::Auto => infer_content_type(header),
721        }
722    }
723}
724
725fn infer_content_type(header: Option<&str>) -> ResolvedContentType {
726    if let Some(raw) = header {
727        let mime = raw
728            .split(';')
729            .next()
730            .map(|part| part.trim().to_ascii_lowercase())
731            .unwrap_or_default();
732        if mime == "application/json" || mime == "text/json" || mime.ends_with("+json") {
733            ResolvedContentType::Json
734        } else if mime.starts_with("text/")
735            || mime == "application/xml"
736            || mime.ends_with("+xml")
737            || mime == "application/xhtml+xml"
738            || mime == "application/javascript"
739            || mime == "application/x-www-form-urlencoded"
740        {
741            ResolvedContentType::Text
742        } else {
743            ResolvedContentType::Binary
744        }
745    } else {
746        ResolvedContentType::Text
747    }
748}
749
750#[cfg(test)]
751mod tests {
752    use super::*;
753    use std::io::{Read, Write};
754    use std::net::{TcpListener, TcpStream};
755    use std::sync::mpsc;
756    use std::thread;
757
758    #[cfg(feature = "doc_export")]
759    use crate::builtins::common::test_support;
760
761    fn spawn_server<F>(handler: F) -> String
762    where
763        F: FnOnce(TcpStream) + Send + 'static,
764    {
765        let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
766        let addr = listener.local_addr().unwrap();
767        thread::spawn(move || {
768            if let Ok((stream, _)) = listener.accept() {
769                handler(stream);
770            }
771        });
772        format!("http://{}", addr)
773    }
774
775    fn respond_with(mut stream: TcpStream, content_type: &str, body: &[u8]) {
776        let response = format!(
777            "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nContent-Type: {}\r\nConnection: close\r\n\r\n",
778            body.len(),
779            content_type
780        );
781        let _ = stream.write_all(response.as_bytes());
782        let _ = stream.write_all(body);
783    }
784
785    fn read_request_headers(stream: &mut TcpStream) -> String {
786        let mut buffer = Vec::new();
787        let mut chunk = [0u8; 256];
788        while let Ok(read) = stream.read(&mut chunk) {
789            if read == 0 {
790                break;
791            }
792            buffer.extend_from_slice(&chunk[..read]);
793            if buffer.windows(4).any(|w| w == b"\r\n\r\n") {
794                break;
795            }
796            if buffer.len() > 16 * 1024 {
797                break;
798            }
799        }
800        String::from_utf8_lossy(&buffer).to_string()
801    }
802
803    #[test]
804    fn webread_fetches_json_response() {
805        let url = spawn_server(|mut stream| {
806            let mut buffer = [0u8; 1024];
807            let _ = stream.read(&mut buffer);
808            respond_with(
809                stream,
810                "application/json",
811                br#"{"message":"hello","value":42}"#,
812            );
813        });
814
815        let result = webread_builtin(Value::from(url), vec![]).expect("webread JSON response");
816
817        match result {
818            Value::Struct(struct_value) => {
819                let message = struct_value.fields.get("message").expect("message field");
820                let value = struct_value.fields.get("value").expect("value field");
821                match message {
822                    Value::CharArray(ca) => {
823                        let text: String = ca.data.iter().collect();
824                        assert_eq!(text, "hello");
825                    }
826                    other => panic!("expected char array, got {other:?}"),
827                }
828                match value {
829                    Value::Num(n) => assert_eq!(*n, 42.0),
830                    other => panic!("expected numeric value, got {other:?}"),
831                }
832            }
833            other => panic!("expected struct, got {other:?}"),
834        }
835    }
836
837    #[test]
838    fn webread_fetches_text_response() {
839        let url = spawn_server(|mut stream| {
840            let mut buffer = [0u8; 512];
841            let _ = stream.read(&mut buffer);
842            respond_with(stream, "text/plain; charset=utf-8", b"RunMat webread test");
843        });
844
845        let result = webread_builtin(Value::from(url), vec![]).expect("webread text response");
846
847        match result {
848            Value::CharArray(ca) => {
849                let text: String = ca.data.iter().collect();
850                assert_eq!(text, "RunMat webread test");
851            }
852            other => panic!("expected char array, got {other:?}"),
853        }
854    }
855
856    #[test]
857    fn webread_fetches_binary_payload() {
858        let payload = [1u8, 2, 3, 254, 255];
859        let url = spawn_server(move |mut stream| {
860            let mut buffer = [0u8; 512];
861            let _ = stream.read(&mut buffer);
862            respond_with(stream, "application/octet-stream", &payload);
863        });
864
865        let args = vec![Value::from("ContentType"), Value::from("binary")];
866        let result = webread_builtin(Value::from(url), args).expect("webread binary response");
867
868        match result {
869            Value::Tensor(tensor) => {
870                assert_eq!(tensor.shape, vec![1, 5]);
871                let bytes: Vec<u8> = tensor.data.iter().map(|v| *v as u8).collect();
872                assert_eq!(bytes, payload);
873            }
874            other => panic!("expected tensor, got {other:?}"),
875        }
876    }
877
878    #[test]
879    fn webread_appends_query_parameters() {
880        let (tx, rx) = mpsc::channel();
881        let url = spawn_server(move |mut stream| {
882            let request = read_request_headers(&mut stream);
883            let _ = tx.send(request);
884            respond_with(stream, "application/json", br#"{"ok":true}"#);
885        });
886
887        let args = vec![
888            Value::from("count"),
889            Value::Num(42.0),
890            Value::from("ContentType"),
891            Value::from("json"),
892        ];
893        let result = webread_builtin(Value::from(url.clone()), args).expect("webread query");
894        match result {
895            Value::Struct(struct_value) => {
896                assert!(struct_value.fields.contains_key("ok"));
897            }
898            other => panic!("expected struct result, got {other:?}"),
899        }
900        let request = rx.recv().expect("request log");
901        assert!(
902            request.starts_with("GET /"),
903            "unexpected request line: {request}"
904        );
905        assert!(
906            request.contains("count=42"),
907            "query parameters missing: {request}"
908        );
909    }
910
911    #[test]
912    fn webread_struct_argument_supports_options_and_query() {
913        let (tx, rx) = mpsc::channel();
914        let url = spawn_server(move |mut stream| {
915            let request = read_request_headers(&mut stream);
916            let _ = tx.send(request);
917            respond_with(stream, "application/json", br#"{"value":123}"#);
918        });
919
920        let mut fields = StructValue::new();
921        fields
922            .fields
923            .insert("ContentType".to_string(), Value::from("json"));
924        fields.fields.insert("limit".to_string(), Value::Num(5.0));
925
926        let result = webread_builtin(Value::from(url.clone()), vec![Value::Struct(fields)])
927            .expect("webread struct arg");
928
929        let request = rx.recv().expect("request log");
930        assert!(
931            request.contains("GET /?limit=5"),
932            "expected limit query parameter: {request}"
933        );
934
935        match result {
936            Value::Struct(struct_value) => match struct_value.fields.get("value") {
937                Some(Value::Num(n)) => assert_eq!(*n, 123.0),
938                other => panic!("unexpected JSON decode result: {other:?}"),
939            },
940            other => panic!("expected struct, got {other:?}"),
941        }
942    }
943
944    #[test]
945    fn webread_headerfields_struct_applies_custom_headers() {
946        let (tx, rx) = mpsc::channel();
947        let url = spawn_server(move |mut stream| {
948            let request = read_request_headers(&mut stream);
949            let _ = tx.send(request);
950            respond_with(stream, "application/json", br#"{"ok":true}"#);
951        });
952
953        let mut headers = StructValue::new();
954        headers
955            .fields
956            .insert("X-Test".to_string(), Value::from("RunMat"));
957
958        let args = vec![
959            Value::from("HeaderFields"),
960            Value::Struct(headers),
961            Value::from("ContentType"),
962            Value::from("json"),
963        ];
964
965        let result = webread_builtin(Value::from(url), args).expect("webread header fields");
966        assert!(matches!(result, Value::Struct(_)));
967
968        let request = rx.recv().expect("request log");
969        assert!(
970            request.to_ascii_lowercase().contains("x-test: runmat"),
971            "custom header missing: {request}"
972        );
973    }
974
975    #[test]
976    fn webread_queryparameters_option_struct() {
977        let (tx, rx) = mpsc::channel();
978        let url = spawn_server(move |mut stream| {
979            let request = read_request_headers(&mut stream);
980            let _ = tx.send(request);
981            respond_with(stream, "application/json", br#"{"ok":true}"#);
982        });
983
984        let mut params = StructValue::new();
985        params.fields.insert("page".to_string(), Value::Num(2.0));
986
987        let args = vec![
988            Value::from("QueryParameters"),
989            Value::Struct(params),
990            Value::from("ContentType"),
991            Value::from("json"),
992        ];
993
994        let result =
995            webread_builtin(Value::from(url.clone()), args).expect("webread query parameters");
996        assert!(matches!(result, Value::Struct(_)));
997
998        let request = rx.recv().expect("request log");
999        assert!(
1000            request.contains("page=2"),
1001            "query parameter missing: {request}"
1002        );
1003    }
1004
1005    #[test]
1006    fn webread_errors_on_missing_name_value_pair() {
1007        let err = webread_builtin(
1008            Value::from("https://example.com"),
1009            vec![Value::from("Timeout")],
1010        )
1011        .expect_err("expected missing value error");
1012        assert!(
1013            err.contains("missing value"),
1014            "unexpected error message: {err}"
1015        );
1016    }
1017
1018    #[test]
1019    fn webread_rejects_non_positive_timeout() {
1020        let args = vec![Value::from("Timeout"), Value::Num(0.0)];
1021        let err =
1022            webread_builtin(Value::from("https://example.com"), args).expect_err("timeout error");
1023        assert!(
1024            err.contains("Timeout must be a finite, positive scalar"),
1025            "unexpected error message: {err}"
1026        );
1027    }
1028
1029    #[test]
1030    fn webread_rejects_password_without_username() {
1031        let args = vec![Value::from("Password"), Value::from("secret")];
1032        let err =
1033            webread_builtin(Value::from("https://example.com"), args).expect_err("auth error");
1034        assert!(
1035            err.contains("Password requires a Username"),
1036            "unexpected error message: {err}"
1037        );
1038    }
1039
1040    #[test]
1041    fn webread_rejects_unsupported_content_type() {
1042        let args = vec![Value::from("ContentType"), Value::from("table")];
1043        let err =
1044            webread_builtin(Value::from("https://example.com"), args).expect_err("format error");
1045        assert!(
1046            err.contains("unsupported ContentType"),
1047            "unexpected error message: {err}"
1048        );
1049    }
1050
1051    #[test]
1052    fn webread_rejects_invalid_headerfields_shape() {
1053        let cell = crate::make_cell(
1054            vec![Value::from("A"), Value::from("B"), Value::from("C")],
1055            1,
1056            3,
1057        )
1058        .expect("make cell");
1059
1060        let args = vec![Value::from("HeaderFields"), cell];
1061        let err =
1062            webread_builtin(Value::from("https://example.com"), args).expect_err("header error");
1063        assert!(
1064            err.contains("HeaderFields cell array must have exactly two columns"),
1065            "unexpected error message: {err}"
1066        );
1067    }
1068
1069    #[test]
1070    #[cfg(feature = "doc_export")]
1071    fn doc_examples_present() {
1072        let blocks = test_support::doc_examples(DOC_MD);
1073        assert!(!blocks.is_empty());
1074    }
1075}