1use std::collections::VecDeque;
4use std::time::Duration;
5
6use reqwest::blocking::{Client, RequestBuilder};
7use reqwest::header::{HeaderName, HeaderValue, CONTENT_TYPE};
8use reqwest::Url;
9use runmat_builtins::{CellArray, CharArray, StructValue, Tensor, Value};
10use runmat_macros::runtime_builtin;
11
12use crate::builtins::common::spec::{
13 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
14 ReductionNaN, ResidencyPolicy, ShapeRequirements,
15};
16use crate::builtins::io::json::jsondecode::decode_json_text;
17use crate::gather_if_needed;
18#[cfg(feature = "doc_export")]
19use crate::register_builtin_doc_text;
20use crate::{register_builtin_fusion_spec, register_builtin_gpu_spec};
21
/// Request timeout, in seconds, used when the caller does not supply a `Timeout` option.
const DEFAULT_TIMEOUT_SECONDS: f64 = 60.0;
/// `User-Agent` value sent when no non-blank `UserAgent` option is supplied.
const DEFAULT_USER_AGENT: &str = "RunMat webread/0.0";
24
25#[cfg(feature = "doc_export")]
26#[allow(clippy::too_many_lines)]
27pub const DOC_MD: &str = r#"---
28title: "webread"
29category: "io/http"
30keywords: ["webread", "http get", "rest client", "json", "https", "api"]
31summary: "Download web content (JSON, text, or binary) over HTTP/HTTPS."
32references:
33 - https://www.mathworks.com/help/matlab/ref/webread.html
34gpu_support:
35 elementwise: false
36 reduction: false
37 precisions: []
38 broadcasting: "none"
39 notes: "webread always gathers gpuArray inputs and executes on the CPU."
40fusion:
41 elementwise: false
42 reduction: false
43 max_inputs: 1
44 constants: "inline"
45requires_feature: null
46tested:
47 unit: "builtins::io::http::webread::tests"
48 integration:
49 - "builtins::io::http::webread::tests::webread_fetches_json_response"
50 - "builtins::io::http::webread::tests::webread_fetches_text_response"
51 - "builtins::io::http::webread::tests::webread_fetches_binary_payload"
52 - "builtins::io::http::webread::tests::webread_appends_query_parameters"
53 - "builtins::io::http::webread::tests::webread_struct_argument_supports_options_and_query"
54 - "builtins::io::http::webread::tests::webread_headerfields_struct_applies_custom_headers"
55 - "builtins::io::http::webread::tests::webread_queryparameters_option_struct"
56---
57
58# What does the `webread` function do in MATLAB / RunMat?
59`webread` issues an HTTP or HTTPS request and returns the response body as a MATLAB-compatible
60value. Textual payloads become character vectors, JSON responses are decoded into structs, cells,
61and numeric arrays, while binary payloads return numeric vectors of bytes.
62
63## How does the `webread` function behave in MATLAB / RunMat?
64- Accepts URLs supplied as character vectors or string scalars; the URL must be absolute.
65- Optional `weboptions`-style fields (either through a struct argument or name-value pairs) control
66 content decoding (`ContentType`), request timeout (`Timeout`), headers (`HeaderFields`), and
67 authentication (`Username`/`Password`). The builtin currently supports the default `GET`
68 request method; use `webwrite` for POST/PUT uploads.
69- Additional name-value pairs that do not match an option are appended to the query string using
70 percent-encoding. A leading struct or cell array argument can also supply query parameters.
71- `ContentType 'auto'` (default) inspects the `Content-Type` response header to choose between JSON,
72 text, or binary decoding. Explicit `ContentType 'json'`, `'text'`, or `'binary'` override the
73 detection logic.
74- JSON responses are parsed with the same rules as `jsondecode`, producing doubles, logicals,
75 strings, structs, and cell arrays that match MATLAB semantics.
- Text responses are decoded using the character set declared in the `Content-Type` response header,
  falling back to UTF-8 when none is declared. Binary payloads return `1×N` double arrays whose
  entries store byte values in the range 0–255.
79- HTTP errors (non-2xx status codes), timeouts, TLS failures, and parsing problems raise descriptive
80 MATLAB-style errors.
81
82## `webread` Function GPU Execution Behaviour
83`webread` runs entirely on the CPU. Any `gpuArray` inputs (for example, query parameter values)
84are gathered to host memory before building the HTTP request. Results are produced on the host, and
85fusion graphs terminate at this builtin via `ResidencyPolicy::GatherImmediately`.
86
87## Examples of using the `webread` function in MATLAB / RunMat
88
89### Reading JSON data from a REST API
90```matlab
91opts = weboptions("ContentType", "json", "Timeout", 15);
92weather = webread("https://api.example.com/weather", opts, "city", "Reykjavik");
93disp(weather.temperatureC);
94```
95Expected output:
96```matlab
97 2.3
98```
99
100### Downloading plain text as a character vector
101```matlab
102html = webread("https://example.com/index.txt", "Timeout", 5);
103extract = html(1:200);
104```
105`extract` contains the first 200 characters as a `1×200` char vector.
106
107### Retrieving binary payloads such as images
108```matlab
109bytes = webread("https://example.com/logo.png", "ContentType", "binary");
110filewrite("logo.png", uint8(bytes));
111```
112The PNG file is written locally after converting the returned bytes to `uint8`.
113
114### Supplying custom headers and credentials
115```matlab
116headers = struct("Accept", "application/json", "X-Client", "RunMat");
117data = webread("https://api.example.com/me", ...
118 "Username", "ada", "Password", "secret", ...
119 "HeaderFields", headers, ...
120 "ContentType", "json");
121```
122Custom headers are merged into the request, and HTTP basic authentication credentials are attached.
123
124### Passing query parameters as a struct
125```matlab
126query = struct("limit", 25, "sort", "name");
127response = webread("https://api.example.com/resources", query, "ContentType", "json");
128```
129`query` is promoted into the URL query string before the request is sent.
130
131## GPU residency in RunMat (Do I need `gpuArray`?)
132No. `webread` gathers any GPU-resident values before contacting the network and produces host
133results. Keeping inputs on the GPU offers no benefit because HTTP/TLS stacks operate on the CPU.
134
135## FAQ
136
1371. **Can `webread` decode JSON automatically?**
138 Yes. When the server reports a JSON `Content-Type` header (for example `application/json`
139 or `application/vnd.api+json`) the builtin decodes it using the same rules as `jsondecode`.
140 Override the behaviour with `"ContentType","text"` or `"ContentType","binary"` when needed.
141
1422. **How do I control request timeouts?**
143 Supply `"Timeout", seconds` as a name-value pair or in an options struct. The default timeout
144 is 60 seconds. Timeouts raise `webread: request to <url> timed out`.
145
1463. **What headers can I set?**
147 Use `"HeaderFields", struct(...)` or a `cell` array of name/value pairs. Header names must be
148 valid HTTP tokens. The builtin automatically sets a RunMat-specific `User-Agent` string unless
   you override it with `"UserAgent", "..."`.
150
1514. **Does `webread` follow redirects?**
152 Yes. The underlying HTTP client follows redirects up to the platform default limit while
153 preserving headers and authentication.
154
1555. **How do I provide credentials?**
156 Use `"Username", "user", "Password", "pass"` for HTTP basic authentication. Supplying a password
157 without a username raises an error.
158
1596. **Can I send POST or PUT requests?**
160 `webread` is designed for read-only requests and currently supports the default `GET` method.
161 Use `webwrite` (planned) for requests that include bodies or mutate server state.
162
1637. **How are binary responses represented?**
164 Binary payloads return `1×N` double arrays whose elements are byte values. Convert them to the
165 desired integer type (for example `uint8`) before further processing.
166
1678. **What happens when the server returns an error status?**
168 Non-success HTTP status codes raise `webread: request to … failed with HTTP status XYZ`. Inspect
169 the remote server logs or response headers for additional diagnostics.
170
1719. **Does `webread` support compressed responses?**
172 Yes. The builtin enables gzip / deflate content decoding through the HTTP client automatically.
173
17410. **Can I pass query parameters as GPU arrays?**
175 Yes. Inputs wrapped in `gpuArray` are gathered before assembling the query string.
176
177## See Also
178[webwrite](./webwrite), [weboptions](./weboptions), [jsondecode](../json/jsondecode), [websave](../filetext/filewrite)
179"#;
180
/// GPU execution metadata for the `webread` builtin.
///
/// Describes the builtin as a custom `"http-get"` operation with no supported
/// precisions, no broadcasting and no provider hooks, and selects
/// `ResidencyPolicy::GatherImmediately` as its residency policy.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "webread",
    op_kind: GpuOpKind::Custom("http-get"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "HTTP requests always execute on the CPU; gpuArray inputs are gathered eagerly.",
};

// Hand the GPU spec to the crate's registration macro.
register_builtin_gpu_spec!(GPU_SPEC);
197
/// Fusion metadata for the `webread` builtin.
///
/// Accepts any input shape and declares neither an elementwise nor a reduction
/// fusion form; the `notes` field records that the builtin performs network I/O.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "webread",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "webread performs network I/O and terminates fusion graphs.",
};

// Hand the fusion spec to the crate's registration macro.
register_builtin_fusion_spec!(FUSION_SPEC);
209
// Only compiled with the `doc_export` feature: registers the embedded Markdown
// document under the builtin name "webread".
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("webread", DOC_MD);
212
213#[runtime_builtin(
214 name = "webread",
215 category = "io/http",
216 summary = "Download web content (JSON, text, or binary) over HTTP/HTTPS.",
217 keywords = "webread,http get,rest client,json,api",
218 accel = "sink"
219)]
220fn webread_builtin(url: Value, rest: Vec<Value>) -> Result<Value, String> {
221 let gathered_url = gather_if_needed(&url).map_err(|e| format!("webread: {e}"))?;
222 let gathered_args = gather_arguments(rest)?;
223 let url_text = expect_string_scalar(
224 &gathered_url,
225 "webread: URL must be a character vector or string scalar",
226 )?;
227 if url_text.trim().is_empty() {
228 return Err("webread: URL must not be empty".to_string());
229 }
230 let (options, query_params) = parse_arguments(gathered_args)?;
231 execute_request(&url_text, options, &query_params)
232}
233
234fn gather_arguments(values: Vec<Value>) -> Result<Vec<Value>, String> {
235 let mut out = Vec::with_capacity(values.len());
236 for value in values {
237 out.push(gather_if_needed(&value).map_err(|e| format!("webread: {e}"))?);
238 }
239 Ok(out)
240}
241
242fn parse_arguments(args: Vec<Value>) -> Result<(WebReadOptions, Vec<(String, String)>), String> {
243 let mut queue: VecDeque<Value> = args.into();
244 let mut options = WebReadOptions::default();
245 let mut query_params = Vec::new();
246
247 if matches!(queue.front(), Some(Value::Struct(_))) {
248 if let Some(Value::Struct(struct_value)) = queue.pop_front() {
249 process_struct_fields(&struct_value, &mut options, &mut query_params)?;
250 }
251 } else if matches!(queue.front(), Some(Value::Cell(_))) {
252 if let Some(Value::Cell(cell)) = queue.pop_front() {
253 append_query_from_cell(&cell, &mut query_params)?
254 }
255 }
256
257 while let Some(name_value) = queue.pop_front() {
258 let name = expect_string_scalar(
259 &name_value,
260 "webread: parameter names must be character vectors or string scalars",
261 )?;
262 let value = queue
263 .pop_front()
264 .ok_or_else(|| "webread: missing value for name-value argument".to_string())?;
265 process_name_value_pair(&name, &value, &mut options, &mut query_params)?;
266 }
267
268 Ok((options, query_params))
269}
270
271fn process_struct_fields(
272 struct_value: &StructValue,
273 options: &mut WebReadOptions,
274 query_params: &mut Vec<(String, String)>,
275) -> Result<(), String> {
276 for (key, value) in &struct_value.fields {
277 process_name_value_pair(key, value, options, query_params)?;
278 }
279 Ok(())
280}
281
282fn process_name_value_pair(
283 name: &str,
284 value: &Value,
285 options: &mut WebReadOptions,
286 query_params: &mut Vec<(String, String)>,
287) -> Result<(), String> {
288 let lower = name.to_ascii_lowercase();
289 match lower.as_str() {
290 "contenttype" => {
291 options.content_type = parse_content_type(value)?;
292 Ok(())
293 }
294 "timeout" => {
295 options.timeout = parse_timeout(value)?;
296 Ok(())
297 }
298 "headerfields" => {
299 let headers = parse_header_fields(value)?;
300 options.headers.extend(headers);
301 Ok(())
302 }
303 "useragent" => {
304 options.user_agent = Some(expect_string_scalar(
305 value,
306 "webread: UserAgent must be a character vector or string scalar",
307 )?);
308 Ok(())
309 }
310 "username" => {
311 options.username = Some(expect_string_scalar(
312 value,
313 "webread: Username must be a character vector or string scalar",
314 )?);
315 Ok(())
316 }
317 "password" => {
318 options.password = Some(expect_string_scalar(
319 value,
320 "webread: Password must be a character vector or string scalar",
321 )?);
322 Ok(())
323 }
324 "requestmethod" => {
325 options.method = parse_request_method(value)?;
326 Ok(())
327 }
328 "mediatype" => {
329 expect_string_scalar(
331 value,
332 "webread: MediaType must be a character vector or string scalar",
333 )?;
334 Ok(())
335 }
336 "queryparameters" => append_query_from_value(value, query_params),
337 _ => {
338 let param_value = value_to_query_string(value, name)?;
339 query_params.push((name.to_string(), param_value));
340 Ok(())
341 }
342 }
343}
344
345fn append_query_from_value(
346 value: &Value,
347 query_params: &mut Vec<(String, String)>,
348) -> Result<(), String> {
349 match value {
350 Value::Struct(struct_value) => {
351 for (key, val) in &struct_value.fields {
352 let text = value_to_query_string(val, key)?;
353 query_params.push((key.clone(), text));
354 }
355 Ok(())
356 }
357 Value::Cell(cell) => append_query_from_cell(cell, query_params),
358 _ => Err("webread: QueryParameters must be a struct or cell array".to_string()),
359 }
360}
361
362fn append_query_from_cell(
363 cell: &CellArray,
364 query_params: &mut Vec<(String, String)>,
365) -> Result<(), String> {
366 if cell.cols != 2 {
367 return Err("webread: cell array of query parameters must have two columns".to_string());
368 }
369 for row in 0..cell.rows {
370 let name_value = cell.get(row, 0).map_err(|e| format!("webread: {e}"))?;
371 let value_value = cell.get(row, 1).map_err(|e| format!("webread: {e}"))?;
372 let name = expect_string_scalar(
373 &name_value,
374 "webread: query parameter names must be text scalars",
375 )?;
376 let text = value_to_query_string(&value_value, &name)?;
377 query_params.push((name, text));
378 }
379 Ok(())
380}
381
382fn execute_request(
383 url_text: &str,
384 options: WebReadOptions,
385 query_params: &[(String, String)],
386) -> Result<Value, String> {
387 let username_present = options
388 .username
389 .as_ref()
390 .map(|s| !s.is_empty())
391 .unwrap_or(false);
392 let password_present = options
393 .password
394 .as_ref()
395 .map(|s| !s.is_empty())
396 .unwrap_or(false);
397 if password_present && !username_present {
398 return Err("webread: Password requires a Username option".to_string());
399 }
400
401 let mut url =
402 Url::parse(url_text).map_err(|err| format!("webread: invalid URL '{url_text}': {err}"))?;
403 if !query_params.is_empty() {
404 {
405 let mut pairs = url.query_pairs_mut();
406 for (name, value) in query_params {
407 pairs.append_pair(name, value);
408 }
409 }
410 }
411 let url_display = url.to_string();
412
413 let user_agent = options
414 .user_agent
415 .as_deref()
416 .filter(|ua| !ua.trim().is_empty())
417 .unwrap_or(DEFAULT_USER_AGENT);
418
419 let client = Client::builder()
420 .timeout(options.timeout)
421 .user_agent(user_agent)
422 .build()
423 .map_err(|err| format!("webread: failed to build HTTP client ({err})"))?;
424
425 let mut builder = match options.method {
426 HttpMethod::Get => client.get(url.clone()),
427 };
428 builder = apply_headers(builder, &options.headers)?;
429 if let Some(username) = &options.username {
430 if !username.is_empty() {
431 let password = options.password.as_ref().filter(|p| !p.is_empty()).cloned();
432 builder = builder.basic_auth(username.clone(), password);
433 }
434 }
435
436 let response = builder
437 .send()
438 .map_err(|err| request_error("request", &url_display, err))?;
439 let status = response.status();
440 if !status.is_success() {
441 return Err(format!(
442 "webread: request to {} failed with HTTP status {}",
443 url_display, status
444 ));
445 }
446
447 let header_content_type = response
448 .headers()
449 .get(CONTENT_TYPE)
450 .and_then(|value| value.to_str().ok())
451 .map(|s| s.to_string());
452 let resolved = options.resolve_content_type(header_content_type.as_deref());
453
454 match resolved {
455 ResolvedContentType::Json => {
456 let body = response
457 .text()
458 .map_err(|err| request_error("read response body", &url_display, err))?;
459 match decode_json_text(&body) {
460 Ok(value) => Ok(value),
461 Err(err) => Err(map_json_error(err)),
462 }
463 }
464 ResolvedContentType::Text => {
465 let body = response
466 .text()
467 .map_err(|err| request_error("read response body", &url_display, err))?;
468 let array = CharArray::new_row(&body);
469 Ok(Value::CharArray(array))
470 }
471 ResolvedContentType::Binary => {
472 let bytes = response
473 .bytes()
474 .map_err(|err| request_error("read response body", &url_display, err))?;
475 let data: Vec<f64> = bytes.iter().map(|b| f64::from(*b)).collect();
476 let cols = bytes.len();
477 let tensor =
478 Tensor::new(data, vec![1, cols]).map_err(|err| format!("webread: {err}"))?;
479 Ok(Value::Tensor(tensor))
480 }
481 }
482}
483
484fn request_error(action: &str, url: &str, err: reqwest::Error) -> String {
485 if err.is_timeout() {
486 format!("webread: {action} to {url} timed out")
487 } else if err.is_connect() {
488 format!("webread: unable to connect to {url}: {err}")
489 } else if err.is_status() {
490 format!("webread: HTTP error for {url}: {err}")
491 } else {
492 format!("webread: failed to {action} {url}: {err}")
493 }
494}
495
/// Rewraps a JSON decoding error as a `webread` error.
///
/// A leading `jsondecode: ` prefix is removed (once) so the message is not
/// double-prefixed; the remainder is placed in parentheses.
fn map_json_error(err: String) -> String {
    let detail = err.strip_prefix("jsondecode: ").unwrap_or(&err);
    format!("webread: failed to parse JSON response ({detail})")
}
503
504fn apply_headers(
505 mut builder: RequestBuilder,
506 headers: &[(String, String)],
507) -> Result<RequestBuilder, String> {
508 for (name, value) in headers {
509 if name.trim().is_empty() {
510 return Err("webread: header names must not be empty".to_string());
511 }
512 let header_name = HeaderName::from_bytes(name.as_bytes())
513 .map_err(|_| format!("webread: invalid header name '{name}'"))?;
514 let header_value = HeaderValue::from_str(value)
515 .map_err(|_| format!("webread: invalid header value for '{name}'"))?;
516 builder = builder.header(header_name, header_value);
517 }
518 Ok(builder)
519}
520
521fn parse_header_fields(value: &Value) -> Result<Vec<(String, String)>, String> {
522 match value {
523 Value::Struct(struct_value) => {
524 let mut headers = Vec::with_capacity(struct_value.fields.len());
525 for (key, val) in &struct_value.fields {
526 let header_value = expect_string_scalar(
527 val,
528 "webread: header values must be character vectors or string scalars",
529 )?;
530 headers.push((key.clone(), header_value));
531 }
532 Ok(headers)
533 }
534 Value::Cell(cell) => {
535 if cell.cols != 2 {
536 return Err(
537 "webread: HeaderFields cell array must have exactly two columns".to_string(),
538 );
539 }
540 let mut headers = Vec::with_capacity(cell.rows);
541 for row in 0..cell.rows {
542 let name = cell.get(row, 0).map_err(|e| format!("webread: {e}"))?;
543 let value = cell.get(row, 1).map_err(|e| format!("webread: {e}"))?;
544 let header_name = expect_string_scalar(
545 &name,
546 "webread: header names must be character vectors or string scalars",
547 )?;
548 if header_name.trim().is_empty() {
549 return Err("webread: header names must not be empty".to_string());
550 }
551 let header_value = expect_string_scalar(
552 &value,
553 "webread: header values must be character vectors or string scalars",
554 )?;
555 headers.push((header_name, header_value));
556 }
557 Ok(headers)
558 }
559 _ => Err(
560 "webread: HeaderFields must be provided as a struct or cell array of name/value pairs"
561 .to_string(),
562 ),
563 }
564}
565
566fn parse_content_type(value: &Value) -> Result<ContentTypeHint, String> {
567 let text = expect_string_scalar(
568 value,
569 "webread: ContentType must be a character vector or string scalar",
570 )?;
571 match text.trim().to_ascii_lowercase().as_str() {
572 "auto" => Ok(ContentTypeHint::Auto),
573 "json" => Ok(ContentTypeHint::Json),
574 "text" | "char" | "string" => Ok(ContentTypeHint::Text),
575 "binary" | "octet-stream" | "raw" => Ok(ContentTypeHint::Binary),
576 other => Err(format!(
577 "webread: unsupported ContentType '{}'; use 'auto', 'json', 'text', or 'binary'",
578 other
579 )),
580 }
581}
582
583fn parse_timeout(value: &Value) -> Result<Duration, String> {
584 let seconds = numeric_scalar(value, "webread: Timeout must be a finite, positive scalar")?;
585 if !seconds.is_finite() || seconds <= 0.0 {
586 return Err("webread: Timeout must be a finite, positive scalar".to_string());
587 }
588 Ok(Duration::from_secs_f64(seconds))
589}
590
591fn parse_request_method(value: &Value) -> Result<HttpMethod, String> {
592 let text = expect_string_scalar(
593 value,
594 "webread: RequestMethod must be a character vector or string scalar",
595 )?;
596 let lower = text.trim().to_ascii_lowercase();
597 match lower.as_str() {
598 "get" | "auto" => Ok(HttpMethod::Get),
599 other => Err(format!(
600 "webread: RequestMethod '{}' is not supported; expected 'auto' or 'get'",
601 other
602 )),
603 }
604}
605
606fn numeric_scalar(value: &Value, context: &str) -> Result<f64, String> {
607 match value {
608 Value::Num(n) => Ok(*n),
609 Value::Int(i) => Ok(i.to_f64()),
610 Value::Tensor(tensor) => {
611 if tensor.data.len() == 1 {
612 Ok(tensor.data[0])
613 } else {
614 Err(context.to_string())
615 }
616 }
617 _ => Err(context.to_string()),
618 }
619}
620
621fn expect_string_scalar(value: &Value, context: &str) -> Result<String, String> {
622 match value {
623 Value::String(s) => Ok(s.clone()),
624 Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
625 Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
626 _ => Err(context.to_string()),
627 }
628}
629
630fn value_to_query_string(value: &Value, name: &str) -> Result<String, String> {
631 match value {
632 Value::String(s) => Ok(s.clone()),
633 Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
634 Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
635 Value::Num(n) => Ok(format!("{}", n)),
636 Value::Int(i) => Ok(i.to_i64().to_string()),
637 Value::Bool(b) => Ok(if *b { "true".into() } else { "false".into() }),
638 Value::Tensor(tensor) => {
639 if tensor.data.len() == 1 {
640 Ok(format!("{}", tensor.data[0]))
641 } else {
642 Err(format!(
643 "webread: query parameter '{}' must be scalar",
644 name
645 ))
646 }
647 }
648 Value::LogicalArray(array) => {
649 if array.len() == 1 {
650 Ok(if array.data[0] != 0 {
651 "true".into()
652 } else {
653 "false".into()
654 })
655 } else {
656 Err(format!(
657 "webread: query parameter '{}' must be scalar",
658 name
659 ))
660 }
661 }
662 _ => Err(format!(
663 "webread: unsupported value type for query parameter '{}'",
664 name
665 )),
666 }
667}
668
/// Decoding mode requested through the `ContentType` option.
#[derive(Clone, Copy, Debug)]
enum ContentTypeHint {
    /// Choose the decoding from the response's `Content-Type` header.
    Auto,
    /// Always return the body as text.
    Text,
    /// Always decode the body as JSON.
    Json,
    /// Always return the body as raw bytes.
    Binary,
}
676
/// Final decoding chosen for a response body, after any `Auto` hint has been
/// resolved against the response headers.
#[derive(Clone, Copy, Debug)]
enum ResolvedContentType {
    /// Body is returned as a character row.
    Text,
    /// Body is parsed as JSON.
    Json,
    /// Body is returned as a row of byte values.
    Binary,
}
683
/// HTTP method used for the request. Only `GET` is currently representable.
#[derive(Clone, Copy, Debug)]
enum HttpMethod {
    /// Plain HTTP `GET`.
    Get,
}
688
/// Request options collected from the `webread` arguments.
#[derive(Clone, Debug)]
struct WebReadOptions {
    /// How the response body should be decoded (`ContentType`).
    content_type: ContentTypeHint,
    /// Timeout handed to the HTTP client (`Timeout`).
    timeout: Duration,
    /// Extra request headers as `(name, value)` pairs (`HeaderFields`).
    headers: Vec<(String, String)>,
    /// Custom `User-Agent`; `None` or blank falls back to the default.
    user_agent: Option<String>,
    /// Basic-auth user name (`Username`).
    username: Option<String>,
    /// Basic-auth password (`Password`).
    password: Option<String>,
    /// HTTP method (`RequestMethod`).
    method: HttpMethod,
}
699
700impl Default for WebReadOptions {
701 fn default() -> Self {
702 Self {
703 content_type: ContentTypeHint::Auto,
704 timeout: Duration::from_secs_f64(DEFAULT_TIMEOUT_SECONDS),
705 headers: Vec::new(),
706 user_agent: None,
707 username: None,
708 password: None,
709 method: HttpMethod::Get,
710 }
711 }
712}
713
714impl WebReadOptions {
715 fn resolve_content_type(&self, header: Option<&str>) -> ResolvedContentType {
716 match self.content_type {
717 ContentTypeHint::Json => ResolvedContentType::Json,
718 ContentTypeHint::Text => ResolvedContentType::Text,
719 ContentTypeHint::Binary => ResolvedContentType::Binary,
720 ContentTypeHint::Auto => infer_content_type(header),
721 }
722 }
723}
724
725fn infer_content_type(header: Option<&str>) -> ResolvedContentType {
726 if let Some(raw) = header {
727 let mime = raw
728 .split(';')
729 .next()
730 .map(|part| part.trim().to_ascii_lowercase())
731 .unwrap_or_default();
732 if mime == "application/json" || mime == "text/json" || mime.ends_with("+json") {
733 ResolvedContentType::Json
734 } else if mime.starts_with("text/")
735 || mime == "application/xml"
736 || mime.ends_with("+xml")
737 || mime == "application/xhtml+xml"
738 || mime == "application/javascript"
739 || mime == "application/x-www-form-urlencoded"
740 {
741 ResolvedContentType::Text
742 } else {
743 ResolvedContentType::Binary
744 }
745 } else {
746 ResolvedContentType::Text
747 }
748}
749
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Read, Write};
    use std::net::{TcpListener, TcpStream};
    use std::sync::mpsc;
    use std::thread;

    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    /// Binds a listener on an ephemeral loopback port, serves exactly one
    /// connection with `handler` on a background thread, and returns the
    /// `http://host:port` base URL of that listener.
    fn spawn_server<F>(handler: F) -> String
    where
        F: FnOnce(TcpStream) + Send + 'static,
    {
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
        let addr = listener.local_addr().expect("test server address");
        thread::spawn(move || {
            if let Ok((stream, _)) = listener.accept() {
                handler(stream);
            }
        });
        format!("http://{}", addr)
    }

    /// Writes a minimal `200 OK` response with the given content type and body.
    /// Write failures are ignored; the client side of the test will surface them.
    fn respond_with(mut stream: TcpStream, content_type: &str, body: &[u8]) {
        let response = format!(
            "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nContent-Type: {}\r\nConnection: close\r\n\r\n",
            body.len(),
            content_type
        );
        let _ = stream.write_all(response.as_bytes());
        let _ = stream.write_all(body);
    }

    /// Reads from `stream` until the blank line that ends the request headers,
    /// end of stream, a read error, or more than 16 KiB has been buffered, and
    /// returns what was read as (lossy) UTF-8 text.
    fn read_request_headers(stream: &mut TcpStream) -> String {
        let mut buffer = Vec::new();
        let mut chunk = [0u8; 256];
        while let Ok(read) = stream.read(&mut chunk) {
            if read == 0 {
                break;
            }
            buffer.extend_from_slice(&chunk[..read]);
            if buffer.windows(4).any(|w| w == b"\r\n\r\n") {
                break;
            }
            if buffer.len() > 16 * 1024 {
                break;
            }
        }
        String::from_utf8_lossy(&buffer).to_string()
    }

    /// A JSON content type is decoded into a struct without any options.
    #[test]
    fn webread_fetches_json_response() {
        let url = spawn_server(|mut stream| {
            // Drain the whole request head before answering.
            let _request = read_request_headers(&mut stream);
            respond_with(
                stream,
                "application/json",
                br#"{"message":"hello","value":42}"#,
            );
        });

        let result = webread_builtin(Value::from(url), vec![]).expect("webread JSON response");

        match result {
            Value::Struct(struct_value) => {
                let message = struct_value.fields.get("message").expect("message field");
                let value = struct_value.fields.get("value").expect("value field");
                match message {
                    Value::CharArray(ca) => {
                        let text: String = ca.data.iter().collect();
                        assert_eq!(text, "hello");
                    }
                    other => panic!("expected char array, got {other:?}"),
                }
                match value {
                    Value::Num(n) => assert_eq!(*n, 42.0),
                    other => panic!("expected numeric value, got {other:?}"),
                }
            }
            other => panic!("expected struct, got {other:?}"),
        }
    }

    /// A `text/*` content type is returned as a character array.
    #[test]
    fn webread_fetches_text_response() {
        let url = spawn_server(|mut stream| {
            // Drain the whole request head before answering.
            let _request = read_request_headers(&mut stream);
            respond_with(stream, "text/plain; charset=utf-8", b"RunMat webread test");
        });

        let result = webread_builtin(Value::from(url), vec![]).expect("webread text response");

        match result {
            Value::CharArray(ca) => {
                let text: String = ca.data.iter().collect();
                assert_eq!(text, "RunMat webread test");
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    /// `ContentType = binary` yields a 1×N tensor of byte values.
    #[test]
    fn webread_fetches_binary_payload() {
        let payload = [1u8, 2, 3, 254, 255];
        let url = spawn_server(move |mut stream| {
            // Drain the whole request head before answering.
            let _request = read_request_headers(&mut stream);
            respond_with(stream, "application/octet-stream", &payload);
        });

        let args = vec![Value::from("ContentType"), Value::from("binary")];
        let result = webread_builtin(Value::from(url), args).expect("webread binary response");

        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![1, 5]);
                let bytes: Vec<u8> = tensor.data.iter().map(|v| *v as u8).collect();
                assert_eq!(bytes, payload);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
    }

    /// Unrecognised name/value pairs end up in the query string.
    #[test]
    fn webread_appends_query_parameters() {
        let (tx, rx) = mpsc::channel();
        let url = spawn_server(move |mut stream| {
            let request = read_request_headers(&mut stream);
            let _ = tx.send(request);
            respond_with(stream, "application/json", br#"{"ok":true}"#);
        });

        let args = vec![
            Value::from("count"),
            Value::Num(42.0),
            Value::from("ContentType"),
            Value::from("json"),
        ];
        let result = webread_builtin(Value::from(url), args).expect("webread query");
        match result {
            Value::Struct(struct_value) => {
                assert!(struct_value.fields.contains_key("ok"));
            }
            other => panic!("expected struct result, got {other:?}"),
        }
        let request = rx.recv().expect("request log");
        assert!(
            request.starts_with("GET /"),
            "unexpected request line: {request}"
        );
        assert!(
            request.contains("count=42"),
            "query parameters missing: {request}"
        );
    }

    /// A leading struct can mix option fields and query-parameter fields.
    #[test]
    fn webread_struct_argument_supports_options_and_query() {
        let (tx, rx) = mpsc::channel();
        let url = spawn_server(move |mut stream| {
            let request = read_request_headers(&mut stream);
            let _ = tx.send(request);
            respond_with(stream, "application/json", br#"{"value":123}"#);
        });

        let mut fields = StructValue::new();
        fields
            .fields
            .insert("ContentType".to_string(), Value::from("json"));
        fields.fields.insert("limit".to_string(), Value::Num(5.0));

        let result = webread_builtin(Value::from(url), vec![Value::Struct(fields)])
            .expect("webread struct arg");

        let request = rx.recv().expect("request log");
        assert!(
            request.contains("GET /?limit=5"),
            "expected limit query parameter: {request}"
        );

        match result {
            Value::Struct(struct_value) => match struct_value.fields.get("value") {
                Some(Value::Num(n)) => assert_eq!(*n, 123.0),
                other => panic!("unexpected JSON decode result: {other:?}"),
            },
            other => panic!("expected struct, got {other:?}"),
        }
    }

    /// `HeaderFields` given as a struct is sent as request headers.
    #[test]
    fn webread_headerfields_struct_applies_custom_headers() {
        let (tx, rx) = mpsc::channel();
        let url = spawn_server(move |mut stream| {
            let request = read_request_headers(&mut stream);
            let _ = tx.send(request);
            respond_with(stream, "application/json", br#"{"ok":true}"#);
        });

        let mut headers = StructValue::new();
        headers
            .fields
            .insert("X-Test".to_string(), Value::from("RunMat"));

        let args = vec![
            Value::from("HeaderFields"),
            Value::Struct(headers),
            Value::from("ContentType"),
            Value::from("json"),
        ];

        let result = webread_builtin(Value::from(url), args).expect("webread header fields");
        assert!(matches!(result, Value::Struct(_)));

        let request = rx.recv().expect("request log");
        assert!(
            request.to_ascii_lowercase().contains("x-test: runmat"),
            "custom header missing: {request}"
        );
    }

    /// `QueryParameters` given as a struct is appended to the query string.
    #[test]
    fn webread_queryparameters_option_struct() {
        let (tx, rx) = mpsc::channel();
        let url = spawn_server(move |mut stream| {
            let request = read_request_headers(&mut stream);
            let _ = tx.send(request);
            respond_with(stream, "application/json", br#"{"ok":true}"#);
        });

        let mut params = StructValue::new();
        params.fields.insert("page".to_string(), Value::Num(2.0));

        let args = vec![
            Value::from("QueryParameters"),
            Value::Struct(params),
            Value::from("ContentType"),
            Value::from("json"),
        ];

        let result = webread_builtin(Value::from(url), args).expect("webread query parameters");
        assert!(matches!(result, Value::Struct(_)));

        let request = rx.recv().expect("request log");
        assert!(
            request.contains("page=2"),
            "query parameter missing: {request}"
        );
    }

    /// A trailing name without a value is rejected before any request is made.
    #[test]
    fn webread_errors_on_missing_name_value_pair() {
        let err = webread_builtin(
            Value::from("https://example.com"),
            vec![Value::from("Timeout")],
        )
        .expect_err("expected missing value error");
        assert!(
            err.contains("missing value"),
            "unexpected error message: {err}"
        );
    }

    /// A zero timeout is rejected during argument parsing.
    #[test]
    fn webread_rejects_non_positive_timeout() {
        let args = vec![Value::from("Timeout"), Value::Num(0.0)];
        let err =
            webread_builtin(Value::from("https://example.com"), args).expect_err("timeout error");
        assert!(
            err.contains("Timeout must be a finite, positive scalar"),
            "unexpected error message: {err}"
        );
    }

    /// A password without a username is rejected before the request is sent.
    #[test]
    fn webread_rejects_password_without_username() {
        let args = vec![Value::from("Password"), Value::from("secret")];
        let err =
            webread_builtin(Value::from("https://example.com"), args).expect_err("auth error");
        assert!(
            err.contains("Password requires a Username"),
            "unexpected error message: {err}"
        );
    }

    /// Unknown `ContentType` names are rejected during argument parsing.
    #[test]
    fn webread_rejects_unsupported_content_type() {
        let args = vec![Value::from("ContentType"), Value::from("table")];
        let err =
            webread_builtin(Value::from("https://example.com"), args).expect_err("format error");
        assert!(
            err.contains("unsupported ContentType"),
            "unexpected error message: {err}"
        );
    }

    /// A `HeaderFields` cell array must have exactly two columns.
    #[test]
    fn webread_rejects_invalid_headerfields_shape() {
        let cell = crate::make_cell(
            vec![Value::from("A"), Value::from("B"), Value::from("C")],
            1,
            3,
        )
        .expect("make cell");

        let args = vec![Value::from("HeaderFields"), cell];
        let err =
            webread_builtin(Value::from("https://example.com"), args).expect_err("header error");
        assert!(
            err.contains("HeaderFields cell array must have exactly two columns"),
            "unexpected error message: {err}"
        );
    }

    /// The embedded documentation contains at least one example block.
    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}