kcl_lib/engine/
conn.rs

1//! Functions for setting up our WebSocket and WebRTC connections for communications with the
2//! engine.
3
4use std::{collections::HashMap, sync::Arc};
5
6use anyhow::{Result, anyhow};
7use futures::{SinkExt, StreamExt};
8use indexmap::IndexMap;
9use kcmc::{
10    ModelingCmd,
11    websocket::{
12        BatchResponse, FailureWebSocketResponse, ModelingCmdReq, ModelingSessionData, OkWebSocketResponseData,
13        SuccessWebSocketResponse, WebSocketRequest, WebSocketResponse,
14    },
15};
16use kittycad_modeling_cmds::{self as kcmc};
17use tokio::sync::{RwLock, mpsc, oneshot};
18use tokio_tungstenite::tungstenite::Message as WsMsg;
19use uuid::Uuid;
20
21use crate::{
22    SourceRange,
23    engine::{AsyncTasks, EngineManager, EngineStats},
24    errors::{KclError, KclErrorDetails},
25    execution::{DefaultPlanes, IdGenerator},
26};
27
/// Whether the WebSocket to the engine is still being read from.
///
/// A connection starts out [`SocketHealth::Active`]. The background read task switches it to
/// [`SocketHealth::Inactive`] when it fails to read a message and stops.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SocketHealth {
    /// The read task has not reported a read failure.
    Active,
    /// The read task failed to read from the socket and has stopped.
    Inactive,
}
33
/// The write half of the split WebSocket stream to the engine.
type WebSocketTcpWrite = futures::stream::SplitSink<tokio_tungstenite::WebSocketStream<reqwest::Upgraded>, WsMsg>;
/// A connection to the engine over a WebSocket.
///
/// Requests are handed to a background write actor through `engine_req_tx`; a background
/// read task collects responses into the shared state held here.
#[derive(Debug)]
pub struct EngineConnection {
    /// Queue of requests for the write actor to send to the engine.
    engine_req_tx: mpsc::Sender<ToEngineReq>,
    /// Tells the write actor to close the WebSocket and stop.
    shutdown_tx: mpsc::Sender<()>,
    /// Responses received from the engine, keyed by request ID.
    responses: ResponseInformation,
    /// Messages from failure responses that did not carry a request ID.
    pending_errors: Arc<RwLock<Vec<String>>>,
    // Never read directly. `TcpReadHandle` aborts the read task when it is dropped.
    #[allow(dead_code)]
    tcp_read_handle: Arc<TcpReadHandle>,
    /// Set to `Inactive` by the read task once it fails to read from the socket.
    socket_health: Arc<RwLock<SocketHealth>>,
    /// Shared state returned by `EngineManager::batch`.
    batch: Arc<RwLock<Vec<(WebSocketRequest, SourceRange)>>>,
    /// Shared state returned by `EngineManager::batch_end`.
    batch_end: Arc<RwLock<IndexMap<uuid::Uuid, (WebSocketRequest, SourceRange)>>>,
    /// Shared state returned by `EngineManager::ids_of_async_commands`.
    ids_of_async_commands: Arc<RwLock<IndexMap<Uuid, SourceRange>>>,

    /// The default planes for the scene.
    default_planes: Arc<RwLock<Option<DefaultPlanes>>>,
    /// If the server sends session data, it'll be copied to here.
    session_data: Arc<RwLock<Option<ModelingSessionData>>>,

    /// Returned by reference from `EngineManager::stats`.
    stats: EngineStats,

    /// Cloned out by `EngineManager::async_tasks`.
    async_tasks: AsyncTasks,

    /// The most recent debug response received from the engine, if any.
    debug_info: Arc<RwLock<Option<OkWebSocketResponseData>>>,
}
59
/// The read half of the WebSocket connection to the engine.
pub struct TcpRead {
    /// Incoming WebSocket messages from the engine.
    stream: futures::stream::SplitStream<tokio_tungstenite::WebSocketStream<reqwest::Upgraded>>,
}
63
/// Occurs when client couldn't read from the WebSocket to the engine.
// NOTE(review): no `Debug` impl is derived for this type; a `#[derive(Debug)]` was left
// commented out here. Confirm whether it can be restored.
#[allow(clippy::large_enum_variant)]
pub enum WebSocketReadError {
    /// Could not read a message due to WebSocket errors.
    /// `TcpRead::read` only produces this variant for WebSocket protocol errors.
    Read(tokio_tungstenite::tungstenite::Error),
    /// WebSocket message didn't contain a valid message that the KCL Executor could parse.
    /// `TcpRead::read` also uses this variant when the stream ends and for non-protocol
    /// socket errors.
    Deser(anyhow::Error),
}
73
74impl From<anyhow::Error> for WebSocketReadError {
75    fn from(e: anyhow::Error) -> Self {
76        Self::Deser(e)
77    }
78}
79
80impl TcpRead {
81    pub async fn read(&mut self) -> std::result::Result<WebSocketResponse, WebSocketReadError> {
82        let Some(msg) = self.stream.next().await else {
83            return Err(anyhow::anyhow!("Failed to read from WebSocket").into());
84        };
85        let msg = match msg {
86            Ok(msg) => msg,
87            Err(e) if matches!(e, tokio_tungstenite::tungstenite::Error::Protocol(_)) => {
88                return Err(WebSocketReadError::Read(e));
89            }
90            Err(e) => return Err(anyhow::anyhow!("Error reading from engine's WebSocket: {e}").into()),
91        };
92        let msg: WebSocketResponse = match msg {
93            WsMsg::Text(text) => serde_json::from_str(&text)
94                .map_err(anyhow::Error::from)
95                .map_err(WebSocketReadError::from)?,
96            WsMsg::Binary(bin) => bson::from_slice(&bin)
97                .map_err(anyhow::Error::from)
98                .map_err(WebSocketReadError::from)?,
99            other => return Err(anyhow::anyhow!("Unexpected WebSocket message from engine API: {other}").into()),
100        };
101        Ok(msg)
102    }
103}
104
/// Owns the handle of the background task that reads from the engine's WebSocket.
/// The task is aborted when this value is dropped.
pub struct TcpReadHandle {
    /// Join handle of the spawned read task.
    handle: Arc<tokio::task::JoinHandle<Result<(), WebSocketReadError>>>,
}
108
109impl std::fmt::Debug for TcpReadHandle {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        write!(f, "TcpReadHandle")
112    }
113}
114
impl Drop for TcpReadHandle {
    /// Abort the background read task when the handle goes away.
    fn drop(&mut self) {
        // Abort the spawned read task.
        self.handle.abort();
    }
}
121
/// Information about the responses from the engine.
///
/// Cloning is cheap: clones share the same underlying map.
#[derive(Clone, Debug)]
struct ResponseInformation {
    /// The responses from the engine, keyed by request ID.
    responses: Arc<RwLock<IndexMap<uuid::Uuid, WebSocketResponse>>>,
}
128
129impl ResponseInformation {
130    pub async fn add(&self, id: Uuid, response: WebSocketResponse) {
131        self.responses.write().await.insert(id, response);
132    }
133}
134
/// Requests to send to the engine, and a way to await a response.
///
/// The "response" here is only the outcome of writing the request to the WebSocket,
/// not the engine's reply to it.
struct ToEngineReq {
    /// The request to send
    req: WebSocketRequest,
    /// If this resolves to Ok, the request was sent.
    /// If this resolves to Err, the request could not be sent.
    /// If this has not yet resolved, the request has not been sent yet.
    request_sent: oneshot::Sender<Result<()>>,
}
144
145impl EngineConnection {
146    /// Start waiting for incoming engine requests, and send each one over the WebSocket to the engine.
147    async fn start_write_actor(
148        mut tcp_write: WebSocketTcpWrite,
149        mut engine_req_rx: mpsc::Receiver<ToEngineReq>,
150        mut shutdown_rx: mpsc::Receiver<()>,
151    ) {
152        loop {
153            tokio::select! {
154                maybe_req = engine_req_rx.recv() => {
155                    match maybe_req {
156                        Some(ToEngineReq { req, request_sent }) => {
157                            // Decide whether to send as binary or text,
158                            // then send to the engine.
159                            let res = if let WebSocketRequest::ModelingCmdReq(ModelingCmdReq {
160                                cmd: ModelingCmd::ImportFiles { .. },
161                                cmd_id: _,
162                            }) = &req
163                            {
164                                Self::inner_send_to_engine_binary(req, &mut tcp_write).await
165                            } else {
166                                Self::inner_send_to_engine(req, &mut tcp_write).await
167                            };
168
169                            // Let the caller know we’ve sent the request (ok or error).
170                            let _ = request_sent.send(res);
171                        }
172                        None => {
173                            // The engine_req_rx channel has closed, so no more requests.
174                            // We'll gracefully exit the loop and close the engine.
175                            break;
176                        }
177                    }
178                },
179
180                // If we get a shutdown signal, close the engine immediately and return.
181                _ = shutdown_rx.recv() => {
182                    let _ = Self::inner_close_engine(&mut tcp_write).await;
183                    return;
184                }
185            }
186        }
187
188        // If we exit the loop (e.g. engine_req_rx was closed),
189        // still gracefully close the engine before returning.
190        let _ = Self::inner_close_engine(&mut tcp_write).await;
191    }
192
193    /// Send the given `request` to the engine via the WebSocket connection `tcp_write`.
194    async fn inner_close_engine(tcp_write: &mut WebSocketTcpWrite) -> Result<()> {
195        tcp_write
196            .send(WsMsg::Close(None))
197            .await
198            .map_err(|e| anyhow!("could not send close over websocket: {e}"))?;
199        Ok(())
200    }
201
202    /// Send the given `request` to the engine via the WebSocket connection `tcp_write`.
203    async fn inner_send_to_engine(request: WebSocketRequest, tcp_write: &mut WebSocketTcpWrite) -> Result<()> {
204        let msg = serde_json::to_string(&request).map_err(|e| anyhow!("could not serialize json: {e}"))?;
205        tcp_write
206            .send(WsMsg::Text(msg.into()))
207            .await
208            .map_err(|e| anyhow!("could not send json over websocket: {e}"))?;
209        Ok(())
210    }
211
212    /// Send the given `request` to the engine via the WebSocket connection `tcp_write` as binary.
213    async fn inner_send_to_engine_binary(request: WebSocketRequest, tcp_write: &mut WebSocketTcpWrite) -> Result<()> {
214        let msg = bson::to_vec(&request).map_err(|e| anyhow!("could not serialize bson: {e}"))?;
215        tcp_write
216            .send(WsMsg::Binary(msg.into()))
217            .await
218            .map_err(|e| anyhow!("could not send json over websocket: {e}"))?;
219        Ok(())
220    }
221
222    pub async fn new(ws: reqwest::Upgraded) -> Result<EngineConnection> {
223        let wsconfig = tokio_tungstenite::tungstenite::protocol::WebSocketConfig::default()
224            // 4294967296 bytes, which is around 4.2 GB.
225            .max_message_size(Some(usize::MAX))
226            .max_frame_size(Some(usize::MAX));
227
228        let ws_stream = tokio_tungstenite::WebSocketStream::from_raw_socket(
229            ws,
230            tokio_tungstenite::tungstenite::protocol::Role::Client,
231            Some(wsconfig),
232        )
233        .await;
234
235        let (tcp_write, tcp_read) = ws_stream.split();
236        let (engine_req_tx, engine_req_rx) = mpsc::channel(10);
237        let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
238        tokio::task::spawn(Self::start_write_actor(tcp_write, engine_req_rx, shutdown_rx));
239
240        let mut tcp_read = TcpRead { stream: tcp_read };
241
242        let session_data: Arc<RwLock<Option<ModelingSessionData>>> = Arc::new(RwLock::new(None));
243        let session_data2 = session_data.clone();
244        let ids_of_async_commands: Arc<RwLock<IndexMap<Uuid, SourceRange>>> = Arc::new(RwLock::new(IndexMap::new()));
245        let socket_health = Arc::new(RwLock::new(SocketHealth::Active));
246        let pending_errors = Arc::new(RwLock::new(Vec::new()));
247        let pending_errors_clone = pending_errors.clone();
248        let response_information = ResponseInformation {
249            responses: Arc::new(RwLock::new(IndexMap::new())),
250        };
251        let response_information_cloned = response_information.clone();
252        let debug_info = Arc::new(RwLock::new(None));
253        let debug_info_cloned = debug_info.clone();
254
255        let socket_health_tcp_read = socket_health.clone();
256        let tcp_read_handle = tokio::spawn(async move {
257            // Get Websocket messages from API server
258            loop {
259                match tcp_read.read().await {
260                    Ok(ws_resp) => {
261                        // If we got a batch response, add all the inner responses.
262                        let id = ws_resp.request_id();
263                        match &ws_resp {
264                            WebSocketResponse::Success(SuccessWebSocketResponse {
265                                resp: OkWebSocketResponseData::ModelingBatch { responses },
266                                ..
267                            }) => {
268                                #[expect(
269                                    clippy::iter_over_hash_type,
270                                    reason = "modeling command uses a HashMap and keys are random, so we don't really have a choice"
271                                )]
272                                for (resp_id, batch_response) in responses {
273                                    let id: uuid::Uuid = (*resp_id).into();
274                                    match batch_response {
275                                        BatchResponse::Success { response } => {
276                                            // If the id is in our ids of async commands, remove
277                                            // it.
278                                            response_information_cloned
279                                                .add(
280                                                    id,
281                                                    WebSocketResponse::Success(SuccessWebSocketResponse {
282                                                        success: true,
283                                                        request_id: Some(id),
284                                                        resp: OkWebSocketResponseData::Modeling {
285                                                            modeling_response: response.clone(),
286                                                        },
287                                                    }),
288                                                )
289                                                .await;
290                                        }
291                                        BatchResponse::Failure { errors } => {
292                                            response_information_cloned
293                                                .add(
294                                                    id,
295                                                    WebSocketResponse::Failure(FailureWebSocketResponse {
296                                                        success: false,
297                                                        request_id: Some(id),
298                                                        errors: errors.clone(),
299                                                    }),
300                                                )
301                                                .await;
302                                        }
303                                    }
304                                }
305                            }
306                            WebSocketResponse::Success(SuccessWebSocketResponse {
307                                resp: OkWebSocketResponseData::ModelingSessionData { session },
308                                ..
309                            }) => {
310                                let mut sd = session_data2.write().await;
311                                sd.replace(session.clone());
312                            }
313                            WebSocketResponse::Failure(FailureWebSocketResponse {
314                                success: _,
315                                request_id,
316                                errors,
317                            }) => {
318                                if let Some(id) = request_id {
319                                    response_information_cloned
320                                        .add(
321                                            *id,
322                                            WebSocketResponse::Failure(FailureWebSocketResponse {
323                                                success: false,
324                                                request_id: *request_id,
325                                                errors: errors.clone(),
326                                            }),
327                                        )
328                                        .await;
329                                } else {
330                                    // Add it to our pending errors.
331                                    let mut pe = pending_errors_clone.write().await;
332                                    for error in errors {
333                                        if !pe.contains(&error.message) {
334                                            pe.push(error.message.clone());
335                                        }
336                                    }
337                                    drop(pe);
338                                }
339                            }
340                            WebSocketResponse::Success(SuccessWebSocketResponse {
341                                resp: debug @ OkWebSocketResponseData::Debug { .. },
342                                ..
343                            }) => {
344                                let mut handle = debug_info_cloned.write().await;
345                                *handle = Some(debug.clone());
346                            }
347                            _ => {}
348                        }
349
350                        if let Some(id) = id {
351                            response_information_cloned.add(id, ws_resp.clone()).await;
352                        }
353                    }
354                    Err(e) => {
355                        match &e {
356                            WebSocketReadError::Read(e) => crate::logln!("could not read from WS: {:?}", e),
357                            WebSocketReadError::Deser(e) => crate::logln!("could not deserialize msg from WS: {:?}", e),
358                        }
359                        *socket_health_tcp_read.write().await = SocketHealth::Inactive;
360                        return Err(e);
361                    }
362                }
363            }
364        });
365
366        Ok(EngineConnection {
367            engine_req_tx,
368            shutdown_tx,
369            tcp_read_handle: Arc::new(TcpReadHandle {
370                handle: Arc::new(tcp_read_handle),
371            }),
372            responses: response_information,
373            pending_errors,
374            socket_health,
375            batch: Arc::new(RwLock::new(Vec::new())),
376            batch_end: Arc::new(RwLock::new(IndexMap::new())),
377            ids_of_async_commands,
378            default_planes: Default::default(),
379            session_data,
380            stats: Default::default(),
381            async_tasks: AsyncTasks::new(),
382            debug_info,
383        })
384    }
385}
386
387#[async_trait::async_trait]
388impl EngineManager for EngineConnection {
389    fn batch(&self) -> Arc<RwLock<Vec<(WebSocketRequest, SourceRange)>>> {
390        self.batch.clone()
391    }
392
393    fn batch_end(&self) -> Arc<RwLock<IndexMap<uuid::Uuid, (WebSocketRequest, SourceRange)>>> {
394        self.batch_end.clone()
395    }
396
397    fn responses(&self) -> Arc<RwLock<IndexMap<Uuid, WebSocketResponse>>> {
398        self.responses.responses.clone()
399    }
400
401    fn ids_of_async_commands(&self) -> Arc<RwLock<IndexMap<Uuid, SourceRange>>> {
402        self.ids_of_async_commands.clone()
403    }
404
    /// Return a clone of this connection's `AsyncTasks`.
    fn async_tasks(&self) -> AsyncTasks {
        self.async_tasks.clone()
    }
408
    /// Borrow this connection's `EngineStats`.
    fn stats(&self) -> &EngineStats {
        &self.stats
    }
412
413    fn get_default_planes(&self) -> Arc<RwLock<Option<DefaultPlanes>>> {
414        self.default_planes.clone()
415    }
416
417    async fn get_debug(&self) -> Option<OkWebSocketResponseData> {
418        self.debug_info.read().await.clone()
419    }
420
421    async fn fetch_debug(&self) -> Result<(), KclError> {
422        let (tx, rx) = oneshot::channel();
423
424        self.engine_req_tx
425            .send(ToEngineReq {
426                req: WebSocketRequest::Debug {},
427                request_sent: tx,
428            })
429            .await
430            .map_err(|e| KclError::new_engine(KclErrorDetails::new(format!("Failed to send debug: {e}"), vec![])))?;
431
432        let _ = rx.await;
433        Ok(())
434    }
435
436    async fn clear_scene_post_hook(
437        &self,
438        id_generator: &mut IdGenerator,
439        source_range: SourceRange,
440    ) -> Result<(), KclError> {
441        // Remake the default planes, since they would have been removed after the scene was cleared.
442        let new_planes = self.new_default_planes(id_generator, source_range).await?;
443        *self.default_planes.write().await = Some(new_planes);
444
445        Ok(())
446    }
447
448    async fn inner_fire_modeling_cmd(
449        &self,
450        _id: uuid::Uuid,
451        source_range: SourceRange,
452        cmd: WebSocketRequest,
453        _id_to_source_range: HashMap<Uuid, SourceRange>,
454    ) -> Result<(), KclError> {
455        let (tx, rx) = oneshot::channel();
456
457        // Send the request to the engine, via the actor.
458        self.engine_req_tx
459            .send(ToEngineReq {
460                req: cmd.clone(),
461                request_sent: tx,
462            })
463            .await
464            .map_err(|e| {
465                KclError::new_engine(KclErrorDetails::new(
466                    format!("Failed to send modeling command: {e}"),
467                    vec![source_range],
468                ))
469            })?;
470
471        // Wait for the request to be sent.
472        rx.await
473            .map_err(|e| {
474                KclError::new_engine(KclErrorDetails::new(
475                    format!("could not send request to the engine actor: {e}"),
476                    vec![source_range],
477                ))
478            })?
479            .map_err(|e| {
480                KclError::new_engine(KclErrorDetails::new(
481                    format!("could not send request to the engine: {e}"),
482                    vec![source_range],
483                ))
484            })?;
485
486        Ok(())
487    }
488
489    async fn inner_send_modeling_cmd(
490        &self,
491        id: uuid::Uuid,
492        source_range: SourceRange,
493        cmd: WebSocketRequest,
494        id_to_source_range: HashMap<Uuid, SourceRange>,
495    ) -> Result<WebSocketResponse, KclError> {
496        self.inner_fire_modeling_cmd(id, source_range, cmd, id_to_source_range)
497            .await?;
498
499        // Wait for the response.
500        let response_timeout = 300;
501        let current_time = std::time::Instant::now();
502        while current_time.elapsed().as_secs() < response_timeout {
503            let guard = self.socket_health.read().await;
504            if *guard == SocketHealth::Inactive {
505                // Check if we have any pending errors.
506                let pe = self.pending_errors.read().await;
507                if !pe.is_empty() {
508                    return Err(KclError::new_engine(KclErrorDetails::new(
509                        pe.join(", ").to_string(),
510                        vec![source_range],
511                    )));
512                } else {
513                    return Err(KclError::new_engine(KclErrorDetails::new(
514                        "Modeling command failed: websocket closed early".to_string(),
515                        vec![source_range],
516                    )));
517                }
518            }
519
520            #[cfg(feature = "artifact-graph")]
521            {
522                // We cannot pop here or it will break the artifact graph.
523                if let Some(resp) = self.responses.responses.read().await.get(&id) {
524                    return Ok(resp.clone());
525                }
526            }
527            #[cfg(not(feature = "artifact-graph"))]
528            {
529                if let Some(resp) = self.responses.responses.write().await.shift_remove(&id) {
530                    return Ok(resp);
531                }
532            }
533        }
534
535        Err(KclError::new_engine(KclErrorDetails::new(
536            format!("Modeling command timed out `{id}`"),
537            vec![source_range],
538        )))
539    }
540
541    async fn get_session_data(&self) -> Option<ModelingSessionData> {
542        self.session_data.read().await.clone()
543    }
544
545    async fn close(&self) {
546        let _ = self.shutdown_tx.send(()).await;
547        loop {
548            let guard = self.socket_health.read().await;
549            if *guard == SocketHealth::Inactive {
550                return;
551            }
552        }
553    }
554}