carton-runner-interface 0.0.1

// Copyright 2023 Vivek Panyam
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{
    any::Any,
    collections::HashMap,
    path::{Path, PathBuf},
    sync::{atomic::AtomicBool, Arc, Mutex},
};

use anywhere::types::{AnywhereFS, ReadOnlyFS, ReadWriteFS};
use clap::Parser;
use tokio::sync::mpsc::{self, error::SendError};
use tracing_chrome::ChromeLayerBuilder;
use tracing_subscriber::prelude::*;

use crate::{
    do_not_modify::comms::Comms,
    do_not_modify::types::{ChannelId, FsToken, RPCRequest, RPCResponse},
    multiplexer::Multiplexer,
    types::{Device, Handle, LogRecord, RPCRequestData, RPCResponseData, RpcId, RunnerOpt, Tensor},
};

/// The runner-side endpoint of the connection to the core library.
///
/// Built by `Server::connect`. Requests are read from `incoming` and responses
/// (as well as forwarded log records) are written to `outgoing`.
pub struct Server {
    // The connection that the filesystem and RPC channels are obtained from.
    // It is also handed to the request/response conversions.
    comms: Comms,
    // Hands out a per-token stream that filesystem clients get connected over
    fs_multiplexer: Multiplexer<
        anywhere::transport::serde::RequestMessageType,
        anywhere::transport::serde::ResponseMessageType,
    >,

    // Sending half of the RPC channel (responses going out)
    outgoing: mpsc::Sender<RPCResponse>,
    // Receiving half of the RPC channel (requests coming in)
    incoming: mpsc::Receiver<RPCRequest>,

    // Values that need to stay alive while the server is up
    // (`init_runner` stores the tracing guard in here)
    _keepalive: Vec<Box<dyn Any + Send + Sync>>,

    // A flag that stops us from attempting to send log messages after shutdown.
    // It is set to `true` when the server is dropped.
    is_shutdown: Arc<AtomicBool>,
}

/// An opaque handle that stands for a map of sealed tensors.
///
/// It is a thin wrapper around a `u64`, so it is cheap to copy, compare and hash.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub struct SealHandle(pub(crate) u64);

impl SealHandle {
    /// Wraps the raw value `v` in a handle.
    pub fn new(v: u64) -> Self {
        Self(v)
    }

    /// Returns the raw value stored in this handle.
    pub fn get(&self) -> u64 {
        let Self(raw) = *self;
        raw
    }
}

impl From<crate::types::SealHandle> for SealHandle {
    fn from(value: crate::types::SealHandle) -> Self {
        Self(value.0)
    }
}

impl From<SealHandle> for crate::types::SealHandle {
    /// Builds the wire-level handle from the public one by copying its raw value.
    fn from(value: SealHandle) -> Self {
        let raw: u64 = value.0;
        crate::types::SealHandle(raw)
    }
}

/// A request from the core library
#[derive(Debug)]
pub struct Request {
    /// The id copied from the incoming RPC request.
    /// NOTE(review): presumably this is the id to pass back when responding — confirm
    pub id: RpcId,

    /// The payload of the request
    pub data: RequestData,
}

impl Request {
    /// Converts a wire-level `RPCRequest` into a `Request`.
    ///
    /// The id is carried over as-is; the payload is converted with
    /// `RequestData::from`, which uses `comms`.
    async fn from(req: RPCRequest, comms: &Comms) -> Self {
        let id = req.id;
        let data = RequestData::from(req.data, comms).await;

        Self { id, data }
    }
}

/// The payload of a `Request`, one variant per kind of request
#[derive(Debug)]
pub enum RequestData {
    /// Load a model
    Load {
        /// A token for a readonly filesystem. It points to a folder that has the same
        /// structure as the output of `Pack` (for a particular runner)
        fs: FsToken,

        /// Load options
        runner_name: String,
        required_framework_version: semver::VersionReq,
        runner_compat_version: u64,
        runner_opts: Option<HashMap<String, RunnerOpt>>,
        visible_device: Device,

        /// The hash of the model
        /// This should always be available unless we're loading an unpacked model
        carton_manifest_hash: Option<String>,
    },

    /// Pack a model
    Pack {
        /// A token for a read/write filesystem that the below paths reference
        fs: FsToken,

        /// The path to user input data
        /// If this is a folder, the runner is allowed to place data in a `.carton` subfolder
        /// This can be used if it wants to generate a lockfile for example
        input_path: String,

        /// A temporary folder generated by the core library. The runner can use this if it needs
        /// to generate output in a new folder.
        /// (In some cases, the input can be wrapped as-is and doesn't need to be copied into a new folder)
        /// This folder is owned by the core library and will be deleted by it
        temp_folder: String,
    },

    /// Seal a map of tensors (the matching response carries a `SealHandle`)
    Seal {
        /// The tensors to seal, keyed by name
        tensors: HashMap<String, Tensor>,
    },

    /// Run inference on tensors that are passed in directly
    InferWithTensors {
        /// The input tensors, keyed by name
        tensors: HashMap<String, Tensor>,

        /// Whether a streaming response is supported
        /// NOTE(review): presumably the runner may then reply with several partial responses — confirm
        streaming: bool,
    },

    /// Run inference on a handle
    /// NOTE(review): presumably one returned by an earlier `Seal` — confirm
    InferWithHandle {
        /// The handle that stands for the input tensors
        handle: SealHandle,

        /// Whether a streaming response is supported
        /// NOTE(review): presumably the runner may then reply with several partial responses — confirm
        streaming: bool,
    },
}

impl RequestData {
    async fn from(value: RPCRequestData, comms: &Comms) -> Self {
        let from_handles = |tensors: HashMap<String, Handle<Tensor>>| async {
            let mut out = HashMap::new();
            for (k, v) in tensors {
                out.insert(k, v.into_inner(comms).await);
            }

            out
        };

        match value {
            RPCRequestData::Load {
                fs,
                runner_name,
                required_framework_version,
                runner_compat_version,
                runner_opts,
                visible_device,
                carton_manifest_hash,
            } => Self::Load {
                fs,
                runner_name,
                required_framework_version,
                runner_compat_version,
                runner_opts,
                visible_device,
                carton_manifest_hash,
            },
            RPCRequestData::Pack {
                fs,
                input_path,
                temp_folder,
            } => Self::Pack {
                fs,
                input_path,
                temp_folder,
            },
            RPCRequestData::Seal { tensors } => Self::Seal {
                tensors: from_handles(tensors).await,
            },
            RPCRequestData::InferWithTensors { tensors, streaming } => Self::InferWithTensors {
                tensors: from_handles(tensors).await,
                streaming,
            },
            RPCRequestData::InferWithHandle { handle, streaming } => Self::InferWithHandle {
                handle: handle.into(),
                streaming,
            },
        }
    }
}

/// The payload of a response sent back to the core library
#[derive(Debug)]
pub enum ResponseData {
    /// Successful load
    Load,

    /// Result of packing a model
    Pack {
        /// The path to the output directory. This can be in the temp folder passed into `Pack`
        /// Note: this must be a *directory* even if the input was a file
        /// This references a path on the FS that was passed in
        /// during the request
        output_path: String,
    },

    /// Result of sealing a map of tensors
    Seal {
        /// The handle that stands for the sealed tensors
        handle: SealHandle,
    },

    /// Result of running inference
    Infer {
        /// The output tensors, keyed by name
        tensors: HashMap<String, Tensor>,
    },

    /// Something went wrong
    Error {
        /// The error message
        e: String,
    },

    /// Logging
    LogMessage {
        /// The log record to pass on
        record: LogRecord,
    },

    /// A response that carries no data
    Empty,
}

impl ResponseData {
    async fn to_rpc(self, comms: &Comms) -> RPCResponseData {
        let into_handles = |tensors: HashMap<String, Tensor>| async {
            let mut out = HashMap::new();
            for (k, v) in tensors {
                out.insert(k, Handle::new(v, comms).await);
            }

            out
        };

        match self {
            ResponseData::Load => RPCResponseData::Load,
            ResponseData::Pack { output_path } => RPCResponseData::Pack { output_path },
            ResponseData::Seal { handle } => RPCResponseData::Seal {
                handle: handle.into(),
            },
            ResponseData::Infer { tensors } => RPCResponseData::Infer {
                tensors: into_handles(tensors).await,
            },
            ResponseData::Error { e } => RPCResponseData::Error { e },
            ResponseData::LogMessage { record } => RPCResponseData::LogMessage { record },
            ResponseData::Empty => RPCResponseData::Empty,
        }
    }
}

impl Server {
    /// Connects to the core library using `path` and sets up the filesystem and
    /// RPC channels.
    ///
    /// If `logger` is provided, a background task is spawned that forwards every
    /// record received from the logger to the core library as a `LogMessage`
    /// response. The task stops once the server has been marked as shut down.
    ///
    /// # Panics
    ///
    /// The forwarding task panics if a log record cannot be sent while the
    /// server has *not* been shut down. `PassThroughLogger::get_rx` panics if
    /// the logger's receiver was already taken.
    async fn connect(path: &Path, logger: Option<&PassThroughLogger>) -> Self {
        let comms = Comms::connect(path).await;

        // Set up filesystem handling
        let (fs_tx, fs_rx) = comms.get_channel(ChannelId::FileSystem).await;
        let fs_multiplexer = Multiplexer::new(fs_tx, fs_rx).await;

        // The channel that requests arrive on and responses are sent on
        let (outgoing, incoming) = comms.get_channel(ChannelId::Rpc).await;

        let is_shutdown = Arc::new(AtomicBool::new(false));
        if let Some(logger) = logger {
            let mut messages = logger.get_rx();
            let out = outgoing.clone();
            let is_shutdown = is_shutdown.clone();
            tokio::spawn(async move {
                while let Some(record) = messages.recv().await {
                    // Don't attempt to send anything after shutdown
                    if is_shutdown.load(std::sync::atomic::Ordering::Relaxed) {
                        break;
                    }

                    // TODO: don't hardcode 0
                    let status = out
                        .send(RPCResponse {
                            id: 0,
                            complete: true,
                            data: RPCResponseData::LogMessage { record },
                        })
                        .await;

                    // Ignore send errors only when we're shutting down
                    if let Err(err) = status {
                        if is_shutdown.load(std::sync::atomic::Ordering::Relaxed) {
                            break;
                        }

                        panic!("failed to forward a log record to the core library: {err:?}");
                    }
                }
            });
        }

        Server {
            comms,
            fs_multiplexer,
            incoming,
            outgoing,
            _keepalive: Vec::new(),
            is_shutdown,
        }
    }

    /// Waits for the next request from the core library.
    ///
    /// Returns `None` once the incoming channel yields no more requests.
    pub async fn get_next_request(&mut self) -> Option<Request> {
        let req = self.incoming.recv().await?;
        Some(Request::from(req, &self.comms).await)
    }

    /// Sends `res` as the final (complete) response for the request `req_id`.
    ///
    /// # Errors
    ///
    /// Returns `SendError(())` if the response could not be sent. The unsent
    /// response is not returned to the caller.
    pub async fn send_response_for_request(
        &self,
        req_id: u64,
        res: ResponseData,
    ) -> Result<(), SendError<()>> {
        // A non-streaming response is a streaming response that is immediately complete
        self.send_streaming_response_for_request(req_id, true, res)
            .await
    }

    /// Sends `res` as a response for the request `req_id`, with `complete` passed
    /// through as the `complete` field of the outgoing RPC response.
    ///
    /// # Errors
    ///
    /// Returns `SendError(())` if the response could not be sent. The unsent
    /// response is not returned to the caller.
    pub async fn send_streaming_response_for_request(
        &self,
        req_id: u64,
        complete: bool,
        res: ResponseData,
    ) -> Result<(), SendError<()>> {
        let response = RPCResponse {
            id: req_id,
            complete,
            data: res.to_rpc(&self.comms).await,
        };

        self.outgoing
            .send(response)
            .await
            .map_err(|_| SendError(()))
    }

    /// Connects a read/write filesystem client for `token`.
    ///
    /// # Errors
    ///
    /// Returns the `std::io::Error` produced while connecting the client.
    pub async fn get_writable_filesystem(&self, token: FsToken) -> std::io::Result<ReadWriteFS> {
        self.get_filesystem_internal(token).await
    }

    /// Connects a readonly filesystem client for `token`.
    ///
    /// # Errors
    ///
    /// Returns the `std::io::Error` produced while connecting the client.
    pub async fn get_readonly_filesystem(&self, token: FsToken) -> std::io::Result<ReadOnlyFS> {
        self.get_filesystem_internal(token).await
    }

    /// Shared implementation of the two public filesystem getters.
    ///
    /// The const parameters `W` and `S` are those of `AnywhereFS`; they are
    /// inferred from the return type that the caller asks for.
    async fn get_filesystem_internal<const W: bool, const S: bool>(
        &self,
        token: FsToken,
    ) -> std::io::Result<AnywhereFS<W, S>> {
        // Each token gets its own stream on top of the filesystem channel
        let (tx, rx) = self.fs_multiplexer.get_stream_for_id(token.0).await;

        anywhere::transport::serde::connect(tx, rx).await
    }
}

impl Drop for Server {
    /// Records that the server has shut down, which stops the log-forwarding
    /// task from attempting to send further log messages.
    fn drop(&mut self) {
        // TODO: we should be able to remove this once we remove the `unwrap`s in comms
        let shutdown_flag = &self.is_shutdown;
        shutdown_flag.store(true, std::sync::atomic::Ordering::Relaxed);
    }
}

// Command line arguments accepted by a runner process.
// NOTE: plain `//` comments are used here on purpose. clap's derive turns `///`
// doc comments into help text, so those would change the program's output.
#[derive(Parser, Debug)]
struct Args {
    // The path that `init_runner` hands to `Server::connect`
    // (presumably the path of a unix domain socket, going by the name — confirm)
    #[arg(long)]
    uds_path: String,
}

/// Initializes the runner from command line args and returns the connected [`Server`].
///
/// In order, this:
/// 1. parses the command line arguments,
/// 2. arranges for this process to go away when its parent dies,
/// 3. sets up chrome tracing if `CARTON_RUNNER_TRACE_FILE` is set, or the
///    pass-through logger otherwise,
/// 4. connects to the core library.
///
/// # Panics
///
/// Panics if `prctl` fails (non-macOS) or if a global logger was already installed.
pub async fn init_runner() -> Server {
    let args = Args::parse();

    // Shutdown the runner if the parent process dies
    // NOTE: this technically shuts down if the thread that forked this process dies, but since
    // the parent should be running in tokio, this should be okay because if the parent's tokio
    // runtime goes down, we should go down.
    #[cfg(not(target_os = "macos"))]
    {
        let rc = unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) };
        if rc != 0 {
            panic!("prctl failed")
        }
    }

    // Watchdog on macos where we can't use PR_SET_PDEATHSIG
    #[cfg(target_os = "macos")]
    std::thread::spawn(|| loop {
        // A parent pid of 1 means the parent exited, so we should exit as well
        if unsafe { libc::getppid() } == 1 {
            std::process::exit(0);
        }

        std::thread::sleep(std::time::Duration::from_secs(1));
    });

    // Exactly one of the two is set: the tracing guard when tracing to a file, or the
    // pass-through logger otherwise
    let (keepalive, pass_through_logger) = match std::env::var("CARTON_RUNNER_TRACE_FILE") {
        Ok(trace_path) => {
            // Setup tracing
            let (chrome_layer, flush_guard) = ChromeLayerBuilder::new()
                .file(trace_path)
                .include_args(true)
                .build();
            tracing_subscriber::registry().with(chrome_layer).init();

            (Some(flush_guard), None)
        }
        Err(_) => {
            // Initialize logging. The logger is leaked because `log` needs a `'static` reference
            let logger: &'static PassThroughLogger = Box::leak(Box::new(PassThroughLogger::new()));
            log::set_logger(logger).unwrap();
            log::set_max_level(log::LevelFilter::Trace);

            (None, Some(logger))
        }
    };

    // TODO: run the FD passing channel on top of UDS and get the appropriate channels out
    let socket_path = PathBuf::from(args.uds_path);
    let mut server = Server::connect(&socket_path, pass_through_logger).await;

    // Keep the tracing guard alive for as long as the server is
    if let Some(guard) = keepalive {
        server._keepalive.push(Box::new(Mutex::new(guard)));
    }

    server
}

/// A logging implementation that passes through to the main process
struct PassThroughLogger {
    // Sending half of an unbounded channel; `log` pushes every converted record into it
    tx: mpsc::UnboundedSender<LogRecord>,
    // Receiving half of the same channel. It is kept in an `Option` so that `get_rx`
    // can move it out (leaving `None` behind), and in a mutex so that this works
    // through `&self`
    rx: std::sync::Mutex<Option<mpsc::UnboundedReceiver<LogRecord>>>,
}

impl PassThroughLogger {
    /// Creates a logger backed by a fresh unbounded channel.
    fn new() -> Self {
        let (sender, receiver) = mpsc::unbounded_channel();
        let rx = std::sync::Mutex::new(Some(receiver));

        Self { tx: sender, rx }
    }

    /// Hands out the receiving end of the channel.
    ///
    /// Can only be called once: it panics if the receiver was already taken
    /// (or if the mutex is poisoned).
    fn get_rx(&self) -> mpsc::UnboundedReceiver<LogRecord> {
        let mut slot = self.rx.lock().unwrap();
        slot.take().unwrap()
    }
}

impl log::Log for PassThroughLogger {
    fn enabled(&self, _metadata: &log::Metadata) -> bool {
        // This isn't ideal, but for now, lets always return true and let the
        // main process handle it
        // TODO: improve this
        true
    }

    fn log(&self, record: &log::Record) {
        // TODO: check if this is reasonably efficient
        // Ignore send failures
        let _ = self.tx.send(record.into());
    }

    fn flush(&self) {
        // Noop for now
    }
}