# kapot-executor 0.2.1
#
# kapot Distributed Compute - Executor
# Documentation
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Program-wide settings: display name, environment-variable prefix, and the
# name of the parameter that points at a configuration file.
# NOTE(review): `env_prefix` has a lowercase segment ("kapot_"), unlike the
# usual all-uppercase environment variable style. Changing it would rename
# every environment variable derived from it — confirm intent before touching.
[general]
name = "kapot Executor"
env_prefix = "kapot_EXECUTOR"
conf_file_param = "config_file"

# Switch (no type or default is declared) for printing the executable's version.
[[switch]]
name = "version"
doc = "Print version of this executable"

# Host of the scheduler. The default is a Rust expression, written as a TOML
# literal string so the inner double quotes need no escaping.
[[param]]
name = "scheduler_host"
type = "String"
default = 'std::string::String::from("localhost")'
doc = "Scheduler host"

# Port of the scheduler (u16); defaults to 50050.
[[param]]
name = "scheduler_port"
type = "u16"
default = "50050"
doc = "scheduler port"

# Local address to bind to; the default 0.0.0.0 is given as a Rust expression
# inside a TOML literal string (no escaping required).
[[param]]
name = "bind_host"
type = "String"
default = 'std::string::String::from("0.0.0.0")'
doc = "Local IP address to bind to."

# Address this executor registers with the scheduler. No default is declared
# here; the doc text describes what happens when no value is provided.
[[param]]
name = "external_host"
type = "String"
doc = "Host name or IP address to register with scheduler so that other executors can connect to this executor. If none is provided, the scheduler will use the connecting IP address to communicate with the executor."

# Port to bind (u16), abbreviated as `p`; defaults to 50051.
[[param]]
abbr = "p"
name = "bind_port"
type = "u16"
default = "50051"
doc = "bind port"

# Port for the gRPC service (u16); defaults to 50052, one above `bind_port`'s
# default of 50051.
[[param]]
name = "bind_grpc_port"
type = "u16"
default = "50052"
doc = "bind grpc service port"

# Time budget, in seconds, for connecting to the scheduler; defaults to 30.
# Per the doc text, zero means a single attempt.
[[param]]
name = "scheduler_connect_timeout_seconds"
type = "u16"
default = "30"
doc = "How long to try connecting to scheduler before failing. Set to zero to fail after first attempt."

# Directory for temporary IPC files. No default is declared here.
[[param]]
name = "work_dir"
type = "String"
doc = "Directory for temporary IPC files"

# Upper bound on concurrently running tasks, abbreviated as `c`.
# The zero default is a sentinel rather than a real limit, so the help text
# states its meaning explicitly for users who only see the `doc` string.
[[param]]
abbr = "c"
name = "concurrent_tasks"
type = "usize"
default = "0" # defaults to all available cores if left as zero
doc = "Max concurrent tasks. Set to 0 (the default) to use all available cores."

# Task scheduling policy, abbreviated as `s`; defaults to the PullStaged
# variant. Keys follow the name/type/default/doc order used at the top of
# this file.
[[param]]
abbr = "s"
name = "task_scheduling_policy"
type = "kapot_core::config::TaskSchedulingPolicy"
default = "kapot_core::config::TaskSchedulingPolicy::PullStaged"
doc = "The task scheduling policy for the scheduler, possible values: pull-staged, push-staged. Default: pull-staged"

# Interval, in seconds, between clean-ups of old job directories.
# The default of 0 disables the clean-up, per the doc text.
[[param]]
name = "job_data_clean_up_interval_seconds"
type = "u64"
default = "0"
doc = "Controls the interval in seconds, which the worker cleans up old job dirs on the local machine. 0 means the clean up is disabled"

# Retention period, in seconds, for job directories after a job finishes.
# 604800 = 7 * 24 * 3600, i.e. seven days.
[[param]]
name = "job_data_ttl_seconds"
type = "u64"
default = "604800"
doc = "The number of seconds to retain job directories on each worker, i.e. how long the resulting data is kept after a job is done. Default: 604800 (7 days, 7 * 24 * 3600)"

# Plugin directory; the default is an empty string, expressed as a Rust
# expression inside a TOML literal string.
[[param]]
name = "plugin_dir"
type = "String"
default = 'std::string::String::from("")'
doc = "plugin dir"

# Directory where logs are saved. No default is declared here.
[[param]]
name = "log_dir"
type = "String"
doc = "Log dir: a path to save log. This will create a new storage directory at the specified path if it does not already exist."

# Toggles thread ids and names in the log file; on by default.
[[param]]
name = "print_thread_info"
type = "bool"
default = "true"
doc = "Enable print thread ids and names in log file."

# Log filter string with optional per-module overrides (see the env_logger
# link in the doc text). The default sets INFO both overall and for datafusion.
[[param]]
name = "log_level_setting"
type = "String"
default = 'std::string::String::from("INFO,datafusion=INFO")'
doc = "special log level for sub mod. link: https://docs.rs/env_logger/latest/env_logger/#enabling-logging. For example we want whole level is INFO but datafusion mode is DEBUG"

# Rotation policy for the tracing log; defaults to the Daily variant.
[[param]]
name = "log_rotation_policy"
type = "kapot_core::config::LogRotationPolicy"
default = "kapot_core::config::LogRotationPolicy::Daily"
doc = "Tracing log rotation policy, possible values: minutely, hourly, daily, never. Default: daily"

# Cap on decoded message size for the gRPC server.
# 16777216 = 16 * 1024 * 1024, the 16MB stated in the doc text.
[[param]]
name = "grpc_server_max_decoding_message_size"
type = "u32"
default = "16777216"
doc = "The maximum size of a decoded message at the grpc server side. Default: 16MB"

# Cap on encoded message size for the gRPC server.
# 16777216 = 16 * 1024 * 1024, the 16MB stated in the doc text.
[[param]]
name = "grpc_server_max_encoding_message_size"
type = "u32"
default = "16777216"
doc = "The maximum size of an encoded message at the grpc server side. Default: 16MB"

# Seconds between heartbeats sent to the scheduler; defaults to 60.
# The doc text ties this to push-based task scheduling.
[[param]]
name = "executor_heartbeat_interval_seconds"
type = "u64"
default = "60"
doc = "The heartbeat interval in seconds to the scheduler for push-based task scheduling"

# Data cache policy. No default is declared here; the doc text lists
# `local-disk-file` as the only possible value.
[[param]]
name = "data_cache_policy"
type = "kapot_core::config::DataCachePolicy"
doc = "Data cache policy, possible values: local-disk-file"

# Directory holding cached source data. No default is declared here.
[[param]]
name = "cache_dir"
type = "String"
doc = "Directory for cached source data"

# Upper bound on cache usage. 1073741824 = 1024 * 1024 * 1024, the 1GB
# stated in the doc text.
[[param]]
name = "cache_capacity"
type = "u64"
default = "1073741824"
doc = "The maximum capacity can be used for cache. Default: 1GB"

# Worker-thread count for the caching runtime; defaults to 2.
[[param]]
name = "cache_io_concurrency"
type = "u32"
default = "2"
doc = "The number of worker threads for the runtime of caching. Default: 2"