pub struct DevTweaks {
pub adaptive_joins: Option<bool>,
pub balancer_balance_tax: Option<f64>,
pub balancer_key_distribution_refresh_threshold: Option<f64>,
pub balancer_min_absolute_improvement_threshold: Option<i64>,
pub balancer_min_relative_improvement_threshold: Option<f64>,
pub bloom_false_positive_rate: Option<f64>,
pub buffer_cache_allocation_strategy: Option<BufferCacheAllocationStrategy>,
pub buffer_cache_strategy: Option<BufferCacheStrategy>,
pub buffer_max_buckets: Option<u64>,
pub fbuf_slab_bytes_per_class: Option<u64>,
pub fetch_distinct: Option<bool>,
pub fetch_join: Option<bool>,
pub max_level0_batch_size_records: Option<i64>,
pub merger: Option<MergerType>,
pub merger_threads: Option<i64>,
pub negative_weight_multiplier: Option<i64>,
pub splitter_chunk_size_records: Option<i64>,
pub stack_overflow_backtrace: Option<bool>,
pub storage_mb_max: Option<i64>,
}
Optional settings for tweaking Feldera internals.
These settings reflect experiments that may come and go and change from version to version. Users should not consider them to be stable.
JSON schema
{
"description": "Optional settings for tweaking Feldera internals.\n\nThese settings reflect experiments that may come and go and change from\nversion to version. Users should not consider them to be stable.",
"type": "object",
"properties": {
"adaptive_joins": {
"description": "Enable adaptive joins.\n\nAdaptive joins dynamically change their partitioning policy to avoid skew.\n\nAdaptive joins are disabled by default.",
"type": [
"boolean",
"null"
]
},
"balancer_balance_tax": {
"description": "Factor that discourages the use of the Balance policy in a perfectly balanced collection.\n\nAssuming a perfectly balanced key distribution, the Balance policy is slightly less efficient than Shard,\nsince it requires computing the hash of the entire key/value pair. This factor discourages the use of this policy\nif the skew is `<balancer_balance_tax`.\n\nThe default value is 1.1.",
"type": [
"number",
"null"
],
"format": "double"
},
"balancer_key_distribution_refresh_threshold": {
"description": "The balancer threshold for checking for an improved partitioning policy for a stream.\n\nFinding a good partitioning policy for a circuit involves solving an optimization problem,\nwhich can be relatively expensive. Instead of doing this on every step, the balancer only\nchecks for an improved partitioning policy if the key distribution of a stream has changed\nsignificantly since the current solution was computed. Specifically, it only kicks in when\nthe size of at least one shard of at least one stream in the cluster has changed by more than\nthis threshold.\n\nThe default value is 0.1.",
"type": [
"number",
"null"
],
"format": "double"
},
"balancer_min_absolute_improvement_threshold": {
"description": "The minimum absolute improvement threshold for the balancer.\n\nThe join balancer is a component that dynamically chooses an optimal\npartitioning policy for adaptive join operators. This parameter\nprevents the join balancer from making changes to the partitioning\npolicy if the improvement is not significant, since the overhead of such\nrebalancing, especially when performed frequently, can exceed the\nbenefits.\n\nA rebalancing is considered significant if the absolute estimated\nimprovement for the cluster of joins where the rebalancing is applied is\nat least this threshold. The cost model used by the balancer is based on\nthe number of records in the largest partition of a collection.\n\nA rebalancing is applied if both this threshold and\n`balancer_min_relative_improvement_threshold` are met.\n\nThe default value is 10,000.",
"type": [
"integer",
"null"
],
"format": "int64",
"minimum": 0.0
},
"balancer_min_relative_improvement_threshold": {
"description": "The minimum relative improvement threshold for the join balancer.\n\nThe join balancer is a component that dynamically chooses an optimal\npartitioning policy for adaptive join operators. This parameter\nprevents the join balancer from making changes to the partitioning\npolicy if the improvement is not significant, since the overhead of such\nrebalancing, especially when performed frequently, can exceed the\nbenefits.\n\nA rebalancing is considered significant if the relative estimated\nimprovement for the cluster of joins where the rebalancing is applied is\nat least this threshold.\n\nA rebalancing is applied if both this threshold and\n`balancer_min_absolute_improvement_threshold` are met.\n\nThe default value is 1.2.",
"type": [
"number",
"null"
],
"format": "double"
},
"bloom_false_positive_rate": {
"description": "False-positive rate for Bloom filters on batches on storage, as a\nfraction f, where 0 < f < 1.\n\nThe false-positive rate trades off between the amount of memory used by\nBloom filters and how frequently storage needs to be searched for keys\nthat are not actually present. Typical false-positive rates and their\ncorresponding memory costs are:\n\n- 0.1: 4.8 bits per key\n- 0.01: 9.6 bits per key\n- 0.001: 14.4 bits per key\n- 0.0001: 19.2 bits per key (default)\n\nValues outside the valid range, such as 0.0, disable Bloom filters.",
"type": [
"number",
"null"
],
"format": "double"
},
"buffer_cache_allocation_strategy": {
"allOf": [
{
"$ref": "#/components/schemas/BufferCacheAllocationStrategy"
}
]
},
"buffer_cache_strategy": {
"allOf": [
{
"$ref": "#/components/schemas/BufferCacheStrategy"
}
]
},
"buffer_max_buckets": {
"description": "Override the number of buckets/shards used by sharded buffer caches.\n\nThis only applies when `buffer_cache_strategy = \"s3_fifo\"`. Values are\nrounded up to the next power of two because the current implementation\nshards by `hash(key) & (n - 1)`.",
"type": [
"integer",
"null"
],
"minimum": 0.0
},
"fbuf_slab_bytes_per_class": {
"description": "Target number of cached bytes retained in each `FBuf` slab size class.\n\nThe default is 16 MiB.",
"type": [
"integer",
"null"
],
"minimum": 0.0
},
"fetch_distinct": {
"description": "Whether to asynchronously fetch keys needed for the distinct operator\nfrom storage. Asynchronous fetching should be faster for high-latency\nstorage, such as object storage, but it could use excessive amounts of\nmemory if the number of keys fetched is very large.",
"type": [
"boolean",
"null"
]
},
"fetch_join": {
"description": "Whether to asynchronously fetch keys needed for the join operator from\nstorage. Asynchronous fetching should be faster for high-latency\nstorage, such as object storage, but it could use excessive amounts of\nmemory if the number of keys fetched is very large.",
"type": [
"boolean",
"null"
]
},
"max_level0_batch_size_records": {
"description": "Maximum batch size in records for level 0 merges.",
"type": [
"integer",
"null"
],
"format": "int32",
"minimum": 0.0
},
"merger": {
"allOf": [
{
"$ref": "#/components/schemas/MergerType"
}
]
},
"merger_threads": {
"description": "The number of merger threads.\n\nThe default value is equal to the number of worker threads.",
"type": [
"integer",
"null"
],
"format": "int32",
"minimum": 0.0
},
"negative_weight_multiplier": {
"description": "Additional bias the merger assigns to records with negative weights\n(retractions) to promote them to higher levels of the LSM tree sooner.\n\nReasonable values for this parameter are in the range [0, 10].\n\nThe default value is 0, which means that retractions are not given\nany additional bias.",
"type": [
"integer",
"null"
],
"format": "int32",
"minimum": 0.0
},
"splitter_chunk_size_records": {
"description": "Controls the maximal number of records output by splitter operators\n(joins, distinct, aggregation, rolling window and group operators) at\neach step.\n\nThe default value is 10,000 records.",
"type": [
"integer",
"null"
],
"format": "int64",
"minimum": 0.0
},
"stack_overflow_backtrace": {
"description": "Attempt to print a stack trace on stack overflow.\n\nTo be used for debugging only; do not enable in production.",
"type": [
"boolean",
"null"
]
},
"storage_mb_max": {
"description": "If set, the maximum amount of storage, in MiB, for the POSIX backend to\nallow to be in use before failing all writes with [StorageFull]. This\nis useful for testing on top of storage that does not implement its own\nquota mechanism.\n\n[StorageFull]: std::io::ErrorKind::StorageFull",
"type": [
"integer",
"null"
],
"format": "int64",
"minimum": 0.0
}
},
"additionalProperties": {
"description": "Options not understood by this particular version.\n\nThis allows the pipeline manager to take options that a custom or old\nruntime version accepts.",
"default": {}
}
}
Fields
adaptive_joins: Option<bool>
Enable adaptive joins.
Adaptive joins dynamically change their partitioning policy to avoid skew.
Adaptive joins are disabled by default.
balancer_balance_tax: Option<f64>
Factor that discourages the use of the Balance policy in a perfectly balanced collection.
Assuming a perfectly balanced key distribution, the Balance policy is slightly less efficient than Shard,
since it requires computing the hash of the entire key/value pair. This factor discourages the use of this policy
if the skew is less than `balancer_balance_tax`.
The default value is 1.1.
balancer_key_distribution_refresh_threshold: Option<f64>
The balancer threshold for checking for an improved partitioning policy for a stream.
Finding a good partitioning policy for a circuit involves solving an optimization problem, which can be relatively expensive. Instead of doing this on every step, the balancer only checks for an improved partitioning policy if the key distribution of a stream has changed significantly since the current solution was computed. Specifically, it only kicks in when the size of at least one shard of at least one stream in the cluster has changed by more than this threshold.
The default value is 0.1.
balancer_min_absolute_improvement_threshold: Option<i64>
The minimum absolute improvement threshold for the balancer.
The join balancer is a component that dynamically chooses an optimal partitioning policy for adaptive join operators. This parameter prevents the join balancer from making changes to the partitioning policy if the improvement is not significant, since the overhead of such rebalancing, especially when performed frequently, can exceed the benefits.
A rebalancing is considered significant if the absolute estimated improvement for the cluster of joins where the rebalancing is applied is at least this threshold. The cost model used by the balancer is based on the number of records in the largest partition of a collection.
A rebalancing is applied if both this threshold and `balancer_min_relative_improvement_threshold` are met.
The default value is 10,000.
balancer_min_relative_improvement_threshold: Option<f64>
The minimum relative improvement threshold for the join balancer.
The join balancer is a component that dynamically chooses an optimal partitioning policy for adaptive join operators. This parameter prevents the join balancer from making changes to the partitioning policy if the improvement is not significant, since the overhead of such rebalancing, especially when performed frequently, can exceed the benefits.
A rebalancing is considered significant if the relative estimated improvement for the cluster of joins where the rebalancing is applied is at least this threshold.
A rebalancing is applied if both this threshold and `balancer_min_absolute_improvement_threshold` are met.
The default value is 1.2.
bloom_false_positive_rate: Option<f64>
False-positive rate for Bloom filters on batches on storage, as a fraction f, where 0 < f < 1.
The false-positive rate trades off between the amount of memory used by Bloom filters and how frequently storage needs to be searched for keys that are not actually present. Typical false-positive rates and their corresponding memory costs are:
- 0.1: 4.8 bits per key
- 0.01: 9.6 bits per key
- 0.001: 14.4 bits per key
- 0.0001: 19.2 bits per key (default)
Values outside the valid range, such as 0.0, disable Bloom filters.
buffer_cache_allocation_strategy: Option<BufferCacheAllocationStrategy>
buffer_cache_strategy: Option<BufferCacheStrategy>
buffer_max_buckets: Option<u64>
Override the number of buckets/shards used by sharded buffer caches.
This only applies when `buffer_cache_strategy = "s3_fifo"`. Values are rounded up to the next power of two because the current implementation shards by `hash(key) & (n - 1)`.
fbuf_slab_bytes_per_class: Option<u64>
Target number of cached bytes retained in each `FBuf` slab size class.
The default is 16 MiB.
fetch_distinct: Option<bool>
Whether to asynchronously fetch keys needed for the distinct operator from storage. Asynchronous fetching should be faster for high-latency storage, such as object storage, but it could use excessive amounts of memory if the number of keys fetched is very large.
fetch_join: Option<bool>
Whether to asynchronously fetch keys needed for the join operator from storage. Asynchronous fetching should be faster for high-latency storage, such as object storage, but it could use excessive amounts of memory if the number of keys fetched is very large.
max_level0_batch_size_records: Option<i64>
Maximum batch size in records for level 0 merges.
merger: Option<MergerType>
merger_threads: Option<i64>
The number of merger threads.
The default value is equal to the number of worker threads.
negative_weight_multiplier: Option<i64>
Additional bias the merger assigns to records with negative weights (retractions) to promote them to higher levels of the LSM tree sooner.
Reasonable values for this parameter are in the range [0, 10].
The default value is 0, which means that retractions are not given any additional bias.
splitter_chunk_size_records: Option<i64>
Controls the maximal number of records output by splitter operators (joins, distinct, aggregation, rolling window and group operators) at each step.
The default value is 10,000 records.
stack_overflow_backtrace: Option<bool>
Attempt to print a stack trace on stack overflow.
To be used for debugging only; do not enable in production.
storage_mb_max: Option<i64>
If set, the maximum amount of storage, in MiB, for the POSIX backend to allow to be in use before failing all writes with `StorageFull` (`std::io::ErrorKind::StorageFull`). This is useful for testing on top of storage that does not implement its own quota mechanism.