use crate::{column_mapping::ColumnMapping, ArrowChunk};
use anyhow::{anyhow, Context, Result};
use arrow2::datatypes::SchemaRef;
use serde::{Deserialize, Serialize};
use skar_net_types::RollbackGuard;

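/// Arrow data returned by a query, split into one list of record batches per table.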
#[derive(Debug, Clone)]
pub struct QueryResponseData {
    pub blocks: Vec<ArrowBatch>,
    pub transactions: Vec<ArrowBatch>,
    pub logs: Vec<ArrowBatch>,
}

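/// Response to a single query request.
///
/// A minimal sketch of the pagination pattern described on `next_block`, assuming a
/// hypothetical `client.get(&query)` that returns a `QueryResponse` and a `to_block`
/// upper bound taken from the query:
///
/// ```ignore
/// loop {
///     let resp = client.get(&query).await?;
///     handle(&resp.data);
///     if resp.next_block >= to_block {
///         // Covered the requested range.
///         break;
///     }
///     // The server may return fewer blocks than requested; resume from next_block.
///     query.from_block = resp.next_block;
/// }
/// ```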
#[derive(Debug, Clone)]
pub struct QueryResponse {
    /// Current height of the source hypersync instance
    pub archive_height: Option<u64>,
    /// Next block to continue the query from. Responses are paginated, so the
    /// caller should resume from this block if they didn't receive data up to
    /// the `to_block` they specified in the query.
    pub next_block: u64,
    /// Total time it took the hypersync instance to execute the query.
    pub total_execution_time: u64,
    /// Response data
    pub data: QueryResponseData,
    /// Rollback guard
    pub rollback_guard: Option<RollbackGuard>,
}

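/// Configuration for streaming query results.
///
/// A minimal sketch of deserializing this config, assuming `serde_json` is available;
/// any omitted field falls back to its serde default
/// (`batch_size = 400`, `concurrency = 10`, `retry = false`):
///
/// ```ignore
/// let cfg: StreamConfig = serde_json::from_str(r#"{ "retry": true }"#)?;
/// assert_eq!(cfg.batch_size, 400);
/// assert_eq!(cfg.concurrency, 10);
/// assert!(cfg.retry);
/// ```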
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamConfig {
    /// Block range size to use when making individual requests.
    #[serde(default = "default_batch_size")]
    pub batch_size: u64,
    /// Controls the number of concurrent requests made to the hypersync server.
    #[serde(default = "default_concurrency")]
    pub concurrency: usize,
    /// Requests are retried indefinitely if this is set to true.
    #[serde(default)]
    pub retry: bool,
}

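/// Configuration for writing query results to parquet files.
///
/// A minimal sketch of deserializing this config, assuming `serde_json` is available;
/// `path` is the only required field, everything else falls back to its serde default:
///
/// ```ignore
/// let cfg: ParquetConfig = serde_json::from_str(
///     r#"{ "path": "./data", "hex_output": true }"#,
/// )?;
/// assert_eq!(cfg.batch_size, 400);
/// assert!(cfg.hex_output);
/// ```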
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParquetConfig {
    /// Path to write parquet files to
    pub path: String,
    /// Define type mapping for output columns
    #[serde(default)]
    pub column_mapping: ColumnMapping,
    /// Convert binary output columns to hex
    #[serde(default)]
    pub hex_output: bool,
    /// Block range size to use when making individual requests.
    #[serde(default = "default_batch_size")]
    pub batch_size: u64,
    /// Controls the number of concurrent requests made to the hypersync server.
    #[serde(default = "default_concurrency")]
    pub concurrency: usize,
    /// Requests are retried indefinitely if this is set to true.
    #[serde(default)]
    pub retry: bool,
}

fn default_batch_size() -> u64 {
    400
}

fn default_concurrency() -> usize {
    10
}

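/// A single Arrow record batch paired with the schema describing its columns.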
#[derive(Debug, Clone)]
pub struct ArrowBatch {
    pub chunk: ArrowChunk,
    pub schema: SchemaRef,
}

impl ArrowBatch {
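    /// Get a reference to the column named `name`, downcast to the concrete
    /// arrow2 array type `T`.
    ///
    /// A minimal usage sketch, assuming the batch contains a binary `hash`
    /// column (available columns depend on the query's field selection):
    ///
    /// ```ignore
    /// use arrow2::array::BinaryArray;
    ///
    /// let hashes = batch.column::<BinaryArray<i32>>("hash")?;
    /// for hash in hashes.iter().flatten() {
    ///     println!("{:?}", hash);
    /// }
    /// ```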
    pub fn column<T: 'static>(&self, name: &str) -> Result<&T> {
        match self
            .schema
            .fields
            .iter()
            .enumerate()
            .find(|(_, f)| f.name == name)
        {
            Some((idx, _)) => {
                let col = self
                    .chunk
                    .columns()
                    .get(idx)
                    .context("get column")?
                    .as_any()
                    .downcast_ref::<T>()
                    .context("cast column type")?;
                Ok(col)
            }
            None => Err(anyhow!("field {} not found in schema", name)),
        }
    }
}