1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
//! Semantic-convention attribute keys for crawlberg telemetry.
//!
//! All upstream OTEL semconv constants are re-exported here so call sites
//! import from a single location rather than reaching into
//! `opentelemetry_semantic_conventions` directly. Extension constants in the
//! `crawl.*` namespace are defined as `pub const` strings below.
//!
//! Note: `HTTP_RESPONSE_BODY_SIZE` and `URL_DOMAIN` are stable in the semconv
//! attribute registry but gated behind `semconv_experimental` in the trace
//! re-export module. Rather than importing them, we define them below as
//! local constants with pinned string values, which avoids requiring that
//! Cargo feature flag.
// Stable re-exports from the semconv trace module.
pub use ;
// The following two constants are defined in the semconv attribute registry but
// gated behind the `semconv_experimental` Cargo feature in
// opentelemetry-semantic-conventions 0.32. We pin the stable string values
// here to avoid pulling in that feature flag.
/// Attribute key `http.response.body.size`: the HTTP response body size, in bytes.
pub const HTTP_RESPONSE_BODY_SIZE: &str = "http.response.body.size";

/// Attribute key `url.domain`: the domain portion of the request URL.
pub const URL_DOMAIN: &str = "url.domain";
// ---------------------------------------------------------------------------
// crawlberg extension constants (crawl.* namespace)
// ---------------------------------------------------------------------------
/// Attribute key `crawl.seed_count`: count of seed URLs belonging to the crawl job.
pub const CRAWL_SEED_COUNT: &str = "crawl.seed_count";

/// Attribute key `crawl.max_depth`: the maximum crawl depth, as configured.
pub const CRAWL_MAX_DEPTH: &str = "crawl.max_depth";

/// Attribute key `crawl.max_pages`: the maximum number of pages to crawl, as configured.
pub const CRAWL_MAX_PAGES: &str = "crawl.max_pages";

/// Attribute key `crawl.strategy`: name of the crawl strategy, e.g. `bfs`, `dfs` or
/// `best_first`.
pub const CRAWL_STRATEGY: &str = "crawl.strategy";

/// Attribute key `crawl.browser_mode`: the browser mode in effect, e.g. `never`, `always`,
/// `on_demand` or `stealth`.
pub const CRAWL_BROWSER_MODE: &str = "crawl.browser_mode";
/// Attribute key `crawl.depth`: depth of the URL that is currently being processed.
pub const CRAWL_DEPTH: &str = "crawl.depth";

/// Attribute key `crawl.frontier_size`: count of URLs presently held in the frontier.
pub const CRAWL_FRONTIER_SIZE: &str = "crawl.frontier_size";

/// Attribute key `crawl.pages_completed`: running count of pages completed successfully.
pub const CRAWL_PAGES_COMPLETED: &str = "crawl.pages_completed";
/// Attribute key `crawl.parent_url`: the parent URL the current link was discovered from.
pub const CRAWL_PARENT_URL: &str = "crawl.parent_url";

/// Attribute key `crawl.link_type`: kind of link, e.g. `internal`, `external` or `document`.
pub const CRAWL_LINK_TYPE: &str = "crawl.link_type";

/// Attribute key `crawl.tier`: the dispatch tier, e.g. `http` or `browser`.
pub const CRAWL_TIER: &str = "crawl.tier";

/// Attribute key `crawl.final_url`: the final URL once redirects have been followed.
pub const CRAWL_FINAL_URL: &str = "crawl.final_url";

/// Attribute key `crawl.mime_type`: MIME type of the resource that was fetched.
pub const CRAWL_MIME_TYPE: &str = "crawl.mime_type";
/// Attribute key `crawl.browser.backend`: the browser backend used for rendering, e.g.
/// `chromiumoxide` or `native`.
pub const CRAWL_BROWSER_BACKEND: &str = "crawl.browser.backend";

/// Attribute key `crawl.browser.session_id`: an opaque identifier for the browser session.
pub const CRAWL_BROWSER_SESSION_ID: &str = "crawl.browser.session_id";

/// Attribute key `crawl.pages_rendered`: count of pages rendered within the current browser
/// session.
pub const CRAWL_PAGES_RENDERED: &str = "crawl.pages_rendered";
/// Attribute key `crawl.host`: the hostname being checked against robots.txt.
pub const CRAWL_HOST: &str = "crawl.host";

/// Attribute key `crawl.allowed`: whether the URL was allowed by the robots.txt check.
pub const CRAWL_ALLOWED: &str = "crawl.allowed";

/// Attribute key `crawl.size_bytes`: the downloaded resource's size, in bytes.
pub const CRAWL_SIZE_BYTES: &str = "crawl.size_bytes";