nemo-flow 0.1.0

Core Rust SDK for NeMo Flow observability, scope management, and runtime instrumentation.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};

use bitflags::bitflags;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::json;
use typed_builder::TypedBuilder;
use uuid::Uuid;

use crate::api::runtime::NemoFlowContextState;
use crate::api::runtime::current_scope_stack;
use crate::api::runtime::global_context;
use crate::api::runtime::{
    LlmCollectorFn, LlmExecutionNextFn, LlmFinalizerFn, LlmJsonStream, LlmStreamExecutionNextFn,
};
use crate::api::scope::event;
use crate::api::scope::{EmitMarkEventParams, ScopeHandle};
use crate::api::shared::{
    ensure_runtime_owner, resolve_parent_uuid, run_request_intercepts_with_codec,
    snapshot_event_subscribers,
};
use crate::codec::request::AnnotatedLlmRequest;
use crate::codec::response::AnnotatedLlmResponse;
use crate::codec::traits::{LlmCodec, LlmResponseCodec};
use crate::error::{FlowError, Result};
use crate::json::Json;
use crate::stream::LlmStreamWrapper;

bitflags! {
    /// Bitflags that modify LLM-call behavior and observability.
    ///
    /// Each flag occupies its own bit of the underlying `u32`, so flags can be
    /// combined (for example `LlmAttributes::STATEFUL | LlmAttributes::STREAMING`).
    /// The builders in this module default to `LlmAttributes::empty()`.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
    pub struct LlmAttributes: u32 {
        /// Marks the request as stateful from the runtime's perspective (bit 0).
        const STATEFUL = 0b01;
        /// Marks the request as streaming (bit 1).
        const STREAMING = 0b10;
    }
}

/// Runtime-owned handle identifying an active or completed LLM call.
///
/// The handle is cloneable and serializable. Its generated builder wraps
/// `Option` fields automatically: `field(value)` stores `Some(value)`, while
/// the `field_opt(option)` fallback setter accepts an `Option` directly.
#[derive(Debug, Clone, Serialize, Deserialize, TypedBuilder)]
#[builder(field_defaults(setter(strip_option(ignore_invalid, fallback_suffix = "_opt"))))]
pub struct LlmHandle {
    /// Unique LLM-call identifier. Defaults to a freshly generated UUIDv7.
    #[builder(default = Uuid::now_v7())]
    pub uuid: Uuid,
    /// Timestamp captured when the LLM handle was created. Defaults to the
    /// current UTC time when the builder is finalized without an explicit value.
    #[builder(default = Utc::now())]
    pub started_at: DateTime<Utc>,
    /// Provider or logical call name recorded on lifecycle events. The builder
    /// setter accepts any value convertible into a `String`.
    #[builder(setter(into))]
    pub name: String,
    /// Optional application payload stored on the handle.
    #[builder(default)]
    pub data: Option<Json>,
    /// Optional metadata attached to the LLM span.
    #[builder(default)]
    pub metadata: Option<Json>,
    /// LLM behavior flags. Defaults to no flags set.
    #[builder(default = LlmAttributes::empty())]
    pub attributes: LlmAttributes,
    /// UUID of the parent scope, if any.
    #[builder(default)]
    pub parent_uuid: Option<Uuid>,
    /// Optional normalized model name for observability.
    #[builder(default, setter(into))]
    pub model_name: Option<String>,
}

/// JSON-shaped LLM request payload passed through the runtime.
///
/// The request is cloneable and serializable; the start-event path in this
/// module serializes a sanitized clone of it as the event input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmRequest {
    /// Provider-specific request headers, keyed by header name.
    pub headers: serde_json::Map<String, Json>,
    /// Provider-specific request body.
    pub content: Json,
}

/// Builder parameters for [`NemoFlowContextState::create_llm_handle`].
///
/// Only `name` is required. Every `Option` field has a plain setter that
/// takes the inner value and an `_opt` setter that takes the `Option` itself.
#[derive(Debug, Clone, TypedBuilder)]
#[builder(field_defaults(setter(strip_option(ignore_invalid, fallback_suffix = "_opt"))))]
pub struct CreateLlmHandleParams<'a> {
    /// Logical provider or model family name.
    pub name: &'a str,
    /// Optional parent scope UUID.
    #[builder(default)]
    pub parent_uuid: Option<uuid::Uuid>,
    /// LLM attribute bitflags. Defaults to no flags set.
    #[builder(default = LlmAttributes::empty())]
    pub attributes: LlmAttributes,
    /// Optional application payload stored on the handle.
    #[builder(default)]
    pub data: Option<Json>,
    /// Optional metadata stored on the handle.
    #[builder(default)]
    pub metadata: Option<Json>,
    /// Optional normalized model name stored on the handle.
    #[builder(default, setter(into))]
    pub model_name: Option<String>,
    /// Optional timestamp captured as the handle start time and reused by the
    /// emitted start event. When omitted, the current UTC time is used.
    #[builder(default)]
    pub timestamp: Option<DateTime<Utc>>,
}

/// Builder parameters for [`NemoFlowContextState::build_llm_end_event`].
///
/// Only `handle` is required. Every `Option` field has a plain setter that
/// takes the inner value and an `_opt` setter that takes the `Option` itself.
#[derive(Clone, TypedBuilder)]
#[builder(field_defaults(setter(strip_option(ignore_invalid, fallback_suffix = "_opt"))))]
pub struct EndLlmHandleParams<'a> {
    /// LLM handle to serialize into the emitted end event.
    pub handle: &'a LlmHandle,
    /// Optional data payload merged over the handle data.
    #[builder(default)]
    pub data: Option<Json>,
    /// Optional metadata payload merged over the handle metadata.
    #[builder(default)]
    pub metadata: Option<Json>,
    /// Optional normalized response annotation produced by a response codec.
    #[builder(default)]
    pub annotated_response: Option<Arc<AnnotatedLlmResponse>>,
    /// Optional timestamp recorded on the emitted end event. When omitted, the
    /// runtime records the current UTC time, or one microsecond after the
    /// handle start time if the current time is not later.
    #[builder(default)]
    pub timestamp: Option<DateTime<Utc>>,
}

/// Builder parameters for [`llm_call`].
///
/// `name` and `request` are required and borrowed from the caller; all other
/// fields are optional. Every `Option` field has a plain setter that takes the
/// inner value and an `_opt` setter that takes the `Option` itself.
#[derive(TypedBuilder)]
#[builder(field_defaults(setter(strip_option(ignore_invalid, fallback_suffix = "_opt"))))]
pub struct LlmCallParams<'a> {
    /// Logical provider or model family name recorded on the span.
    pub name: &'a str,
    /// Raw request associated with the span. It is only read; a sanitized
    /// clone is what gets recorded on the start event.
    pub request: &'a LlmRequest,
    /// Optional explicit parent scope.
    #[builder(default)]
    pub parent: Option<&'a ScopeHandle>,
    /// LLM attribute bitflags applied to the span. Defaults to no flags set.
    #[builder(default = LlmAttributes::empty())]
    pub attributes: LlmAttributes,
    /// Optional application payload stored on the handle but not emitted as ATOF data.
    #[builder(default)]
    pub data: Option<Json>,
    /// Optional JSON metadata recorded on the start event.
    #[builder(default)]
    pub metadata: Option<Json>,
    /// Optional normalized model name recorded separately from the request payload.
    #[builder(default, setter(into))]
    pub model_name: Option<String>,
    /// Optional normalized request annotation produced by a codec.
    #[builder(default)]
    pub annotated_request: Option<Arc<AnnotatedLlmRequest>>,
    /// Optional timestamp captured as the handle start time and reused by the
    /// emitted start event. When omitted, the current UTC time is used.
    #[builder(default)]
    pub timestamp: Option<DateTime<Utc>>,
}

/// Builder parameters for [`llm_call_execute`].
///
/// `name`, `request`, and `func` are required and owned by the parameters;
/// all other fields are optional. Every `Option` field has a plain setter that
/// takes the inner value and an `_opt` setter that takes the `Option` itself.
#[derive(TypedBuilder)]
#[builder(field_defaults(setter(strip_option(ignore_invalid, fallback_suffix = "_opt"))))]
pub struct LlmCallExecuteParams {
    /// Logical provider or model family name recorded on emitted events.
    #[builder(setter(into))]
    pub name: String,
    /// Raw request passed into the managed pipeline.
    pub request: LlmRequest,
    /// Provider callback or execution continuation.
    pub func: LlmExecutionNextFn,
    /// Optional explicit parent scope for the emitted LLM span.
    #[builder(default)]
    pub parent: Option<ScopeHandle>,
    /// LLM attribute bitflags applied to the managed span. Defaults to no flags set.
    #[builder(default = LlmAttributes::empty())]
    pub attributes: LlmAttributes,
    /// Optional application payload stored on the handle but not emitted as ATOF data.
    #[builder(default)]
    pub data: Option<Json>,
    /// Optional JSON metadata recorded on emitted events.
    #[builder(default)]
    pub metadata: Option<Json>,
    /// Optional normalized model name for observability output.
    #[builder(default, setter(into))]
    pub model_name: Option<String>,
    /// Optional request codec used to produce annotated request data.
    #[builder(default)]
    pub codec: Option<Arc<dyn LlmCodec>>,
    /// Optional response codec used to attach annotated response data. A
    /// response that the codec fails to decode is reported without annotation.
    #[builder(default)]
    pub response_codec: Option<Arc<dyn LlmResponseCodec>>,
}

/// Builder parameters for [`llm_stream_call_execute`].
///
/// `name`, `request`, `func`, `collector`, and `finalizer` are required; all
/// other fields are optional. Every `Option` field has a plain setter that
/// takes the inner value and an `_opt` setter that takes the `Option` itself.
#[derive(TypedBuilder)]
#[builder(field_defaults(setter(strip_option(ignore_invalid, fallback_suffix = "_opt"))))]
pub struct LlmStreamCallExecuteParams {
    /// Logical provider or model family name recorded on emitted events.
    #[builder(setter(into))]
    pub name: String,
    /// Raw request passed into the managed pipeline.
    pub request: LlmRequest,
    /// Streaming provider callback or execution continuation.
    pub func: LlmStreamExecutionNextFn,
    /// Per-chunk collector callback used to accumulate stream state.
    pub collector: LlmCollectorFn,
    /// Finalizer callback used to construct the completed response.
    pub finalizer: LlmFinalizerFn,
    /// Optional explicit parent scope for the emitted LLM span.
    #[builder(default)]
    pub parent: Option<ScopeHandle>,
    /// LLM attribute bitflags applied to the managed span. Defaults to no flags set.
    #[builder(default = LlmAttributes::empty())]
    pub attributes: LlmAttributes,
    /// Optional application payload stored on the handle but not emitted as ATOF data.
    #[builder(default)]
    pub data: Option<Json>,
    /// Optional JSON metadata recorded on emitted events.
    #[builder(default)]
    pub metadata: Option<Json>,
    /// Optional normalized model name for observability output.
    #[builder(default, setter(into))]
    pub model_name: Option<String>,
    /// Optional request codec used to produce annotated request data.
    #[builder(default)]
    pub codec: Option<Arc<dyn LlmCodec>>,
    /// Optional response codec used to attach annotated response data.
    #[builder(default)]
    pub response_codec: Option<Arc<dyn LlmResponseCodec>>,
}

/// Builder parameters for [`llm_call_end`].
///
/// `handle` and `response` are required; all other fields are optional. Every
/// `Option` field has a plain setter that takes the inner value and an `_opt`
/// setter that takes the `Option` itself.
#[derive(TypedBuilder)]
#[builder(field_defaults(setter(strip_option(ignore_invalid, fallback_suffix = "_opt"))))]
pub struct LlmCallEndParams<'a> {
    /// LLM handle to close.
    pub handle: &'a LlmHandle,
    /// Raw provider response associated with the end event.
    pub response: Json,
    /// Optional application payload retained for compatibility; ATOF data is the response.
    /// [`llm_call_end`] falls back to this payload only when the sanitized
    /// response is JSON null.
    #[builder(default)]
    pub data: Option<Json>,
    /// Optional JSON metadata recorded on the end event.
    #[builder(default)]
    pub metadata: Option<Json>,
    /// Optional normalized response annotation produced by a response codec.
    #[builder(default)]
    pub annotated_response: Option<Arc<AnnotatedLlmResponse>>,
    /// Optional timestamp recorded on the emitted end event. When omitted, the
    /// runtime records the current UTC time, or one microsecond after the
    /// handle start time if the current time is not later.
    #[builder(default)]
    pub timestamp: Option<DateTime<Utc>>,
}

/// Create an [`LlmHandle`] from `params` using the global runtime context.
///
/// # Errors
/// Returns an error when the runtime owner check fails, or
/// [`FlowError::Internal`] when the global context lock cannot be read.
fn create_llm_handle(params: CreateLlmHandleParams<'_>) -> Result<LlmHandle> {
    ensure_runtime_owner()?;
    let shared = global_context();
    let guard = match shared.read() {
        Ok(guard) => guard,
        Err(poisoned) => return Err(FlowError::Internal(poisoned.to_string())),
    };
    let handle = guard.create_llm_handle(params);
    Ok(handle)
}

fn emit_llm_start(
    handle: &LlmHandle,
    request: &LlmRequest,
    annotated_request: Option<Arc<AnnotatedLlmRequest>>,
) -> Result<()> {
    ensure_runtime_owner()?;
    let (event, subscribers) = {
        let scope_stack = current_scope_stack();
        let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
        let scope_locals = scope_guard.collect_scope_local_registries(|registries| {
            &registries.llm_sanitize_request_guardrails
        });
        let scope_subscribers = scope_guard.collect_scope_local_subscribers();
        let subscribers = snapshot_event_subscribers(scope_subscribers)?;
        let context = global_context();
        let state = context
            .read()
            .map_err(|error| FlowError::Internal(error.to_string()))?;

        let sanitized_request = state.llm_sanitize_request_chain(request.clone(), &scope_locals);
        let input = serde_json::to_value(&sanitized_request).unwrap_or(Json::Null);
        let event = state.build_llm_start_event(handle, Some(input), annotated_request);
        (event, subscribers)
    };
    NemoFlowContextState::emit_event(&event, &subscribers);
    Ok(())
}

/// Emit the LLM-start event for `handle` at most once per shared flag.
///
/// `start_emitted` is atomically set to `true` on every call. Only the call
/// that observes the previous value `false` forwards to [`emit_llm_start`];
/// every other call returns `Ok(())` without emitting. The flag stays set
/// even when the emission fails, so a failed start is not retried.
fn emit_llm_start_once(
    start_emitted: &Arc<AtomicBool>,
    handle: &LlmHandle,
    request: &LlmRequest,
    annotated_request: Option<Arc<AnnotatedLlmRequest>>,
) -> Result<()> {
    let already_emitted = start_emitted.swap(true, Ordering::SeqCst);
    if already_emitted {
        Ok(())
    } else {
        emit_llm_start(handle, request, annotated_request)
    }
}

/// Start a manual LLM lifecycle span.
///
/// This emits an LLM-start event after applying sanitize-request guardrails to
/// the payload recorded for observability.
///
/// # Parameters
/// - `name`: Logical provider or model family name recorded on the span.
/// - `request`: Raw [`LlmRequest`] associated with the span.
/// - `parent`: Optional explicit parent scope.
/// - `attributes`: LLM attribute bitflags applied to the span.
/// - `data`: Optional application payload stored on the returned handle. The
///   emitted start event data is the sanitized `request` payload.
/// - `metadata`: Optional JSON metadata recorded on the start event.
/// - `model_name`: Optional normalized model name recorded separately from the
///   request payload.
/// - `annotated_request`: Optional normalized request annotation produced by a
///   codec.
/// - `timestamp`: Optional timestamp recorded as the handle start time and on
///   the emitted start event. When `None`, the current UTC time is used.
///
/// # Returns
/// A [`Result`] containing the created [`LlmHandle`].
///
/// # Errors
/// Returns an error when the runtime owner check fails or when internal state
/// cannot be read safely.
///
/// # Notes
/// Sanitize-request guardrails affect only the emitted start-event payload, not
/// the caller-owned [`LlmRequest`].
pub fn llm_call(params: LlmCallParams<'_>) -> Result<LlmHandle> {
    let handle_params = CreateLlmHandleParams::builder()
        .name(params.name)
        .parent_uuid_opt(resolve_parent_uuid(params.parent))
        .attributes(params.attributes)
        .data_opt(params.data)
        .metadata_opt(params.metadata)
        .model_name_opt(params.model_name)
        .timestamp_opt(params.timestamp)
        .build();
    let handle = create_llm_handle(handle_params)?;
    emit_llm_start(&handle, params.request, params.annotated_request)?;
    Ok(handle)
}

/// Finish a manual LLM lifecycle span.
///
/// This emits an LLM-end event for a handle previously returned by
/// [`llm_call`].
///
/// # Parameters
/// - `handle`: LLM handle to close.
/// - `response`: Raw provider response associated with the end event.
/// - `data`: Optional application payload retained for compatibility. The
///   emitted end event data is the sanitized `response` unless it sanitizes to
///   JSON null, in which case this payload is used.
/// - `metadata`: Optional JSON metadata recorded on the end event.
/// - `annotated_response`: Optional normalized response annotation produced by
///   a response codec.
/// - `timestamp`: Optional timestamp recorded on the emitted end event. When
///   `None`, the runtime uses the current UTC time, or one microsecond after
///   the handle start time if the current time is not later.
///
/// # Returns
/// A [`Result`] that is `Ok(())` when the end event has been emitted.
///
/// # Errors
/// Returns an error when the runtime owner check fails or when internal state
/// cannot be read safely.
///
/// # Notes
/// Sanitize-response guardrails affect only the emitted end-event payload, not
/// the caller-owned `response` value.
pub fn llm_call_end(params: LlmCallEndParams<'_>) -> Result<()> {
    ensure_runtime_owner()?;
    let (event, subscribers) = {
        let scope_stack = current_scope_stack();
        let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
        let scope_locals = scope_guard.collect_scope_local_registries(|registries| {
            &registries.llm_sanitize_response_guardrails
        });
        let scope_subscribers = scope_guard.collect_scope_local_subscribers();
        let subscribers = snapshot_event_subscribers(scope_subscribers)?;
        let context = global_context();
        let state = context
            .read()
            .map_err(|error| FlowError::Internal(error.to_string()))?;

        let sanitized_response = state.llm_sanitize_response_chain(params.response, &scope_locals);
        let data = if sanitized_response.is_null() {
            params.data
        } else {
            Some(sanitized_response)
        };
        let event = state.build_llm_end_event(
            EndLlmHandleParams::builder()
                .handle(params.handle)
                .data_opt(data)
                .metadata_opt(params.metadata)
                .annotated_response_opt(params.annotated_response)
                .timestamp_opt(params.timestamp)
                .build(),
        );
        (event, subscribers)
    };
    NemoFlowContextState::emit_event(&event, &subscribers);
    Ok(())
}

fn emit_llm_end_without_output(handle: &LlmHandle, metadata: Option<Json>) -> Result<()> {
    ensure_runtime_owner()?;
    let (event, subscribers) = {
        let scope_stack = current_scope_stack();
        let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
        let scope_subscribers = scope_guard.collect_scope_local_subscribers();
        let subscribers = snapshot_event_subscribers(scope_subscribers)?;
        let context = global_context();
        let state = context
            .read()
            .map_err(|error| FlowError::Internal(error.to_string()))?;
        let event = state.end_llm_handle(handle, handle.data.clone(), metadata, None);
        (event, subscribers)
    };
    NemoFlowContextState::emit_event(&event, &subscribers);
    Ok(())
}

/// Execute an LLM call through the managed middleware pipeline.
///
/// This runs conditional-execution guardrails, request intercepts,
/// sanitize-request guardrails, execution intercepts, the provider callback,
/// and sanitize-response guardrails in the runtime-defined order.
///
/// # Parameters
/// - `name`: Logical provider or model family name recorded on emitted events.
/// - `request`: Raw [`LlmRequest`] passed into the managed pipeline.
/// - `func`: Provider callback or execution continuation.
/// - `parent`: Optional explicit parent scope for the emitted LLM span.
/// - `attributes`: LLM attribute bitflags applied to the managed span.
/// - `data`: Optional application payload stored on the managed LLM handle. It
///   may be used on failure end events that have no output payload.
/// - `metadata`: Optional JSON metadata recorded on emitted events.
/// - `model_name`: Optional normalized model name for observability output.
/// - `codec`: Optional request codec used to produce annotated request data for
///   intercepts and events.
/// - `response_codec`: Optional response codec used to attach annotated
///   response data to the end event.
///
/// # Returns
/// A [`Result`] containing the raw JSON response returned by the callback or
/// an execution intercept.
///
/// # Errors
/// Returns [`FlowError::GuardrailRejected`] when conditional-execution
/// guardrails block the call, or any error raised by request intercepts,
/// execution intercepts, codecs, or the callback itself.
///
/// # Notes
/// Response codecs enrich observability output only and do not change the
/// value returned to the caller.
pub async fn llm_call_execute(params: LlmCallExecuteParams) -> Result<Json> {
    let LlmCallExecuteParams {
        name,
        request,
        func,
        parent,
        attributes,
        data,
        metadata,
        model_name,
        codec,
        response_codec,
    } = params;
    ensure_runtime_owner()?;
    {
        let scope_stack = current_scope_stack();
        let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
        let scope_locals = scope_guard.collect_scope_local_registries(|registries| {
            &registries.llm_conditional_execution_guardrails
        });
        let context = global_context();
        let state = context
            .read()
            .map_err(|error| FlowError::Internal(error.to_string()))?;
        if let Some(error) = state.llm_conditional_execution_chain(&request, &scope_locals)? {
            drop(state);
            drop(scope_guard);
            let mut rejection_data = json!({});
            if let Some(object) = rejection_data.as_object_mut() {
                object.insert("rejected".into(), json!(true));
                object.insert("rejection_reason".into(), json!(&error));
            }
            let _ = event(
                EmitMarkEventParams::builder()
                    .name(&name)
                    .parent_opt(parent.as_ref())
                    .data(rejection_data)
                    .metadata_opt(metadata.clone())
                    .build(),
            );
            return Err(FlowError::GuardrailRejected(error));
        }
    }

    let (intercepted_request, annotated_request) =
        run_request_intercepts_with_codec(&name, request, codec)?;

    let handle = create_llm_handle(
        CreateLlmHandleParams::builder()
            .name(name.as_str())
            .parent_uuid_opt(resolve_parent_uuid(parent.as_ref()))
            .attributes(attributes)
            .data_opt(data.clone())
            .metadata_opt(metadata.clone())
            .model_name_opt(model_name)
            .build(),
    )?;
    let start_emitted = Arc::new(AtomicBool::new(false));
    let fallback_request = intercepted_request.clone();
    let execution_handle = handle.clone();
    let execution_annotated_request = annotated_request.clone();
    let execution_start_emitted = start_emitted.clone();
    let instrumented_func: LlmExecutionNextFn = Arc::new(move |request| {
        let next = func.clone();
        let handle = execution_handle.clone();
        let annotated_request = execution_annotated_request.clone();
        let start_emitted = execution_start_emitted.clone();
        Box::pin(async move {
            emit_llm_start_once(&start_emitted, &handle, &request, annotated_request)?;
            next(request).await
        })
    });

    let execution = {
        let scope_stack = current_scope_stack();
        let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
        let scope_locals = scope_guard
            .collect_scope_local_registries(|registries| &registries.llm_execution_intercepts);
        let context = global_context();
        let state = context
            .read()
            .map_err(|error| FlowError::Internal(error.to_string()))?;
        state.llm_build_execution_chain(&name, instrumented_func, &scope_locals)
    };

    match execution(intercepted_request).await {
        Ok(response) => {
            emit_llm_start_once(
                &start_emitted,
                &handle,
                &fallback_request,
                annotated_request.clone(),
            )?;
            let annotated_response = response_codec
                .as_ref()
                .and_then(|codec| codec.decode_response(&response).ok())
                .map(Arc::new);
            llm_call_end(
                LlmCallEndParams::builder()
                    .handle(&handle)
                    .response(response.clone())
                    .data_opt(data)
                    .metadata_opt(metadata)
                    .annotated_response_opt(annotated_response)
                    .build(),
            )?;
            Ok(response)
        }
        Err(error) => {
            let _ = emit_llm_start_once(
                &start_emitted,
                &handle,
                &fallback_request,
                annotated_request,
            );
            let _ = emit_llm_end_without_output(&handle, metadata);
            Err(error)
        }
    }
}

/// Execute a streaming LLM call through the managed middleware pipeline.
///
/// This runs the same pre-execution middleware as [`llm_call_execute`] and
/// then wraps the provider stream so chunk callbacks and finalization can emit
/// a single LLM-end event when streaming completes.
///
/// # Parameters
/// - `name`: Logical provider or model family name recorded on emitted events.
/// - `request`: Raw [`LlmRequest`] passed into the managed pipeline.
/// - `func`: Streaming provider callback or execution continuation.
/// - `collector`: Per-chunk collector callback used to accumulate stream state.
/// - `finalizer`: Finalizer callback used to construct the completed response.
/// - `parent`: Optional explicit parent scope for the emitted LLM span.
/// - `attributes`: LLM attribute bitflags applied to the managed span.
/// - `data`: Optional application payload stored on the managed LLM handle. It
///   may be used on failure end events that have no output payload.
/// - `metadata`: Optional JSON metadata recorded on emitted events.
/// - `model_name`: Optional normalized model name for observability output.
/// - `codec`: Optional request codec used to produce annotated request data for
///   intercepts and events.
/// - `response_codec`: Optional response codec used to attach annotated
///   response data to the end event.
///
/// # Returns
/// A [`Result`] containing a boxed stream of JSON chunks.
///
/// # Errors
/// Returns [`FlowError::GuardrailRejected`] when conditional-execution
/// guardrails block the call, or any error raised by request intercepts,
/// execution intercepts, stream callbacks, codecs, or the provider callback.
///
/// # Notes
/// The returned stream emits chunk-level results while the runtime defers the
/// LLM-end event until the collector and finalizer complete.
pub async fn llm_stream_call_execute(params: LlmStreamCallExecuteParams) -> Result<LlmJsonStream> {
    let LlmStreamCallExecuteParams {
        name,
        request,
        func,
        collector,
        finalizer,
        parent,
        attributes,
        data,
        metadata,
        model_name,
        codec,
        response_codec,
    } = params;
    ensure_runtime_owner()?;
    {
        let scope_stack = current_scope_stack();
        let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
        let scope_locals = scope_guard.collect_scope_local_registries(|registries| {
            &registries.llm_conditional_execution_guardrails
        });
        let context = global_context();
        let state = context
            .read()
            .map_err(|error| FlowError::Internal(error.to_string()))?;
        if let Some(error) = state.llm_conditional_execution_chain(&request, &scope_locals)? {
            drop(state);
            drop(scope_guard);
            let mut rejection_data = json!({});
            if let Some(object) = rejection_data.as_object_mut() {
                object.insert("rejected".into(), json!(true));
                object.insert("rejection_reason".into(), json!(&error));
            }
            let _ = event(
                EmitMarkEventParams::builder()
                    .name(&name)
                    .parent_opt(parent.as_ref())
                    .data(rejection_data)
                    .metadata_opt(metadata.clone())
                    .build(),
            );
            return Err(FlowError::GuardrailRejected(error));
        }
    }

    let (intercepted_request, annotated_request) =
        run_request_intercepts_with_codec(&name, request, codec)?;

    let handle = create_llm_handle(
        CreateLlmHandleParams::builder()
            .name(name.as_str())
            .parent_uuid_opt(resolve_parent_uuid(parent.as_ref()))
            .attributes(attributes)
            .data_opt(data.clone())
            .metadata_opt(metadata.clone())
            .model_name_opt(model_name)
            .build(),
    )?;
    let start_emitted = Arc::new(AtomicBool::new(false));
    let fallback_request = intercepted_request.clone();
    let execution_handle = handle.clone();
    let execution_annotated_request = annotated_request.clone();
    let execution_start_emitted = start_emitted.clone();
    let instrumented_func: LlmStreamExecutionNextFn = Arc::new(move |request| {
        let next = func.clone();
        let handle = execution_handle.clone();
        let annotated_request = execution_annotated_request.clone();
        let start_emitted = execution_start_emitted.clone();
        Box::pin(async move {
            emit_llm_start_once(&start_emitted, &handle, &request, annotated_request)?;
            next(request).await
        })
    });

    let execution = {
        let scope_stack = current_scope_stack();
        let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
        let scope_locals = scope_guard.collect_scope_local_registries(|registries| {
            &registries.llm_stream_execution_intercepts
        });
        let context = global_context();
        let state = context
            .read()
            .map_err(|error| FlowError::Internal(error.to_string()))?;
        state.llm_stream_build_execution_chain(&name, instrumented_func, &scope_locals)
    };

    match execution(intercepted_request).await {
        Ok(raw_stream) => {
            emit_llm_start_once(
                &start_emitted,
                &handle,
                &fallback_request,
                annotated_request.clone(),
            )?;
            let wrapper = LlmStreamWrapper::new(
                raw_stream,
                handle,
                collector,
                finalizer,
                data,
                metadata,
                response_codec,
            );
            Ok(Box::pin(wrapper) as LlmJsonStream)
        }
        Err(error) => {
            let _ = emit_llm_start_once(
                &start_emitted,
                &handle,
                &fallback_request,
                annotated_request,
            );
            let _ = emit_llm_end_without_output(&handle, metadata);
            Err(error)
        }
    }
}

/// Run only the LLM request-intercept chain.
///
/// This applies the currently active global and scope-local request intercepts
/// without emitting lifecycle events or invoking provider execution.
///
/// # Parameters
/// - `name`: Logical provider or model family name used when resolving the
///   intercept chain.
/// - `request`: Raw [`LlmRequest`] to transform.
///
/// # Returns
/// A [`Result`] containing the transformed [`LlmRequest`].
///
/// # Errors
/// Returns any error raised by the request-intercept chain.
///
/// # Notes
/// Conditional guardrails, codecs, and execution intercepts are not run by
/// this helper.
pub fn llm_request_intercepts(name: &str, request: LlmRequest) -> Result<LlmRequest> {
    ensure_runtime_owner()?;
    let scope_stack = current_scope_stack();
    let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
    let scope_locals =
        scope_guard.collect_scope_local_registries(|registries| &registries.llm_request_intercepts);
    let context = global_context();
    let state = context
        .read()
        .map_err(|error| FlowError::Internal(error.to_string()))?;
    let (request, _) = state.llm_request_intercepts_chain(name, request, None, &scope_locals)?;
    Ok(request)
}

/// Run only the LLM conditional-execution guardrail chain.
///
/// This evaluates whether an LLM call should be allowed to proceed without
/// emitting lifecycle events or invoking request intercepts or execution.
///
/// # Parameters
/// - `request`: Raw [`LlmRequest`] to validate.
///
/// # Returns
/// A [`Result`] that is `Ok(())` when all guardrails allow execution.
///
/// # Errors
/// Returns [`FlowError::GuardrailRejected`] when a guardrail blocks execution,
/// or any error raised by the guardrail chain itself.
///
/// # Notes
/// This helper is useful for preflight checks when the caller needs the
/// rejection result without starting an LLM span.
pub fn llm_conditional_execution(request: &LlmRequest) -> Result<()> {
    ensure_runtime_owner()?;
    let scope_stack = current_scope_stack();
    let scope_guard = scope_stack.read().expect("scope stack lock poisoned");
    let scope_locals = scope_guard.collect_scope_local_registries(|registries| {
        &registries.llm_conditional_execution_guardrails
    });
    let context = global_context();
    let state = context
        .read()
        .map_err(|error| FlowError::Internal(error.to_string()))?;
    if let Some(error) = state.llm_conditional_execution_chain(request, &scope_locals)? {
        return Err(FlowError::GuardrailRejected(error));
    }
    Ok(())
}