from llmkit import LLMKitClient, Message, CompletionRequest
def main():
    """Demo: stream a short completion, echoing each text chunk as it arrives."""
    api = LLMKitClient.from_env()
    req = (
        CompletionRequest(
            model="openai/gpt-4o",
            messages=[Message.user("Write a short poem about programming. 4 lines maximum.")],
            max_tokens=200,
        )
        .with_streaming()
    )

    print("Streaming response:\n")
    for piece in api.complete_stream(req):
        # Emit text incrementally; flush so the terminal shows it immediately.
        if piece.text:
            print(piece.text, end="", flush=True)
        if not piece.is_done:
            continue
        # Final chunk: report completion and (if present) token usage, then stop.
        print("\n\n[Stream complete]")
        if piece.usage:
            print(f"Total tokens: {piece.usage.total_tokens()}")
        break
    print()
def stream_with_events():
    """Demo: stream a completion while labeling each chunk with its event type."""
    api = LLMKitClient.from_env()
    req = CompletionRequest(
        model="openai/gpt-4o",
        messages=[Message.user("Say hello in 3 languages")],
        max_tokens=100,
    ).with_streaming()

    print("\nStreaming with event inspection:\n")
    for piece in api.complete_stream(req):
        # Prefix every line with the chunk's event type for inspection.
        print(f"[Event: {piece.event_type}]", end=" ")
        if piece.text:
            print(f"Text: {piece.text!r}")
        elif piece.is_done:
            print(f"Stop reason: {piece.stop_reason}")
        else:
            # Chunk carried neither text nor a done marker — just end the line.
            print()
# Script entry point: runs only the basic streaming demo.
# NOTE(review): stream_with_events() is defined above but never invoked —
# confirm whether it was meant to be called here as well.
if __name__ == "__main__":
    main()