from llmkit import LLMKitClient, Message, CompletionRequest, ThinkingConfig
def main():
    """Demonstrate response caching: identical requests reuse a stored response.

    Runs four demo completions (two distinct questions, two repeats) through a
    small in-memory cache keyed by a caller-chosen string, printing hit/miss
    status and the model's answer for each, then a summary of API calls saved.
    """
    client = LLMKitClient.from_env()
    model = "openrouter/qwen/qwen3-32b"
    # Maps request_key -> completed response; lives only for this run.
    cache = {}

    def build_request(question: str) -> CompletionRequest:
        # Every demo request shares the same model, system prompt, token limit,
        # and disabled thinking; only the user question varies, so construction
        # is centralized here instead of being repeated per call.
        return CompletionRequest(
            model=model,
            messages=[Message.user(question)],
            system="Answer briefly in one sentence.",
            max_tokens=100,
        ).with_thinking_config(ThinkingConfig.disabled())

    def cached_complete(request_key: str, request: CompletionRequest):
        """Return the cached response for request_key, calling the API on a miss."""
        if request_key in cache:
            print(" [CACHE HIT - no API call]")
            return cache[request_key]
        print(" [CACHE MISS - API call]")
        response = client.complete(request)
        cache[request_key] = response
        return response

    print("Response Caching Example")
    print("=" * 50)

    # (section header, cache key, question) for each of the four demo calls.
    # Repeating the "france" key is what produces the cache hits.
    steps = [
        ("\n1. First request (France):", "france", "What is the capital of France?"),
        ("\n2. Same request again (France):", "france", "What is the capital of France?"),
        ("\n3. Different question (Germany):", "germany", "What is the capital of Germany?"),
        ("\n4. First question again (France):", "france", "What is the capital of France?"),
    ]
    for header, key, question in steps:
        print(header)
        print("-" * 40)
        response = cached_complete(key, build_request(question))
        print(f"Response: {response.text_content().strip()}")

    print("\n" + "=" * 50)
    print("Summary:")
    print(" Total requests: 4")
    print(" API calls made: 2 (misses)")
    print(" Free responses: 2 (hits)")
    print(" Hit rate: 50%")
    print("\nCaching saves API costs on repeated identical requests!")
# Run the caching demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()