"""Example Python client for the Inferno inference server: REST endpoints, SSE streaming, and WebSocket streaming."""

import json
import threading
import time
from typing import Dict, Generator, List, Optional

import requests
import sseclient  # provided by the sseclient-py package
import websocket  # provided by the websocket-client package


class InfernoClient:
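    """HTTP client for the Inferno server's REST API: health, model management, inference, embeddings, chat, and batch endpoints."""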
def __init__(self, base_url: str = "http://localhost:8080", api_key: Optional[str] = None):
self.base_url = base_url.rstrip('/')
self.api_key = api_key
self.session = requests.Session()
if api_key:
self.session.headers['Authorization'] = f'Bearer {api_key}'
self.session.headers['Content-Type'] = 'application/json'
def health_check(self) -> Dict:
response = self.session.get(f'{self.base_url}/health')
response.raise_for_status()
return response.json()
def list_models(self) -> List[Dict]:
response = self.session.get(f'{self.base_url}/models')
response.raise_for_status()
return response.json()['models']
def load_model(self, model_id: str, **kwargs) -> Dict:
response = self.session.post(
f'{self.base_url}/models/{model_id}/load',
json=kwargs
)
response.raise_for_status()
return response.json()
def unload_model(self, model_id: str) -> Dict:
response = self.session.post(f'{self.base_url}/models/{model_id}/unload')
response.raise_for_status()
return response.json()
def inference(self,
model: str,
prompt: str,
max_tokens: int = 100,
temperature: float = 0.7,
top_p: float = 0.9,
top_k: int = 40,
**kwargs) -> str:
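        """Run a blocking (non-streaming) completion and return the generated text."""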
request_data = {
'model': model,
'prompt': prompt,
'max_tokens': max_tokens,
'temperature': temperature,
'top_p': top_p,
'top_k': top_k,
'stream': False,
**kwargs
}
response = self.session.post(
f'{self.base_url}/inference',
json=request_data
)
response.raise_for_status()
result = response.json()
return result['choices'][0]['text']
def stream_inference(self,
model: str,
prompt: str,
max_tokens: int = 100,
**kwargs) -> Generator[str, None, None]:
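        """Yield generated tokens as they arrive over a server-sent events (SSE) stream."""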
request_data = {
'model': model,
'prompt': prompt,
'max_tokens': max_tokens,
'stream': True,
**kwargs
}
response = self.session.post(
f'{self.base_url}/inference/stream',
json=request_data,
stream=True,
headers={'Accept': 'text/event-stream'}
)
response.raise_for_status()
        # Each SSE event carries a JSON payload with either a token, a done flag, or an error.
        client = sseclient.SSEClient(response)
        for event in client.events():
            data = json.loads(event.data)
            if 'token' in data:
                yield data['token']
            elif 'done' in data:
                break
            elif 'error' in data:
                raise RuntimeError(f"Stream error: {data['error']}")
def embeddings(self, model: str, texts: List[str]) -> List[List[float]]:
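        """Return one embedding vector per input text."""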
request_data = {
'model': model,
'input': texts,
'encoding_format': 'float'
}
response = self.session.post(
f'{self.base_url}/embeddings',
json=request_data
)
response.raise_for_status()
result = response.json()
return [item['embedding'] for item in result['data']]
def chat_completion(self,
model: str,
messages: List[Dict[str, str]],
**kwargs) -> str:
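        """Send a chat-style request and return the assistant's reply text."""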
request_data = {
'model': model,
'messages': messages,
**kwargs
}
response = self.session.post(
f'{self.base_url}/v1/chat/completions',
json=request_data
)
response.raise_for_status()
result = response.json()
return result['choices'][0]['message']['content']
def batch_inference(self,
model: str,
prompts: List[str],
max_tokens: int = 100,
webhook_url: Optional[str] = None) -> str:
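        """Submit a batch of prompts for asynchronous processing and return the batch id."""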
requests_data = [
{'id': f'req_{i}', 'prompt': prompt}
for i, prompt in enumerate(prompts)
]
request_data = {
'model': model,
'requests': requests_data,
'max_tokens': max_tokens
}
if webhook_url:
request_data['webhook_url'] = webhook_url
response = self.session.post(
f'{self.base_url}/batch',
json=request_data
)
response.raise_for_status()
return response.json()['batch_id']
def get_batch_status(self, batch_id: str) -> Dict:
response = self.session.get(f'{self.base_url}/batch/{batch_id}')
response.raise_for_status()
return response.json()
def get_batch_results(self, batch_id: str) -> List[Dict]:
response = self.session.get(f'{self.base_url}/batch/{batch_id}/results')
response.raise_for_status()
return response.json()['results']


class InfernoWebSocketClient:
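    """WebSocket client that streams inference tokens from the Inferno server as they are generated."""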
def __init__(self, url: str = "ws://localhost:8080/ws", api_key: Optional[str] = None):
self.url = url
self.api_key = api_key
self.ws = None
def connect(self):
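        """Build the WebSocketApp; the connection is actually opened when run() is called."""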
self.ws = websocket.WebSocketApp(
self.url,
on_open=self._on_open,
on_message=self._on_message,
on_error=self._on_error,
on_close=self._on_close
)
def _on_open(self, ws):
print("WebSocket connection opened")
if self.api_key:
auth_msg = {
'type': 'auth',
'token': self.api_key
}
ws.send(json.dumps(auth_msg))
def _on_message(self, ws, message):
        data = json.loads(message)
        msg_type = data.get('type')
        if msg_type == 'token':
            print(data['token'], end='', flush=True)
        elif msg_type == 'complete':
            print("\n[Inference complete]")
        elif msg_type == 'error':
            print(f"\n[Error: {data['message']}]")
def _on_error(self, ws, error):
print(f"WebSocket error: {error}")
def _on_close(self, ws, close_status_code, close_msg):
print(f"WebSocket connection closed: {close_msg}")
def send_inference(self, model: str, prompt: str, max_tokens: int = 100):
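        """Send a streaming inference request over the open WebSocket connection."""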
        if not self.ws:
            raise RuntimeError("Not connected to WebSocket; call connect() first")
request = {
'type': 'inference',
'id': f'req_{int(time.time() * 1000)}',
'model': model,
'prompt': prompt,
'max_tokens': max_tokens,
'stream': True
}
self.ws.send(json.dumps(request))
def run(self):
self.ws.run_forever()


def main():
client = InfernoClient(api_key="your_api_key_here")
print("=== Inferno Python Client Example ===\n")
print("1. Health Check")
health = client.health_check()
print(f" Status: {health['status']}")
print(f" Version: {health['version']}\n")
print("2. Available Models")
models = client.list_models()
for model in models:
print(f" - {model['id']}: {model['name']} ({model['type']})")
print()
print("3. Loading Model")
model_id = "llama-2-7b"
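    # Demo call; assumes a model with this id is available on the server.
    load_result = client.load_model(model_id)
    print(f" Loaded: {load_result}\n")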
print("4. Simple Inference")
prompt = "What is artificial intelligence?"
print(f" Prompt: {prompt}")
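    response = client.inference(model=model_id, prompt=prompt, max_tokens=100)
    print(f" Response: {response}\n")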
print("5. Streaming Inference")
prompt = "Tell me a short story about a robot"
print(f" Prompt: {prompt}")
print(" Response: ", end="")
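    # Tokens are printed as they arrive from the SSE stream.
    for token in client.stream_inference(model=model_id, prompt=prompt):
        print(token, end="", flush=True)
    print("\n")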
print("6. Text Embeddings")
texts = ["Hello world", "How are you?", "Machine learning is fascinating"]
print(f" Texts: {texts}")
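    # Assumes the loaded model can produce embeddings; substitute a dedicated
    # embedding model id if your deployment provides one.
    vectors = client.embeddings(model=model_id, texts=texts)
    print(f" Embedding dimension: {len(vectors[0]) if vectors else 0}\n")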
print("7. Chat Completion")
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the capital of France?"}
]
print(f" Messages: {len(messages)}")
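    reply = client.chat_completion(model=model_id, messages=messages)
    print(f" Reply: {reply}\n")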
print("8. Batch Processing")
prompts = [
"What is Python?",
"Explain quantum computing",
"How does photosynthesis work?"
]
print(f" Batch size: {len(prompts)}")
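    # Submit the batch, then poll its status; fetch results with
    # get_batch_results() once the batch has finished.
    batch_id = client.batch_inference(model=model_id, prompts=prompts)
    print(f" Batch ID: {batch_id}")
    print(f" Status: {client.get_batch_status(batch_id)}\n")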
print("9. WebSocket Streaming")
print(" Connecting to WebSocket...")
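    # Minimal sketch of WebSocket usage: run() blocks, so it is started on a
    # background thread, and short sleeps crudely wait for the connection to
    # open and for the streamed tokens to finish printing.
    ws_client = InfernoWebSocketClient(api_key="your_api_key_here")
    ws_client.connect()
    threading.Thread(target=ws_client.run, daemon=True).start()
    time.sleep(1)
    ws_client.send_inference(model=model_id, prompt="Say hello over WebSocket", max_tokens=50)
    time.sleep(5)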
print("\n=== Example Complete ===")


if __name__ == "__main__":
main()