1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// use axum::{
// body::Body,
// http::{Request, StatusCode},
// };
// use axum::body::to_bytes;
// use tower::ServiceExt;
// use serde_json::Value;
// use std::time::Duration;
// use tokio::time::sleep;
// use orign::create_app;
// use orign::models::{ChatCompletionRequest, Message, ChatCompletionResponse, ModelInstanceRequest};
// -----------------------------------------------------------------------------
// DISABLED TEST: `test_full_model_flow`
//
// Everything below is commented out, so this test is neither compiled nor run.
// It is kept as a reference for the end-to-end model lifecycle, driven through
// the router returned by `create_app()` using `oneshot` requests:
//
//   1. POST   /v1/chat/models        - create a model instance; expects
//                                      201 CREATED and reads `id` from the body.
//   1.5 GET   /v1/chat/models/{id}   - poll every 5 seconds, for at most 300
//                                      seconds, until `model_instance.status`
//                                      is "Running". Each poll must return
//                                      200 OK; a missing status is treated as
//                                      "Unknown" and polling continues. The
//                                      test panics on timeout.
//   2. POST   /v1/chat/completions   - send one user message; expects 200 OK,
//                                      a matching `model`, and at least one
//                                      choice, each with role "assistant" and
//                                      non-empty content.
//   3. DELETE /v1/chat/models/{id}   - expects 200 OK and the message
//                                      "Model instance deleted successfully".
//
// Every response body is read with a 1 MiB (1024 * 1024 bytes) limit.
//
// NOTE(review): the reason this test is disabled is not recorded here.
// Presumably it needs a live backend able to schedule the requested model
// (40Gi memory request) and can take minutes to run - confirm before
// re-enabling.
// -----------------------------------------------------------------------------
// #[tokio::test]
// async fn test_full_model_flow() {
// // Build the application
// let app = create_app().await.unwrap();
// // Step 1: Create the model instance
// let model_name = "allenai/Molmo-7B-D-0924";
// let model_instance = ModelInstanceRequest {
// model_name: model_name.to_string(),
// datatype: "float32".to_string(),
// framework: "vllm".to_string(),
// memory_request: "40Gi".to_string(),
// cpu_request: None,
// };
// let json = serde_json::to_string(&model_instance).unwrap();
// let req = Request::builder()
// .method("POST")
// .uri("/v1/chat/models")
// .header("Content-Type", "application/json")
// .body(Body::from(json))
// .unwrap();
// let response = app.clone().oneshot(req).await.unwrap();
// assert_eq!(response.status(), StatusCode::CREATED);
// // Read the response body to get the model instance ID
// let bytes = to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
// let response_body: Value = serde_json::from_slice(&bytes).unwrap();
// // Extract the ID from the response
// let id = response_body["id"].as_str().unwrap();
// // Step 1.5: Wait for the model instance to be running
// use std::time::Instant;
// let timeout = Duration::from_secs(300); // 5 minutes timeout
// let start_time = Instant::now();
// loop {
// // Check for timeout
// if start_time.elapsed() > timeout {
// panic!("Timeout while waiting for the model instance to be running");
// }
// // Send a GET request to get the model instance status
// let req = Request::builder()
// .method("GET")
// .uri(format!("/v1/chat/models/{}", id))
// .header("Content-Type", "application/json")
// .body(Body::empty())
// .unwrap();
// let response = app.clone().oneshot(req).await.unwrap();
// assert_eq!(response.status(), StatusCode::OK);
// let bytes = to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
// let response_body: Value = serde_json::from_slice(&bytes).unwrap();
// // Extract the status
// let status = response_body["model_instance"]["status"].as_str().unwrap_or("Unknown");
// // Print the status
// println!("Model instance status: {}", status);
// if status == "Running" {
// break;
// } else {
// // Wait before the next check
// sleep(Duration::from_secs(5)).await;
// }
// }
// // Step 2: Chat with the model instance
// let request_body = ChatCompletionRequest {
// model: model_name.to_string(),
// messages: vec![
// Message {
// role: "user".to_string(),
// content: "Hello, how are you?".to_string(),
// },
// ],
// temperature: Some(0.7),
// top_p: None,
// n: None,
// stream: None,
// stop: None,
// max_tokens: Some(50),
// presence_penalty: None,
// frequency_penalty: None,
// };
// let json = serde_json::to_string(&request_body).unwrap();
// let req = Request::builder()
// .method("POST")
// .uri("/v1/chat/completions")
// .header("Content-Type", "application/json")
// .body(Body::from(json))
// .unwrap();
// let response = app.clone().oneshot(req).await.unwrap();
// // Assert the response status code
// assert_eq!(response.status(), StatusCode::OK);
// // Optionally, parse the response body and make further assertions
// let bytes = to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
// // Parse the response body into the expected type
// let response_body: ChatCompletionResponse = serde_json::from_slice(&bytes).unwrap();
// // Assert the response fields
// assert_eq!(response_body.model, model_name);
// assert!(!response_body.choices.is_empty());
// // Additional assertions as needed
// for choice in response_body.choices {
// assert_eq!(choice.message.role, "assistant");
// assert!(!choice.message.content.is_empty());
// }
// // Step 3: Delete the model instance
// let req = Request::builder()
// .method("DELETE")
// .uri(format!("/v1/chat/models/{}", id))
// .header("Content-Type", "application/json")
// .body(Body::empty())
// .unwrap();
// let response = app.oneshot(req).await.unwrap();
// assert_eq!(response.status(), StatusCode::OK);
// // Optionally, read and assert the response body
// let bytes = to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
// let response_body: Value = serde_json::from_slice(&bytes).unwrap();
// let message = response_body["message"].as_str().unwrap();
// assert_eq!(message, "Model instance deleted successfully");
// }