1use axum::http::header;
2use axum::{
3 extract::{DefaultBodyLimit, Multipart, Query, State},
4 http::{HeaderName, HeaderValue, StatusCode},
5 response::{Html, IntoResponse, Json, Response},
6 routing::{get, post},
7 Router,
8};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::net::SocketAddr;
12use std::sync::Arc;
13use std::time::Duration;
14use tokio::net::TcpListener;
15use tower::limit::ConcurrencyLimitLayer;
16use tower::ServiceBuilder;
17use tower_governor::{governor::GovernorConfigBuilder, GovernorLayer};
18use tower_http::set_header::SetResponseHeaderLayer;
19use tower_http::timeout::TimeoutLayer;
20
21use crate::catalog::store::ReferenceCatalog;
22use crate::cli::ServeArgs;
23use crate::matching::engine::{MatchingConfig, MatchingEngine, ScoringWeights};
24use crate::matching::Suggestion;
25use crate::utils::validation::{validate_upload, ValidationError};
26use crate::web::format_detection::{
27 detect_format, parse_binary_file, parse_with_format, FileFormat,
28};
29
/// Limit on the number of multipart form fields processed per request.
pub const MAX_MULTIPART_FIELDS: usize = 10;

/// Largest accepted payload for the uploaded `file` field, in bytes (16 MiB).
pub const MAX_FILE_FIELD_SIZE: usize = 16 * 1024 * 1024;

/// Largest accepted payload for the `header_text` field, in bytes (1 MiB).
pub const MAX_TEXT_FIELD_SIZE: usize = 1024 * 1024;

/// Converts a count to `f64` so it can take part in ratio arithmetic.
///
/// The cast is centralised here so the precision-loss lint is silenced in exactly one place.
#[inline]
#[allow(clippy::cast_precision_loss)]
fn count_to_f64(count: usize) -> f64 {
    count as f64
}
43
44pub struct AppState {
46 pub catalog: ReferenceCatalog,
47}
48
49#[derive(Debug)]
51struct InputData {
52 text_content: Option<String>,
54 binary_content: Option<Vec<u8>>,
56 filename: Option<String>,
58 format: Option<FileFormat>,
60}
61
62#[derive(Serialize)]
64pub struct ErrorResponse {
65 pub error: String,
66 pub error_type: String,
67 pub details: Option<String>,
68}
69
70#[derive(Serialize)]
71struct ConfigurationInfo {
72 score_threshold: f64,
73 result_limit: usize,
74 scoring_weights: ScoringWeights,
75}
76
77#[derive(Deserialize)]
79struct DetailedQueryParams {
80 mode: Option<String>,
82 match_id: Option<usize>,
84 query_page: Option<usize>,
86 query_page_size: Option<usize>,
88 ref_page: Option<usize>,
90 ref_page_size: Option<usize>,
92}
93
94pub fn create_safe_error_response(
97 error_type: &str,
98 user_message: &str,
99 internal_error: Option<&str>,
100) -> ErrorResponse {
101 if let Some(internal_msg) = internal_error {
103 tracing::error!("Internal error ({}): {}", error_type, internal_msg);
104 }
105
106 ErrorResponse {
107 error: user_message.to_string(),
108 error_type: error_type.to_string(),
109 details: None, }
111}
112
113pub fn run(args: ServeArgs) -> anyhow::Result<()> {
119 let rt = tokio::runtime::Runtime::new()?;
121 rt.block_on(async move { run_server(args).await })
122}
123
124#[allow(clippy::missing_panics_doc)] pub fn create_router() -> anyhow::Result<Router> {
131 let catalog = ReferenceCatalog::load_embedded()?;
133 let state = Arc::new(AppState { catalog });
134
135 let governor_conf = GovernorConfigBuilder::default()
137 .per_second(10) .burst_size(50) .finish()
140 .unwrap();
141
142 let app = Router::new()
144 .route("/", get(index_handler))
145 .route("/api/identify", post(identify_handler))
146 .route("/api/catalog", get(catalog_handler))
147 .route("/static/css/styles.css", get(styles_css_handler))
149 .route("/static/js/main.js", get(main_js_handler))
150 .route("/static/js/utils/helpers.js", get(helpers_js_handler))
151 .route(
152 "/static/js/managers/ConfigurationManager.js",
153 get(config_manager_js_handler),
154 )
155 .route(
156 "/static/js/managers/TabManager.js",
157 get(tab_manager_js_handler),
158 )
159 .route(
160 "/static/js/managers/ResultsManager.js",
161 get(results_manager_js_handler),
162 )
163 .route(
164 "/static/js/managers/SplitViewManager.js",
165 get(split_view_manager_js_handler),
166 )
167 .with_state(state)
168 .layer(
169 ServiceBuilder::new()
170 .layer(SetResponseHeaderLayer::if_not_present(
172 HeaderName::from_static("x-content-type-options"),
173 HeaderValue::from_static("nosniff"),
174 ))
175 .layer(SetResponseHeaderLayer::if_not_present(
176 HeaderName::from_static("x-frame-options"),
177 HeaderValue::from_static("DENY"),
178 ))
179 .layer(SetResponseHeaderLayer::if_not_present(
180 HeaderName::from_static("x-xss-protection"),
181 HeaderValue::from_static("1; mode=block"),
182 ))
183 .layer(SetResponseHeaderLayer::if_not_present(
184 HeaderName::from_static("strict-transport-security"),
185 HeaderValue::from_static("max-age=31536000; includeSubDomains"),
186 ))
187 .layer(SetResponseHeaderLayer::if_not_present(
188 HeaderName::from_static("referrer-policy"),
189 HeaderValue::from_static("strict-origin-when-cross-origin"),
190 ))
191 .layer(GovernorLayer {
193 config: Arc::new(governor_conf),
194 })
195 .layer(TimeoutLayer::with_status_code(
197 StatusCode::REQUEST_TIMEOUT,
198 Duration::from_secs(30),
199 ))
200 .layer(ConcurrencyLimitLayer::new(100))
202 .layer(DefaultBodyLimit::max(20 * 1024 * 1024)), );
205
206 Ok(app)
207}
208
209async fn run_server(args: ServeArgs) -> anyhow::Result<()> {
210 let app = create_router()?;
211
212 let addr = format!("{}:{}", args.address, args.port);
213 println!("Starting ref-solver web server at http://{addr}");
214
215 if args.open {
216 let _ = open::that(format!("http://{addr}"));
217 }
218
219 let listener = TcpListener::bind(&addr).await?;
220 axum::serve(
221 listener,
222 app.into_make_service_with_connect_info::<SocketAddr>(),
223 )
224 .await?;
225
226 Ok(())
227}
228
229async fn index_handler() -> Html<&'static str> {
231 Html(include_str!("templates/index.html"))
232}
233
234async fn styles_css_handler() -> impl IntoResponse {
236 (
237 [(header::CONTENT_TYPE, "text/css; charset=utf-8")],
238 include_str!("static/css/styles.css"),
239 )
240}
241
242async fn main_js_handler() -> impl IntoResponse {
244 (
245 [(
246 header::CONTENT_TYPE,
247 "application/javascript; charset=utf-8",
248 )],
249 include_str!("static/js/main.js"),
250 )
251}
252
253async fn helpers_js_handler() -> impl IntoResponse {
254 (
255 [(
256 header::CONTENT_TYPE,
257 "application/javascript; charset=utf-8",
258 )],
259 include_str!("static/js/utils/helpers.js"),
260 )
261}
262
263async fn config_manager_js_handler() -> impl IntoResponse {
264 (
265 [(
266 header::CONTENT_TYPE,
267 "application/javascript; charset=utf-8",
268 )],
269 include_str!("static/js/managers/ConfigurationManager.js"),
270 )
271}
272
273async fn tab_manager_js_handler() -> impl IntoResponse {
274 (
275 [(
276 header::CONTENT_TYPE,
277 "application/javascript; charset=utf-8",
278 )],
279 include_str!("static/js/managers/TabManager.js"),
280 )
281}
282
283async fn results_manager_js_handler() -> impl IntoResponse {
284 (
285 [(
286 header::CONTENT_TYPE,
287 "application/javascript; charset=utf-8",
288 )],
289 include_str!("static/js/managers/ResultsManager.js"),
290 )
291}
292
293async fn split_view_manager_js_handler() -> impl IntoResponse {
294 (
295 [(
296 header::CONTENT_TYPE,
297 "application/javascript; charset=utf-8",
298 )],
299 include_str!("static/js/managers/SplitViewManager.js"),
300 )
301}
302
303#[allow(clippy::too_many_lines)] async fn identify_handler(
306 State(state): State<Arc<AppState>>,
307 Query(params): Query<DetailedQueryParams>,
308 mut multipart: Multipart,
309) -> impl IntoResponse {
310 let start_time = std::time::Instant::now();
311
312 let (input_data, config) = match extract_request_data(&mut multipart).await {
314 Ok(data) => data,
315 Err(error_response) => return error_response,
316 };
317
318 let query = match parse_input_data(&input_data) {
320 Ok(query) => query,
321 Err(error_response) => return *error_response,
322 };
323
324 let matching_config = MatchingConfig {
326 min_score: config.score_threshold,
327 scoring_weights: config.scoring_weights.clone(),
328 };
329
330 let engine = MatchingEngine::new(&state.catalog, matching_config);
331 let matches = engine.find_matches(&query, config.result_limit);
332
333 if params.mode.as_deref() == Some("detailed") {
335 return handle_detailed_response(¶ms, &matches, &query, start_time, &config).await;
336 }
337
338 let results: Vec<serde_json::Value> = matches
340 .iter()
341 .map(|m| {
342 serde_json::json!({
343 "reference": {
344 "id": m.reference.id.0,
345 "display_name": m.reference.display_name,
346 "assembly": format!("{}", m.reference.assembly),
347 "source": format!("{}", m.reference.source),
348 "download_url": m.reference.download_url,
349 },
350 "score": {
351 "composite": m.score.composite,
352 "confidence": format!("{:?}", m.score.confidence),
353 "detailed_scores": {
354 "md5_jaccard": m.score.md5_jaccard,
355 "name_length_jaccard": m.score.name_length_jaccard,
356 "md5_query_coverage": m.score.md5_query_coverage,
357 "order_score": m.score.order_score,
358 },
359 },
360 "match_type": format!("{:?}", m.diagnosis.match_type),
361 "reordered": m.diagnosis.reordered,
362 "exact_matches": m.diagnosis.exact_matches.len(),
363 "renamed_matches": m.diagnosis.renamed_matches.len(),
364 "conflicts": m.diagnosis.conflicts.len(),
365 "query_only": m.diagnosis.query_only.len(),
366 "diagnosis": {
367 "exact_matches": m.diagnosis.exact_matches.iter().map(|_| {
368 serde_json::json!({"type": "exact"})
369 }).collect::<Vec<_>>(),
370 "renamed_matches": m.diagnosis.renamed_matches.iter().map(|r| {
371 serde_json::json!({
372 "query_name": r.query_name,
373 "reference_name": r.reference_name
374 })
375 }).collect::<Vec<_>>(),
376 "conflicts": m.diagnosis.conflicts.iter().map(|c| {
377 serde_json::json!({
378 "query_contig": {
379 "name": c.query_contig.name,
380 "length": c.query_contig.length,
381 "md5": c.query_contig.md5
382 },
383 "conflict_type": format!("{:?}", c.conflict_type),
384 "description": c.description
385 })
386 }).collect::<Vec<_>>(),
387 },
388 "suggestions": m.diagnosis.suggestions.iter().map(|s| {
389 match s {
390 Suggestion::RenameContigs { command_hint, .. } => {
391 serde_json::json!({"type": "rename", "command": command_hint})
392 }
393 Suggestion::ReorderContigs { command_hint } => {
394 serde_json::json!({"type": "reorder", "command": command_hint})
395 }
396 Suggestion::ReplaceContig { contig_name, reason, source } => {
397 serde_json::json!({"type": "replace", "contig": contig_name, "reason": reason, "source": source})
398 }
399 Suggestion::UseAsIs { warnings } => {
400 serde_json::json!({"type": "use_as_is", "warnings": warnings})
401 }
402 Suggestion::Realign { reason, suggested_reference } => {
403 serde_json::json!({"type": "realign", "reason": reason, "reference": suggested_reference})
404 }
405 }
406 }).collect::<Vec<_>>(),
407 })
408 })
409 .collect();
410
411 #[allow(clippy::cast_possible_truncation)] let processing_time = start_time.elapsed().as_millis() as u64;
413
414 Json(serde_json::json!({
415 "query": {
416 "contig_count": query.contigs.len(),
417 "has_md5": query.has_md5s(),
418 "md5_coverage": query.md5_coverage(),
419 "naming_convention": format!("{:?}", query.naming_convention),
420 },
421 "matches": results,
422 "processing_info": {
423 "detected_format": input_data.format.as_ref().map_or("unknown", super::format_detection::FileFormat::display_name),
424 "processing_time_ms": processing_time,
425 "configuration": {
426 "score_threshold": config.score_threshold,
427 "result_limit": config.result_limit,
428 "scoring_weights": config.scoring_weights,
429 }
430 }
431 }))
432 .into_response()
433}
434
435#[allow(
437 clippy::cast_possible_truncation,
438 clippy::unused_async,
439 clippy::too_many_lines
440)] async fn handle_detailed_response(
442 params: &DetailedQueryParams,
443 matches: &[crate::matching::engine::MatchResult],
444 query: &crate::core::header::QueryHeader,
445 start_time: std::time::Instant,
446 config: &ConfigurationInfo,
447) -> Response {
448 use crate::core::contig::Contig;
449
450 let match_index = params.match_id.unwrap_or(0);
452 let Some(selected_match) = matches.get(match_index) else {
453 return (
454 StatusCode::BAD_REQUEST,
455 Json(create_safe_error_response(
456 "invalid_match_id",
457 "Invalid match ID specified",
458 Some("Match index out of bounds"),
459 )),
460 )
461 .into_response();
462 };
463
464 let query_page = params.query_page.unwrap_or(0);
466 let query_page_size = params.query_page_size.unwrap_or(100).min(500);
467 let ref_page = params.ref_page.unwrap_or(0);
468 let ref_page_size = params.ref_page_size.unwrap_or(100).min(500);
469
470 let total_query_contigs = query.contigs.len();
472 let query_start = query_page * query_page_size;
473 let query_end = (query_start + query_page_size).min(total_query_contigs);
474 let query_contigs_page: Vec<&Contig> = if query_start < total_query_contigs {
475 query.contigs[query_start..query_end].iter().collect()
476 } else {
477 Vec::new()
478 };
479
480 let total_ref_contigs = selected_match.reference.contigs.len();
482 let ref_start = ref_page * ref_page_size;
483 let ref_end = (ref_start + ref_page_size).min(total_ref_contigs);
484 let ref_contigs_page: Vec<&Contig> = if ref_start < total_ref_contigs {
485 selected_match.reference.contigs[ref_start..ref_end]
486 .iter()
487 .collect()
488 } else {
489 Vec::new()
490 };
491
492 let mut exact_match_mappings = Vec::new();
494 let mut renamed_match_mappings = Vec::new();
495 let mut conflict_mappings = Vec::new();
496 let mut query_only_indices = Vec::new();
497 let mut reference_only_indices = Vec::new();
498
499 let query_name_to_index: std::collections::HashMap<&str, usize> = query
501 .contigs
502 .iter()
503 .enumerate()
504 .map(|(i, c)| (c.name.as_str(), i))
505 .collect();
506
507 let ref_name_to_index: std::collections::HashMap<&str, usize> = selected_match
508 .reference
509 .contigs
510 .iter()
511 .enumerate()
512 .map(|(i, c)| (c.name.as_str(), i))
513 .collect();
514
515 for (i, _) in selected_match.diagnosis.exact_matches.iter().enumerate() {
517 exact_match_mappings.push(serde_json::json!({
521 "type": "exact",
522 "query_index": i, "reference_index": i }));
525 }
526
527 for rename in &selected_match.diagnosis.renamed_matches {
529 if let (Some(&query_idx), Some(&ref_idx)) = (
530 query_name_to_index.get(rename.query_name.as_str()),
531 ref_name_to_index.get(rename.reference_name.as_str()),
532 ) {
533 renamed_match_mappings.push(serde_json::json!({
534 "type": "renamed",
535 "query_index": query_idx,
536 "reference_index": ref_idx,
537 "query_name": rename.query_name,
538 "reference_name": rename.reference_name
539 }));
540 }
541 }
542
543 for conflict in &selected_match.diagnosis.conflicts {
545 if let Some(&query_idx) = query_name_to_index.get(conflict.query_contig.name.as_str()) {
546 let ref_idx = conflict
547 .expected
548 .as_ref()
549 .and_then(|expected| ref_name_to_index.get(expected.name.as_str()));
550
551 conflict_mappings.push(serde_json::json!({
552 "type": "conflict",
553 "query_index": query_idx,
554 "reference_index": ref_idx,
555 "conflict_type": format!("{:?}", conflict.conflict_type),
556 "description": conflict.description
557 }));
558 }
559 }
560
561 for contig in &selected_match.diagnosis.query_only {
563 if let Some(&index) = query_name_to_index.get(contig.name.as_str()) {
564 query_only_indices.push(index);
565 }
566 }
567
568 let mut matched_ref_indices = std::collections::HashSet::new();
570 #[allow(clippy::cast_possible_truncation)] for mapping in &exact_match_mappings {
572 if let Some(ref_idx) = mapping
573 .get("reference_index")
574 .and_then(serde_json::Value::as_u64)
575 {
576 matched_ref_indices.insert(ref_idx as usize);
577 }
578 }
579 #[allow(clippy::cast_possible_truncation)]
580 for mapping in &renamed_match_mappings {
581 if let Some(ref_idx) = mapping
582 .get("reference_index")
583 .and_then(serde_json::Value::as_u64)
584 {
585 matched_ref_indices.insert(ref_idx as usize);
586 }
587 }
588 #[allow(clippy::cast_possible_truncation)]
589 for mapping in &conflict_mappings {
590 if let Some(ref_idx) = mapping
591 .get("reference_index")
592 .and_then(serde_json::Value::as_u64)
593 {
594 matched_ref_indices.insert(ref_idx as usize);
595 }
596 }
597
598 for (i, _) in selected_match.reference.contigs.iter().enumerate() {
599 if !matched_ref_indices.contains(&i) {
600 reference_only_indices.push(i);
601 }
602 }
603
604 #[allow(clippy::cast_possible_truncation)] let processing_time = start_time.elapsed().as_millis() as u64;
607
608 Json(serde_json::json!({
609 "mode": "detailed",
610 "match_id": match_index,
611 "query": {
612 "contigs": query_contigs_page.iter().enumerate().map(|(page_idx, contig)| {
613 let global_idx = query_start + page_idx;
614 let match_status = if query_only_indices.contains(&global_idx) {
616 "missing"
617 } else if conflict_mappings.iter().any(|c| c.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
618 "conflict"
619 } else if renamed_match_mappings.iter().any(|r| r.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
620 "renamed"
621 } else if exact_match_mappings.iter().any(|e| e.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
622 "exact"
623 } else {
624 "unknown"
625 };
626
627 serde_json::json!({
628 "index": global_idx,
629 "name": contig.name,
630 "length": contig.length,
631 "md5": contig.md5,
632 "sequence_role": format!("{:?}", contig.sequence_role),
633 "aliases": contig.aliases,
634 "match_status": match_status
635 })
636 }).collect::<Vec<_>>(),
637 "pagination": {
638 "page": query_page,
639 "page_size": query_page_size,
640 "total_count": total_query_contigs,
641 "total_pages": total_query_contigs.div_ceil(query_page_size)
642 }
643 },
644 "reference": {
645 "id": selected_match.reference.id.0,
646 "display_name": selected_match.reference.display_name,
647 "assembly": format!("{}", selected_match.reference.assembly),
648 "contigs": ref_contigs_page.iter().enumerate().map(|(page_idx, contig)| {
649 let global_idx = ref_start + page_idx;
650 let match_status = if reference_only_indices.contains(&global_idx) {
652 "missing"
653 } else if conflict_mappings.iter().any(|c| c.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
654 "conflict"
655 } else if renamed_match_mappings.iter().any(|r| r.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
656 "renamed"
657 } else if exact_match_mappings.iter().any(|e| e.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
658 "exact"
659 } else {
660 "unknown"
661 };
662
663 serde_json::json!({
664 "index": global_idx,
665 "name": contig.name,
666 "length": contig.length,
667 "md5": contig.md5,
668 "sequence_role": format!("{:?}", contig.sequence_role),
669 "aliases": contig.aliases,
670 "match_status": match_status
671 })
672 }).collect::<Vec<_>>(),
673 "pagination": {
674 "page": ref_page,
675 "page_size": ref_page_size,
676 "total_count": total_ref_contigs,
677 "total_pages": total_ref_contigs.div_ceil(ref_page_size)
678 }
679 },
680 "mappings": {
681 "exact_matches": exact_match_mappings,
682 "renamed_matches": renamed_match_mappings,
683 "conflicts": conflict_mappings,
684 "query_only": query_only_indices,
685 "reference_only": reference_only_indices
686 },
687 "match_summary": {
688 "match_type": format!("{:?}", selected_match.diagnosis.match_type),
689 "reordered": selected_match.diagnosis.reordered,
690 "score": {
691 "composite": selected_match.score.composite,
692 "confidence": format!("{:?}", selected_match.score.confidence)
693 }
694 },
695 "processing_info": {
696 "processing_time_ms": processing_time,
697 "configuration": {
698 "score_threshold": config.score_threshold,
699 "result_limit": config.result_limit,
700 "scoring_weights": config.scoring_weights,
701 }
702 }
703 }))
704 .into_response()
705}
706
707#[allow(clippy::too_many_lines)] async fn extract_request_data(
710 multipart: &mut Multipart,
711) -> Result<(InputData, ConfigurationInfo), Response> {
712 let mut input_data = InputData {
713 text_content: None,
714 binary_content: None,
715 filename: None,
716 format: None,
717 };
718
719 let mut config = ConfigurationInfo {
720 score_threshold: 0.1, result_limit: 10,
722 scoring_weights: ScoringWeights::default(),
723 };
724
725 let mut fields_received = 0usize;
726 let mut had_parse_error = false;
727
728 loop {
730 if fields_received >= MAX_MULTIPART_FIELDS {
732 return Err((
733 StatusCode::BAD_REQUEST,
734 Json(ErrorResponse {
735 error: "Too many form fields".to_string(),
736 error_type: "field_limit_exceeded".to_string(),
737 details: None, }),
739 )
740 .into_response());
741 }
742
743 match multipart.next_field().await {
744 Ok(Some(field)) => {
745 fields_received += 1;
746 let name = field.name().unwrap_or_default().to_string();
747
748 match name.as_str() {
749 "file" => {
750 let filename = field.file_name().map(std::string::ToString::to_string);
751
752 match field.bytes().await {
753 Ok(bytes) => {
754 if bytes.len() > MAX_FILE_FIELD_SIZE {
756 return Err((
757 StatusCode::PAYLOAD_TOO_LARGE,
758 Json(ErrorResponse {
759 error: "File size exceeds limit".to_string(),
760 error_type: "file_too_large".to_string(),
761 details: None,
762 }),
763 )
764 .into_response());
765 }
766
767 let detected_format = if let Some(ref name) = filename {
769 detect_binary_format(name).unwrap_or(FileFormat::Auto)
770 } else {
771 FileFormat::Auto
772 };
773
774 match validate_upload(filename.as_deref(), &bytes, detected_format)
776 {
777 Ok(validated_filename) => {
778 input_data.filename = validated_filename;
779
780 if is_binary_content(&bytes) {
782 input_data.binary_content = Some(bytes.to_vec());
783 input_data.format = Some(detected_format);
784 } else {
785 input_data.text_content =
786 Some(String::from_utf8_lossy(&bytes).to_string());
787 }
788 }
789 Err(ValidationError::FilenameTooLong) => {
790 return Err((
791 StatusCode::BAD_REQUEST,
792 Json(create_safe_error_response(
793 "filename_too_long",
794 "Filename exceeds maximum length limit",
795 Some("Filename validation failed due to length constraints")
796 )),
797 ).into_response());
798 }
799 Err(ValidationError::InvalidFilename) => {
800 return Err((
801 StatusCode::BAD_REQUEST,
802 Json(create_safe_error_response(
803 "invalid_filename",
804 "Filename contains invalid or dangerous characters",
805 Some("Filename validation failed due to invalid characters")
806 )),
807 ).into_response());
808 }
809 Err(ValidationError::FormatValidationFailed) => {
810 return Err((
811 StatusCode::BAD_REQUEST,
812 Json(create_safe_error_response(
813 "format_mismatch",
814 "File content does not match the expected format based on filename",
815 Some("Format validation failed")
816 )),
817 ).into_response());
818 }
819 Err(ValidationError::InvalidFileContent) => {
820 return Err((
821 StatusCode::BAD_REQUEST,
822 Json(create_safe_error_response(
823 "invalid_content",
824 "File content appears malformed or corrupted",
825 None,
826 )),
827 )
828 .into_response());
829 }
830 Err(_) => {
831 return Err((
832 StatusCode::BAD_REQUEST,
833 Json(create_safe_error_response(
834 "validation_failed",
835 "File validation failed",
836 None,
837 )),
838 )
839 .into_response());
840 }
841 }
842 }
843 Err(_) => had_parse_error = true,
844 }
845 }
846 "header_text" => match field.text().await {
847 Ok(text) => {
848 if text.len() > MAX_TEXT_FIELD_SIZE {
850 return Err((
851 StatusCode::PAYLOAD_TOO_LARGE,
852 Json(ErrorResponse {
853 error: "Text field size exceeds limit".to_string(),
854 error_type: "text_too_large".to_string(),
855 details: None,
856 }),
857 )
858 .into_response());
859 }
860
861 if !text.trim().is_empty() {
862 input_data.text_content = Some(text);
863 }
864 }
865 Err(_) => had_parse_error = true,
866 },
867 "score_threshold" => {
868 if let Ok(text) = field.text().await {
869 if let Ok(threshold) = text.parse::<f64>() {
870 config.score_threshold = threshold.clamp(0.0, 1.0);
871 }
872 }
873 }
874 "result_limit" => {
875 if let Ok(text) = field.text().await {
876 if let Ok(limit) = text.parse::<usize>() {
877 config.result_limit = limit.clamp(1, 50); }
879 }
880 }
881 "scoring_weights" => {
882 if let Ok(text) = field.text().await {
883 if let Ok(weights) = serde_json::from_str::<HashMap<String, f64>>(&text)
884 {
885 config.scoring_weights = parse_scoring_weights(&weights);
886 }
887 }
888 }
889 _ => {} }
891 }
892 Ok(None) => break, Err(_) => {
894 had_parse_error = true;
895 break;
896 }
897 }
898 }
899
900 if input_data.text_content.is_none() && input_data.binary_content.is_none() {
902 let error_msg = if had_parse_error {
903 "Failed to parse upload. Please check the file format."
904 } else if fields_received == 0 {
905 "No data received. Please upload a file or paste header text."
906 } else {
907 "No valid header data found in upload."
908 };
909
910 return Err((
911 StatusCode::BAD_REQUEST,
912 Json(create_safe_error_response(
913 "missing_input",
914 error_msg,
915 None, )),
917 )
918 .into_response());
919 }
920
921 Ok((input_data, config))
922}
923
924fn parse_input_data(
926 input_data: &InputData,
927) -> Result<crate::core::header::QueryHeader, Box<Response>> {
928 if let Some(text_content) = &input_data.text_content {
929 let Ok(detected_format) = detect_format(text_content, input_data.filename.as_deref())
931 else {
932 return Err(Box::new(
933 (
934 StatusCode::BAD_REQUEST,
935 Json(create_safe_error_response(
936 "format_detection_failed",
937 "Unable to detect file format. Please check the file type and try again.",
938 Some("Format detection failed during parsing"),
939 )),
940 )
941 .into_response(),
942 ));
943 };
944
945 match parse_with_format(text_content, detected_format) {
946 Ok(query) => Ok(query),
947 Err(_) => Err(Box::new((
948 StatusCode::BAD_REQUEST,
949 Json(create_safe_error_response(
950 "parse_failed",
951 "Unable to process file content. Please check the file format and try again.",
952 Some("File parsing failed during content processing"),
953 )),
954 )
955 .into_response())),
956 }
957 } else if let Some(binary_content) = &input_data.binary_content {
958 let format = input_data.format.unwrap_or(FileFormat::Bam);
960
961 match parse_binary_file(binary_content, format) {
962 Ok(query) => Ok(query),
963 Err(_) => Err(Box::new((
964 StatusCode::BAD_REQUEST,
965 Json(create_safe_error_response(
966 "binary_parse_failed",
967 "Unable to process binary file. Please verify the file format and try again.",
968 Some("Binary file parsing failed during processing"),
969 )),
970 )
971 .into_response())),
972 }
973 } else {
974 Err(Box::new(
975 (
976 StatusCode::INTERNAL_SERVER_ERROR,
977 Json(ErrorResponse {
978 error: "Internal error: no input data".to_string(),
979 error_type: "internal_error".to_string(),
980 details: None,
981 }),
982 )
983 .into_response(),
984 ))
985 }
986}
987
988fn is_binary_content(bytes: &[u8]) -> bool {
990 let sample_size = std::cmp::min(bytes.len(), 1024);
992
993 if sample_size < 10 {
995 return false; }
997
998 let non_printable_count = bytes[..sample_size]
999 .iter()
1000 .filter(|&&b| b < 9 || (b > 13 && b < 32) || b > 126)
1001 .count();
1002
1003 count_to_f64(non_printable_count) > (count_to_f64(sample_size) * 0.01)
1005}
1006
1007fn detect_binary_format(filename: &str) -> Option<FileFormat> {
1009 let lower = filename.to_lowercase();
1010 if std::path::Path::new(&lower)
1011 .extension()
1012 .is_some_and(|ext| ext.eq_ignore_ascii_case("bam"))
1013 {
1014 Some(FileFormat::Bam)
1015 } else if std::path::Path::new(&lower)
1016 .extension()
1017 .is_some_and(|ext| ext.eq_ignore_ascii_case("cram"))
1018 {
1019 Some(FileFormat::Cram)
1020 } else {
1021 None
1022 }
1023}
1024
1025fn parse_scoring_weights(weights: &HashMap<String, f64>) -> ScoringWeights {
1027 let contig_match = weights.get("contigMatch").unwrap_or(&70.0) / 100.0;
1030 let coverage = weights.get("coverage").unwrap_or(&20.0) / 100.0;
1031 let order = weights.get("orderScore").unwrap_or(&10.0) / 100.0;
1032 let conflict_penalty = weights.get("conflictPenalty").unwrap_or(&10.0) / 100.0;
1034
1035 ScoringWeights {
1036 contig_match,
1037 coverage,
1038 order,
1039 conflict_penalty,
1040 }
1041}
1042
1043async fn catalog_handler(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
1045 let refs: Vec<serde_json::Value> = state
1046 .catalog
1047 .references
1048 .iter()
1049 .map(|r| {
1050 serde_json::json!({
1051 "id": r.id.0,
1052 "display_name": r.display_name,
1053 "assembly": format!("{}", r.assembly),
1054 "source": format!("{}", r.source),
1055 "contig_count": r.contigs.len(),
1056 "has_decoy": r.has_decoy(),
1057 "has_alt": r.has_alt(),
1058 "tags": r.tags,
1059 })
1060 })
1061 .collect();
1062
1063 Json(serde_json::json!({
1064 "count": refs.len(),
1065 "references": refs,
1066 }))
1067}