1use axum::http::header;
2use axum::{
3 extract::{DefaultBodyLimit, Multipart, Query, State},
4 http::{HeaderName, HeaderValue, StatusCode},
5 response::{Html, IntoResponse, Json, Response},
6 routing::{get, post},
7 Router,
8};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::net::SocketAddr;
12use std::sync::Arc;
13use std::time::Duration;
14use tokio::net::TcpListener;
15use tower::limit::ConcurrencyLimitLayer;
16use tower::ServiceBuilder;
17use tower_governor::{governor::GovernorConfigBuilder, GovernorLayer};
18use tower_http::set_header::SetResponseHeaderLayer;
19use tower_http::timeout::TimeoutLayer;
20
21use crate::catalog::store::ReferenceCatalog;
22use crate::cli::ServeArgs;
23use crate::matching::engine::{MatchingConfig, MatchingEngine, ScoringWeights};
24use crate::matching::Suggestion;
25use crate::utils::validation::{validate_upload, ValidationError};
26use crate::web::format_detection::{
27 detect_format, parse_binary_file, parse_binary_file_from_path, parse_with_format, FileFormat,
28};
29
/// Maximum number of multipart form fields accepted per request.
pub const MAX_MULTIPART_FIELDS: usize = 10;
/// Maximum size of an uploaded file field buffered fully in memory (16 MiB).
pub const MAX_FILE_FIELD_SIZE: usize = 16 * 1024 * 1024;
/// Maximum size of a pasted text field (1 MiB).
pub const MAX_TEXT_FIELD_SIZE: usize = 1024 * 1024;
/// Maximum number of bytes streamed to disk when reading a binary
/// (BAM/CRAM) upload in `read_binary_chunks` (64 MiB).
pub const BINARY_HEADER_READ_LIMIT: usize = 64 * 1024 * 1024;
/// Hard cap on the total request body, enforced by `DefaultBodyLimit`
/// in `create_router` (256 MiB).
const MAX_BODY_SIZE: usize = 256 * 1024 * 1024;

/// Convert a count to `f64` for ratio arithmetic, with the precision-loss
/// lint acknowledged in one place instead of at every call site.
#[inline]
fn count_to_f64(count: usize) -> f64 {
    #[allow(clippy::cast_precision_loss)]
    {
        count as f64
    }
}
52
/// Shared application state handed to every handler via axum's `State`
/// extractor.
pub struct AppState {
    /// Embedded reference-genome catalog that uploads are matched against.
    pub catalog: ReferenceCatalog,
    /// Optional refget service configuration; `None` disables enrichment
    /// of unmatched contigs in the detailed view.
    pub refget_config: Option<crate::refget::RefgetConfig>,
}
58
/// Storage backing for an uploaded binary (BAM/CRAM) payload.
#[derive(Debug)]
enum BinaryContent {
    /// Payload read fully into memory (non-BAM/CRAM uploads that sniff as binary).
    InMemory(Vec<u8>),
    /// Payload streamed to a named temporary file (BAM/CRAM detected by filename);
    /// the file is deleted when this value is dropped.
    TempFile(tempfile::NamedTempFile),
}
67
/// Upload data accumulated while draining the multipart form.
#[derive(Debug)]
struct InputData {
    /// Pasted or decoded textual header content, if any.
    text_content: Option<String>,
    /// Raw binary payload, if any.
    binary_content: Option<BinaryContent>,
    /// Validated client-supplied filename, if provided.
    filename: Option<String>,
    /// Format detected from the filename, when it could be determined.
    format: Option<FileFormat>,
}
80
/// Machine-readable error categories returned to API clients
/// (serialized in `snake_case`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ErrorType {
    FieldLimitExceeded,
    FileTooLarge,
    TextTooLarge,
    InternalError,
    InvalidMatchId,
    FilenameTooLong,
    InvalidFilename,
    FormatMismatch,
    InvalidContent,
    ValidationFailed,
    MissingInput,
    FormatDetectionFailed,
    ParseFailed,
    BinaryParseFailed,
}
102
/// JSON error payload returned by the API endpoints.
#[derive(Serialize)]
pub struct ErrorResponse {
    /// Human-readable message that is safe to show to clients.
    pub error: String,
    /// Machine-readable category of the error.
    pub error_type: ErrorType,
    /// Extra detail; sanitized responses keep this `None`.
    pub details: Option<String>,
}
110
/// Matching configuration in effect for a request; echoed back to the
/// client inside `processing_info`.
#[derive(Serialize)]
struct ConfigurationInfo {
    /// Minimum score passed to the matcher as `min_score`.
    score_threshold: f64,
    /// Maximum number of matches requested from the engine.
    result_limit: usize,
    /// Relative weights used for composite scoring.
    scoring_weights: ScoringWeights,
}
117
/// Query-string parameters accepted by `/api/identify`.
#[derive(Deserialize)]
struct DetailedQueryParams {
    /// Response mode; `"detailed"` selects the paginated per-contig view.
    mode: Option<String>,
    /// Index into the match list to expand in detailed mode (defaults to 0).
    match_id: Option<usize>,
    /// Zero-based page of query contigs (defaults to 0).
    query_page: Option<usize>,
    /// Query-side page size (defaults to 100, capped at 500).
    query_page_size: Option<usize>,
    /// Zero-based page of reference contigs (defaults to 0).
    ref_page: Option<usize>,
    /// Reference-side page size (defaults to 100, capped at 500).
    ref_page_size: Option<usize>,
}
134
135pub fn create_safe_error_response(
138 error_type: ErrorType,
139 user_message: &str,
140 internal_error: Option<&str>,
141) -> ErrorResponse {
142 if let Some(internal_msg) = internal_error {
144 tracing::error!("Internal error ({:?}): {}", error_type, internal_msg);
145 }
146
147 ErrorResponse {
148 error: user_message.to_string(),
149 error_type,
150 details: None, }
152}
153
154pub fn run(args: ServeArgs) -> anyhow::Result<()> {
160 let rt = tokio::runtime::Runtime::new()?;
162 rt.block_on(async move { run_server(args).await })
163}
164
165#[allow(clippy::missing_panics_doc)] pub fn create_router(refget_config: Option<crate::refget::RefgetConfig>) -> anyhow::Result<Router> {
174 let catalog = ReferenceCatalog::load_embedded()?;
176 let state = Arc::new(AppState {
177 catalog,
178 refget_config,
179 });
180
181 let governor_conf = GovernorConfigBuilder::default()
183 .per_second(10) .burst_size(50) .finish()
186 .unwrap();
187
188 let app = Router::new()
190 .route("/", get(index_handler))
191 .route("/api/identify", post(identify_handler))
192 .route("/api/catalog", get(catalog_handler))
193 .route("/static/css/styles.css", get(styles_css_handler))
195 .route("/static/js/main.js", get(main_js_handler))
196 .route("/static/js/utils/helpers.js", get(helpers_js_handler))
197 .route(
198 "/static/js/managers/ConfigurationManager.js",
199 get(config_manager_js_handler),
200 )
201 .route(
202 "/static/js/managers/TabManager.js",
203 get(tab_manager_js_handler),
204 )
205 .route(
206 "/static/js/managers/ResultsManager.js",
207 get(results_manager_js_handler),
208 )
209 .route(
210 "/static/js/managers/SplitViewManager.js",
211 get(split_view_manager_js_handler),
212 )
213 .route(
214 "/static/js/utils/headerExtractor.js",
215 get(header_extractor_js_handler),
216 )
217 .with_state(state)
218 .layer(
219 ServiceBuilder::new()
220 .layer(SetResponseHeaderLayer::if_not_present(
222 HeaderName::from_static("x-content-type-options"),
223 HeaderValue::from_static("nosniff"),
224 ))
225 .layer(SetResponseHeaderLayer::if_not_present(
226 HeaderName::from_static("x-frame-options"),
227 HeaderValue::from_static("DENY"),
228 ))
229 .layer(SetResponseHeaderLayer::if_not_present(
230 HeaderName::from_static("x-xss-protection"),
231 HeaderValue::from_static("1; mode=block"),
232 ))
233 .layer(SetResponseHeaderLayer::if_not_present(
234 HeaderName::from_static("strict-transport-security"),
235 HeaderValue::from_static("max-age=31536000; includeSubDomains"),
236 ))
237 .layer(SetResponseHeaderLayer::if_not_present(
238 HeaderName::from_static("referrer-policy"),
239 HeaderValue::from_static("strict-origin-when-cross-origin"),
240 ))
241 .layer(GovernorLayer {
243 config: Arc::new(governor_conf),
244 })
245 .layer(TimeoutLayer::with_status_code(
247 StatusCode::REQUEST_TIMEOUT,
248 Duration::from_secs(30),
249 ))
250 .layer(ConcurrencyLimitLayer::new(100))
252 .layer(DefaultBodyLimit::max(MAX_BODY_SIZE)),
255 );
256
257 Ok(app)
258}
259
260async fn run_server(args: ServeArgs) -> anyhow::Result<()> {
261 let refget_config = if args.no_refget {
262 None
263 } else {
264 Some(crate::refget::RefgetConfig::new(&args.refget_server))
265 };
266 let app = create_router(refget_config)?;
267
268 let addr = format!("{}:{}", args.address, args.port);
269 println!("Starting ref-solver web server at http://{addr}");
270
271 if args.open {
272 let _ = open::that(format!("http://{addr}"));
273 }
274
275 let listener = TcpListener::bind(&addr).await?;
276 axum::serve(
277 listener,
278 app.into_make_service_with_connect_info::<SocketAddr>(),
279 )
280 .await?;
281
282 Ok(())
283}
284
/// `GET /` — serve the embedded single-page UI.
async fn index_handler() -> Html<&'static str> {
    Html(include_str!("templates/index.html"))
}
289
/// `GET /static/css/styles.css` — serve the embedded stylesheet with an
/// explicit content type.
async fn styles_css_handler() -> impl IntoResponse {
    (
        [(header::CONTENT_TYPE, "text/css; charset=utf-8")],
        include_str!("static/css/styles.css"),
    )
}
297
/// `GET /static/js/main.js` — serve the embedded main script.
async fn main_js_handler() -> impl IntoResponse {
    (
        [(
            header::CONTENT_TYPE,
            "application/javascript; charset=utf-8",
        )],
        include_str!("static/js/main.js"),
    )
}
308
/// `GET /static/js/utils/helpers.js` — serve the embedded helpers module.
async fn helpers_js_handler() -> impl IntoResponse {
    (
        [(
            header::CONTENT_TYPE,
            "application/javascript; charset=utf-8",
        )],
        include_str!("static/js/utils/helpers.js"),
    )
}
318
/// `GET /static/js/managers/ConfigurationManager.js` — serve the embedded
/// configuration-manager module.
async fn config_manager_js_handler() -> impl IntoResponse {
    (
        [(
            header::CONTENT_TYPE,
            "application/javascript; charset=utf-8",
        )],
        include_str!("static/js/managers/ConfigurationManager.js"),
    )
}
328
/// `GET /static/js/managers/TabManager.js` — serve the embedded
/// tab-manager module.
async fn tab_manager_js_handler() -> impl IntoResponse {
    (
        [(
            header::CONTENT_TYPE,
            "application/javascript; charset=utf-8",
        )],
        include_str!("static/js/managers/TabManager.js"),
    )
}
338
/// `GET /static/js/managers/ResultsManager.js` — serve the embedded
/// results-manager module.
async fn results_manager_js_handler() -> impl IntoResponse {
    (
        [(
            header::CONTENT_TYPE,
            "application/javascript; charset=utf-8",
        )],
        include_str!("static/js/managers/ResultsManager.js"),
    )
}
348
/// `GET /static/js/managers/SplitViewManager.js` — serve the embedded
/// split-view-manager module.
async fn split_view_manager_js_handler() -> impl IntoResponse {
    (
        [(
            header::CONTENT_TYPE,
            "application/javascript; charset=utf-8",
        )],
        include_str!("static/js/managers/SplitViewManager.js"),
    )
}
358
/// `GET /static/js/utils/headerExtractor.js` — serve the embedded
/// header-extractor module.
async fn header_extractor_js_handler() -> impl IntoResponse {
    (
        [(
            header::CONTENT_TYPE,
            "application/javascript; charset=utf-8",
        )],
        include_str!("static/js/utils/headerExtractor.js"),
    )
}
368
369#[allow(clippy::too_many_lines)] async fn identify_handler(
372 State(state): State<Arc<AppState>>,
373 Query(params): Query<DetailedQueryParams>,
374 mut multipart: Multipart,
375) -> impl IntoResponse {
376 let start_time = std::time::Instant::now();
377
378 let (input_data, config) = match extract_request_data(&mut multipart).await {
380 Ok(data) => data,
381 Err(error_response) => return error_response,
382 };
383
384 let (query, parse_warnings) = match parse_input_data(&input_data) {
386 Ok(result) => result,
387 Err(error_response) => return *error_response,
388 };
389
390 let matching_config = MatchingConfig {
392 min_score: config.score_threshold,
393 scoring_weights: config.scoring_weights.clone(),
394 };
395
396 let engine = MatchingEngine::new(&state.catalog, matching_config);
397 let matches = engine.find_matches(&query, config.result_limit);
398
399 if params.mode.as_deref() == Some("detailed") {
401 return handle_detailed_response(
402 ¶ms,
403 &matches,
404 &query,
405 start_time,
406 &config,
407 state.refget_config.as_ref(),
408 )
409 .await;
410 }
411
412 let results: Vec<serde_json::Value> = matches
414 .iter()
415 .map(|m| {
416 serde_json::json!({
417 "reference": {
418 "id": m.reference.id.0,
419 "display_name": m.reference.display_name,
420 "assembly": format!("{}", m.reference.assembly),
421 "source": format!("{}", m.reference.source),
422 "download_url": m.reference.download_url,
423 },
424 "score": {
425 "composite": m.score.composite,
426 "confidence": format!("{:?}", m.score.confidence),
427 "detailed_scores": {
428 "md5_jaccard": m.score.md5_jaccard,
429 "name_length_jaccard": m.score.name_length_jaccard,
430 "md5_query_coverage": m.score.md5_query_coverage,
431 "order_score": m.score.order_score,
432 },
433 },
434 "match_type": format!("{:?}", m.diagnosis.match_type),
435 "reordered": m.diagnosis.reordered,
436 "exact_matches": m.diagnosis.exact_matches.len(),
437 "renamed_matches": m.diagnosis.renamed_matches.len(),
438 "conflicts": m.diagnosis.conflicts.len(),
439 "query_only": m.diagnosis.query_only.len(),
440 "diagnosis": {
441 "exact_matches": m.diagnosis.exact_matches.iter().map(|_| {
442 serde_json::json!({"type": "exact"})
443 }).collect::<Vec<_>>(),
444 "renamed_matches": m.diagnosis.renamed_matches.iter().map(|r| {
445 serde_json::json!({
446 "query_name": r.query_name,
447 "reference_name": r.reference_name
448 })
449 }).collect::<Vec<_>>(),
450 "conflicts": m.diagnosis.conflicts.iter().map(|c| {
451 serde_json::json!({
452 "query_contig": {
453 "name": c.query_contig.name,
454 "length": c.query_contig.length,
455 "md5": c.query_contig.md5
456 },
457 "conflict_type": format!("{:?}", c.conflict_type),
458 "description": c.description
459 })
460 }).collect::<Vec<_>>(),
461 },
462 "suggestions": m.diagnosis.suggestions.iter().map(|s| {
463 match s {
464 Suggestion::RenameContigs { command_hint, .. } => {
465 serde_json::json!({"type": "rename", "command": command_hint})
466 }
467 Suggestion::ReorderContigs { command_hint } => {
468 serde_json::json!({"type": "reorder", "command": command_hint})
469 }
470 Suggestion::ReplaceContig { contig_name, reason, source } => {
471 serde_json::json!({"type": "replace", "contig": contig_name, "reason": reason, "source": source})
472 }
473 Suggestion::UseAsIs { warnings } => {
474 serde_json::json!({"type": "use_as_is", "warnings": warnings})
475 }
476 Suggestion::Realign { reason, suggested_reference } => {
477 serde_json::json!({"type": "realign", "reason": reason, "reference": suggested_reference})
478 }
479 }
480 }).collect::<Vec<_>>(),
481 })
482 })
483 .collect();
484
485 #[allow(clippy::cast_possible_truncation)] let processing_time = start_time.elapsed().as_millis() as u64;
487
488 Json(serde_json::json!({
489 "query": {
490 "contig_count": query.contigs.len(),
491 "has_md5": query.has_md5s(),
492 "md5_coverage": query.md5_coverage(),
493 "naming_convention": format!("{:?}", query.naming_convention),
494 },
495 "warnings": parse_warnings,
496 "matches": results,
497 "processing_info": {
498 "detected_format": input_data.format.as_ref().map_or("unknown", super::format_detection::FileFormat::display_name),
499 "processing_time_ms": processing_time,
500 "configuration": {
501 "score_threshold": config.score_threshold,
502 "result_limit": config.result_limit,
503 "scoring_weights": config.scoring_weights,
504 }
505 }
506 }))
507 .into_response()
508}
509
/// Build the `mode=detailed` response for one selected match: paginated
/// contig listings for both the query and the reference, index-based
/// mappings between the two sides, and optional refget enrichment of
/// unmatched query contigs on the visible page.
#[allow(clippy::cast_possible_truncation, clippy::too_many_lines)]
async fn handle_detailed_response(
    params: &DetailedQueryParams,
    matches: &[crate::matching::engine::MatchResult],
    query: &crate::core::header::QueryHeader,
    start_time: std::time::Instant,
    config: &ConfigurationInfo,
    refget_config: Option<&crate::refget::RefgetConfig>,
) -> Response {
    use crate::core::contig::Contig;

    // Which match to expand; defaults to the top-ranked one.
    let match_index = params.match_id.unwrap_or(0);
    let Some(selected_match) = matches.get(match_index) else {
        return (
            StatusCode::BAD_REQUEST,
            Json(create_safe_error_response(
                ErrorType::InvalidMatchId,
                "Invalid match ID specified",
                Some("Match index out of bounds"),
            )),
        )
        .into_response();
    };

    // Pagination parameters; page sizes are clamped to 500.
    let query_page = params.query_page.unwrap_or(0);
    let query_page_size = params.query_page_size.unwrap_or(100).min(500);
    let ref_page = params.ref_page.unwrap_or(0);
    let ref_page_size = params.ref_page_size.unwrap_or(100).min(500);

    // Slice out the requested page of query contigs (empty past the end).
    let total_query_contigs = query.contigs.len();
    let query_start = query_page * query_page_size;
    let query_end = (query_start + query_page_size).min(total_query_contigs);
    let query_contigs_page: Vec<&Contig> = if query_start < total_query_contigs {
        query.contigs[query_start..query_end].iter().collect()
    } else {
        Vec::new()
    };

    // Same slicing for the reference side.
    let total_ref_contigs = selected_match.reference.contigs.len();
    let ref_start = ref_page * ref_page_size;
    let ref_end = (ref_start + ref_page_size).min(total_ref_contigs);
    let ref_contigs_page: Vec<&Contig> = if ref_start < total_ref_contigs {
        selected_match.reference.contigs[ref_start..ref_end]
            .iter()
            .collect()
    } else {
        Vec::new()
    };

    // Index-based mappings between the two sides, built from the diagnosis.
    let mut exact_match_mappings = Vec::new();
    let mut renamed_match_mappings = Vec::new();
    let mut conflict_mappings = Vec::new();
    let mut query_only_indices = Vec::new();
    let mut reference_only_indices = Vec::new();

    // Name -> global index lookup tables for both sides.
    let query_name_to_index: std::collections::HashMap<&str, usize> = query
        .contigs
        .iter()
        .enumerate()
        .map(|(i, c)| (c.name.as_str(), i))
        .collect();

    let ref_name_to_index: std::collections::HashMap<&str, usize> = selected_match
        .reference
        .contigs
        .iter()
        .enumerate()
        .map(|(i, c)| (c.name.as_str(), i))
        .collect();

    // NOTE(review): exact matches are mapped positionally (query index i ->
    // reference index i); this assumes the diagnosis lists them in the same
    // order on both sides — confirm against the matching engine.
    for (i, _) in selected_match.diagnosis.exact_matches.iter().enumerate() {
        exact_match_mappings.push(serde_json::json!({
            "type": "exact",
            "query_index": i,
            "reference_index": i
        }));
    }

    for rename in &selected_match.diagnosis.renamed_matches {
        // Emit a mapping only when both names resolve to known indices.
        if let (Some(&query_idx), Some(&ref_idx)) = (
            query_name_to_index.get(rename.query_name.as_str()),
            ref_name_to_index.get(rename.reference_name.as_str()),
        ) {
            renamed_match_mappings.push(serde_json::json!({
                "type": "renamed",
                "query_index": query_idx,
                "reference_index": ref_idx,
                "query_name": rename.query_name,
                "reference_name": rename.reference_name
            }));
        }
    }

    for conflict in &selected_match.diagnosis.conflicts {
        if let Some(&query_idx) = query_name_to_index.get(conflict.query_contig.name.as_str()) {
            // The reference index is optional: a conflict may have no
            // expected counterpart on the reference side.
            let ref_idx = conflict
                .expected
                .as_ref()
                .and_then(|expected| ref_name_to_index.get(expected.name.as_str()));

            conflict_mappings.push(serde_json::json!({
                "type": "conflict",
                "query_index": query_idx,
                "reference_index": ref_idx,
                "conflict_type": format!("{:?}", conflict.conflict_type),
                "description": conflict.description
            }));
        }
    }

    // Query contigs with no counterpart in the reference.
    for contig in &selected_match.diagnosis.query_only {
        if let Some(&index) = query_name_to_index.get(contig.name.as_str()) {
            query_only_indices.push(index);
        }
    }

    // Reference contigs not covered by any mapping become "reference only".
    // The three loops below harvest the reference indices back out of the
    // JSON mappings built above.
    let mut matched_ref_indices = std::collections::HashSet::new();
    #[allow(clippy::cast_possible_truncation)]
    for mapping in &exact_match_mappings {
        if let Some(ref_idx) = mapping
            .get("reference_index")
            .and_then(serde_json::Value::as_u64)
        {
            matched_ref_indices.insert(ref_idx as usize);
        }
    }
    #[allow(clippy::cast_possible_truncation)]
    for mapping in &renamed_match_mappings {
        if let Some(ref_idx) = mapping
            .get("reference_index")
            .and_then(serde_json::Value::as_u64)
        {
            matched_ref_indices.insert(ref_idx as usize);
        }
    }
    #[allow(clippy::cast_possible_truncation)]
    for mapping in &conflict_mappings {
        if let Some(ref_idx) = mapping
            .get("reference_index")
            .and_then(serde_json::Value::as_u64)
        {
            matched_ref_indices.insert(ref_idx as usize);
        }
    }

    for (i, _) in selected_match.reference.contigs.iter().enumerate() {
        if !matched_ref_indices.contains(&i) {
            reference_only_indices.push(i);
        }
    }

    // Optionally enrich unmatched query contigs via the refget service,
    // restricted to contigs visible on the current page.
    let enriched_map: std::collections::HashMap<String, crate::refget::EnrichedContig> =
        if let Some(refget_cfg) = refget_config {
            let page_unmatched: Vec<&Contig> = selected_match
                .diagnosis
                .query_only
                .iter()
                .filter(|c| {
                    query_name_to_index
                        .get(c.name.as_str())
                        .is_some_and(|&idx| idx >= query_start && idx < query_end)
                })
                .collect();
            if page_unmatched.is_empty() {
                std::collections::HashMap::new()
            } else {
                let to_enrich: Vec<Contig> = page_unmatched.into_iter().cloned().collect();
                let enriched =
                    crate::refget::enrichment::enrich_contigs(&to_enrich, refget_cfg).await;
                enriched.into_iter().map(|e| (e.name.clone(), e)).collect()
            }
        } else {
            std::collections::HashMap::new()
        };

    // Milliseconds since request start; truncating u128 -> u64 is harmless.
    #[allow(clippy::cast_possible_truncation)]
    let processing_time = start_time.elapsed().as_millis() as u64;

    Json(serde_json::json!({
        "mode": "detailed",
        "match_id": match_index,
        "query": {
            "contigs": query_contigs_page.iter().enumerate().map(|(page_idx, contig)| {
                let global_idx = query_start + page_idx;
                // Classify by scanning the mapping lists; precedence:
                // missing > conflict > renamed > exact.
                let match_status = if query_only_indices.contains(&global_idx) {
                    "missing"
                } else if conflict_mappings.iter().any(|c| c.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
                    "conflict"
                } else if renamed_match_mappings.iter().any(|r| r.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
                    "renamed"
                } else if exact_match_mappings.iter().any(|e| e.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
                    "exact"
                } else {
                    "unknown"
                };

                let mut entry = serde_json::json!({
                    "index": global_idx,
                    "name": contig.name,
                    "length": contig.length,
                    "md5": contig.md5,
                    "sha512t24u": contig.sha512t24u,
                    "sequence_role": format!("{:?}", contig.sequence_role),
                    "aliases": contig.aliases,
                    "match_status": match_status
                });

                // Attach refget metadata for unmatched contigs when present.
                if match_status == "missing" {
                    if let Some(enriched) = enriched_map.get(&contig.name) {
                        entry["refget_metadata"] = serde_json::json!(&enriched.refget_metadata);
                    }
                }

                entry
            }).collect::<Vec<_>>(),
            "pagination": {
                "page": query_page,
                "page_size": query_page_size,
                "total_count": total_query_contigs,
                "total_pages": total_query_contigs.div_ceil(query_page_size)
            }
        },
        "reference": {
            "id": selected_match.reference.id.0,
            "display_name": selected_match.reference.display_name,
            "assembly": format!("{}", selected_match.reference.assembly),
            "contigs": ref_contigs_page.iter().enumerate().map(|(page_idx, contig)| {
                let global_idx = ref_start + page_idx;
                // Same classification, keyed on reference_index.
                let match_status = if reference_only_indices.contains(&global_idx) {
                    "missing"
                } else if conflict_mappings.iter().any(|c| c.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
                    "conflict"
                } else if renamed_match_mappings.iter().any(|r| r.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
                    "renamed"
                } else if exact_match_mappings.iter().any(|e| e.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
                    "exact"
                } else {
                    "unknown"
                };

                serde_json::json!({
                    "index": global_idx,
                    "name": contig.name,
                    "length": contig.length,
                    "md5": contig.md5,
                    "sha512t24u": contig.sha512t24u,
                    "sequence_role": format!("{:?}", contig.sequence_role),
                    "aliases": contig.aliases,
                    "match_status": match_status
                })
            }).collect::<Vec<_>>(),
            "pagination": {
                "page": ref_page,
                "page_size": ref_page_size,
                "total_count": total_ref_contigs,
                "total_pages": total_ref_contigs.div_ceil(ref_page_size)
            }
        },
        "mappings": {
            "exact_matches": exact_match_mappings,
            "renamed_matches": renamed_match_mappings,
            "conflicts": conflict_mappings,
            "query_only": query_only_indices,
            "reference_only": reference_only_indices
        },
        "match_summary": {
            "match_type": format!("{:?}", selected_match.diagnosis.match_type),
            "reordered": selected_match.diagnosis.reordered,
            "score": {
                "composite": selected_match.score.composite,
                "confidence": format!("{:?}", selected_match.score.confidence)
            }
        },
        "processing_info": {
            "processing_time_ms": processing_time,
            "configuration": {
                "score_threshold": config.score_threshold,
                "result_limit": config.result_limit,
                "scoring_weights": config.scoring_weights,
            }
        }
    }))
    .into_response()
}
816
/// Drain the multipart form: collect the upload payload (file upload or
/// pasted `header_text`) and any matching-configuration overrides
/// (`score_threshold`, `result_limit`, `scoring_weights`).
///
/// Returns a ready-made error `Response` when a limit is exceeded,
/// validation fails, or no usable input was supplied.
#[allow(clippy::too_many_lines)]
async fn extract_request_data(
    multipart: &mut Multipart,
) -> Result<(InputData, ConfigurationInfo), Response> {
    let mut input_data = InputData {
        text_content: None,
        binary_content: None,
        filename: None,
        format: None,
    };

    // Defaults applied when the form omits configuration fields.
    let mut config = ConfigurationInfo {
        score_threshold: 0.1,
        result_limit: 10,
        scoring_weights: ScoringWeights::default(),
    };

    let mut fields_received = 0usize;
    // Remembered so the final "missing input" error can explain why.
    let mut had_parse_error = false;

    loop {
        // Cap the number of form fields to bound per-request work.
        if fields_received >= MAX_MULTIPART_FIELDS {
            return Err((
                StatusCode::BAD_REQUEST,
                Json(ErrorResponse {
                    error: "Too many form fields".to_string(),
                    error_type: ErrorType::FieldLimitExceeded,
                    details: None,
                }),
            )
            .into_response());
        }

        match multipart.next_field().await {
            Ok(Some(field)) => {
                fields_received += 1;
                let name = field.name().unwrap_or_default().to_string();

                match name.as_str() {
                    "file" => {
                        let filename = field.file_name().map(std::string::ToString::to_string);

                        // Detect BAM/CRAM from the filename so large binary
                        // uploads can be streamed to disk instead of buffered.
                        let detected_format = if let Some(ref name) = filename {
                            detect_binary_format(name).unwrap_or(FileFormat::Auto)
                        } else {
                            FileFormat::Auto
                        };

                        if matches!(detected_format, FileFormat::Bam | FileFormat::Cram) {
                            // Stream to a temp file, reading at most
                            // BINARY_HEADER_READ_LIMIT bytes of the field.
                            match read_binary_chunks(field, detected_format).await {
                                Ok((temp_file, _bytes_read)) => {
                                    input_data.filename = filename;
                                    input_data.binary_content =
                                        Some(BinaryContent::TempFile(temp_file));
                                    input_data.format = Some(detected_format);
                                }
                                Err(err_response) => return Err(err_response),
                            }
                        } else {
                            // All other uploads are buffered in memory.
                            match field.bytes().await {
                                Ok(bytes) => {
                                    if bytes.len() > MAX_FILE_FIELD_SIZE {
                                        return Err((
                                            StatusCode::PAYLOAD_TOO_LARGE,
                                            Json(ErrorResponse {
                                                error: "File size exceeds limit".to_string(),
                                                error_type: ErrorType::FileTooLarge,
                                                details: None,
                                            }),
                                        )
                                        .into_response());
                                    }

                                    // Validate the filename and check that the
                                    // content matches the claimed format.
                                    match validate_upload(
                                        filename.as_deref(),
                                        &bytes,
                                        detected_format,
                                    ) {
                                        Ok(validated_filename) => {
                                            input_data.filename = validated_filename;

                                            // Route by a content sniff: binary
                                            // payloads are kept as raw bytes,
                                            // everything else becomes text.
                                            if is_binary_content(&bytes) {
                                                input_data.binary_content =
                                                    Some(BinaryContent::InMemory(bytes.to_vec()));
                                                input_data.format = Some(detected_format);
                                            } else {
                                                input_data.text_content = Some(
                                                    String::from_utf8_lossy(&bytes).to_string(),
                                                );
                                            }
                                        }
                                        Err(ValidationError::FilenameTooLong) => {
                                            return Err((
                                                StatusCode::BAD_REQUEST,
                                                Json(create_safe_error_response(
                                                    ErrorType::FilenameTooLong,
                                                    "Filename exceeds maximum length limit",
                                                    Some("Filename validation failed due to length constraints")
                                                )),
                                            ).into_response());
                                        }
                                        Err(ValidationError::InvalidFilename) => {
                                            return Err((
                                                StatusCode::BAD_REQUEST,
                                                Json(create_safe_error_response(
                                                    ErrorType::InvalidFilename,
                                                    "Filename contains invalid or dangerous characters",
                                                    Some("Filename validation failed due to invalid characters")
                                                )),
                                            ).into_response());
                                        }
                                        Err(ValidationError::FormatValidationFailed) => {
                                            return Err((
                                                StatusCode::BAD_REQUEST,
                                                Json(create_safe_error_response(
                                                    ErrorType::FormatMismatch,
                                                    "File content does not match the expected format based on filename",
                                                    Some("Format validation failed")
                                                )),
                                            ).into_response());
                                        }
                                        Err(ValidationError::InvalidFileContent) => {
                                            return Err((
                                                StatusCode::BAD_REQUEST,
                                                Json(create_safe_error_response(
                                                    ErrorType::InvalidContent,
                                                    "File content appears malformed or corrupted",
                                                    None,
                                                )),
                                            )
                                            .into_response());
                                        }
                                        // Catch-all for any other validation error.
                                        Err(_) => {
                                            return Err((
                                                StatusCode::BAD_REQUEST,
                                                Json(create_safe_error_response(
                                                    ErrorType::ValidationFailed,
                                                    "File validation failed",
                                                    None,
                                                )),
                                            )
                                            .into_response());
                                        }
                                    }
                                }
                                Err(_) => had_parse_error = true,
                            }
                        }
                    }
                    "header_text" => match field.text().await {
                        Ok(text) => {
                            if text.len() > MAX_TEXT_FIELD_SIZE {
                                return Err((
                                    StatusCode::PAYLOAD_TOO_LARGE,
                                    Json(ErrorResponse {
                                        error: "Text field size exceeds limit".to_string(),
                                        error_type: ErrorType::TextTooLarge,
                                        details: None,
                                    }),
                                )
                                .into_response());
                            }

                            // Ignore whitespace-only pastes.
                            if !text.trim().is_empty() {
                                input_data.text_content = Some(text);
                            }
                        }
                        Err(_) => had_parse_error = true,
                    },
                    // Configuration fields: malformed values are silently
                    // ignored and the defaults are kept.
                    "score_threshold" => {
                        if let Ok(text) = field.text().await {
                            if let Ok(threshold) = text.parse::<f64>() {
                                config.score_threshold = threshold.clamp(0.0, 1.0);
                            }
                        }
                    }
                    "result_limit" => {
                        if let Ok(text) = field.text().await {
                            if let Ok(limit) = text.parse::<usize>() {
                                config.result_limit = limit.clamp(1, 50);
                            }
                        }
                    }
                    "scoring_weights" => {
                        if let Ok(text) = field.text().await {
                            if let Ok(weights) = serde_json::from_str::<HashMap<String, f64>>(&text)
                            {
                                config.scoring_weights = parse_scoring_weights(&weights);
                            }
                        }
                    }
                    // Unknown fields are skipped (but still counted above).
                    _ => {}
                }
            }
            Ok(None) => break,
            Err(_) => {
                had_parse_error = true;
                break;
            }
        }
    }

    // Nothing usable arrived: pick the most helpful message available.
    if input_data.text_content.is_none() && input_data.binary_content.is_none() {
        let error_msg = if had_parse_error {
            "Failed to parse upload. Please check the file format."
        } else if fields_received == 0 {
            "No data received. Please upload a file or paste header text."
        } else {
            "No valid header data found in upload."
        };

        return Err((
            StatusCode::BAD_REQUEST,
            Json(create_safe_error_response(
                ErrorType::MissingInput,
                error_msg,
                None,
            )),
        )
        .into_response());
    }

    Ok((input_data, config))
}
1052
/// Parse the extracted upload into a `QueryHeader`, returning any
/// non-fatal warnings produced along the way.
///
/// Errors are ready-made responses; the error side is boxed to keep the
/// `Result` small.
fn parse_input_data(
    input_data: &InputData,
) -> Result<(crate::core::header::QueryHeader, Vec<String>), Box<Response>> {
    let mut warnings: Vec<String> = Vec::new();

    if let Some(text_content) = &input_data.text_content {
        // Tolerate space-separated SAM headers by normalizing to tabs first,
        // and tell the user when that happened.
        let (normalized_content, was_normalized) =
            crate::parsing::sam::normalize_sam_whitespace(text_content);
        if was_normalized {
            warnings.push(
                "Input contained spaces instead of tabs between SAM header fields. \
                Fields were automatically converted to tab-separated format."
                    .to_string(),
            );
        }
        // Shadow so the normalized text is used from here on.
        let text_content = &normalized_content;

        // Format detection considers content and the (optional) filename.
        let Ok(detected_format) = detect_format(text_content, input_data.filename.as_deref())
        else {
            return Err(Box::new(
                (
                    StatusCode::BAD_REQUEST,
                    Json(create_safe_error_response(
                        ErrorType::FormatDetectionFailed,
                        "Unable to detect file format. Please check the file type and try again.",
                        Some("Format detection failed during parsing"),
                    )),
                )
                .into_response(),
            ));
        };

        match parse_with_format(text_content, detected_format) {
            Ok(query) => Ok((query, warnings)),
            Err(_) => Err(Box::new((
                StatusCode::BAD_REQUEST,
                Json(create_safe_error_response(
                    ErrorType::ParseFailed,
                    "Unable to process file content. Please check the file format and try again.",
                    Some("File parsing failed during content processing"),
                )),
            )
            .into_response())),
        }
    } else if let Some(binary_content) = &input_data.binary_content {
        // Fall back to BAM when no format was recorded during extraction.
        let format = input_data.format.unwrap_or(FileFormat::Bam);

        // Parse from memory or from the temp file, whichever we have.
        let result = match binary_content {
            BinaryContent::InMemory(bytes) => parse_binary_file(bytes, format),
            BinaryContent::TempFile(temp) => parse_binary_file_from_path(temp.path(), format),
        };

        match result {
            Ok(query) => Ok((query, Vec::new())),
            Err(_) => Err(Box::new((
                StatusCode::BAD_REQUEST,
                Json(create_safe_error_response(
                    ErrorType::BinaryParseFailed,
                    "Unable to process binary file. Please verify the file format and try again.",
                    Some("Binary file parsing failed during processing"),
                )),
            )
            .into_response())),
        }
    } else {
        // extract_request_data guarantees one of the two inputs is present,
        // so reaching this branch indicates an internal logic error.
        Err(Box::new(
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(ErrorResponse {
                    error: "Internal error: no input data".to_string(),
                    error_type: ErrorType::InternalError,
                    details: None,
                }),
            )
            .into_response(),
        ))
    }
}
1137
/// Stream a multipart field into a named temporary file, writing at most
/// `BINARY_HEADER_READ_LIMIT` bytes and stopping (without error) once the
/// limit is reached.
///
/// Returns the temp file and the number of bytes written, or a ready-made
/// error `Response`.
async fn read_binary_chunks(
    mut field: axum::extract::multipart::Field<'_>,
    format: FileFormat,
) -> Result<(tempfile::NamedTempFile, usize), Response> {
    use std::io::Write;

    // Suffix the temp file so downstream handling by extension still works.
    let extension = match format {
        FileFormat::Bam => ".bam",
        FileFormat::Cram => ".cram",
        _ => ".bin",
    };

    let mut temp_file = tempfile::NamedTempFile::with_suffix(extension).map_err(|e| {
        // Log the real cause; the client gets a generic internal error.
        tracing::error!("Failed to create temp file for binary upload: {e}");
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(ErrorResponse {
                error: "Internal error processing upload".to_string(),
                error_type: ErrorType::InternalError,
                details: None,
            }),
        )
        .into_response()
    })?;

    let mut bytes_written: usize = 0;

    loop {
        match field.chunk().await {
            Ok(Some(chunk)) => {
                // Stop once the read limit is reached; the file is simply
                // truncated at the limit rather than the upload rejected.
                let remaining = BINARY_HEADER_READ_LIMIT.saturating_sub(bytes_written);
                if remaining == 0 {
                    break;
                }

                let to_write = chunk.len().min(remaining);
                temp_file.write_all(&chunk[..to_write]).map_err(|e| {
                    tracing::error!("Failed to write binary upload to temp file: {e}");
                    (
                        StatusCode::INTERNAL_SERVER_ERROR,
                        Json(ErrorResponse {
                            error: "Internal error processing upload".to_string(),
                            error_type: ErrorType::InternalError,
                            details: None,
                        }),
                    )
                    .into_response()
                })?;
                bytes_written += to_write;

                // A partial write means the limit was hit mid-chunk.
                if to_write < chunk.len() {
                    break;
                }
            }
            Ok(None) => break,
            Err(_) => {
                return Err((
                    StatusCode::BAD_REQUEST,
                    Json(create_safe_error_response(
                        ErrorType::InvalidContent,
                        "Failed to read uploaded file",
                        Some("Error reading multipart chunk during binary upload"),
                    )),
                )
                .into_response());
            }
        }
    }

    // An entirely empty field is treated as missing input.
    if bytes_written == 0 {
        return Err((
            StatusCode::BAD_REQUEST,
            Json(create_safe_error_response(
                ErrorType::MissingInput,
                "Uploaded file is empty",
                None,
            )),
        )
        .into_response());
    }

    Ok((temp_file, bytes_written))
}
1225
/// Heuristic binary sniff: examine up to the first 1 KiB and treat the
/// buffer as binary when more than 1% of the sampled bytes are
/// non-printable and not ASCII whitespace (tab/LF/VT/FF/CR, bytes 9–13).
/// Buffers shorter than 10 bytes are always considered text.
fn is_binary_content(bytes: &[u8]) -> bool {
    let sample = &bytes[..bytes.len().min(1024)];

    // Too little data to judge reliably; assume text.
    if sample.len() < 10 {
        return false;
    }

    let suspicious = sample
        .iter()
        .filter(|&&b| b < 9 || (b > 13 && b < 32) || b > 126)
        .count();

    #[allow(clippy::cast_precision_loss)]
    {
        suspicious as f64 > sample.len() as f64 * 0.01
    }
}
1244
1245fn detect_binary_format(filename: &str) -> Option<FileFormat> {
1247 let lower = filename.to_lowercase();
1248 if std::path::Path::new(&lower)
1249 .extension()
1250 .is_some_and(|ext| ext.eq_ignore_ascii_case("bam"))
1251 {
1252 Some(FileFormat::Bam)
1253 } else if std::path::Path::new(&lower)
1254 .extension()
1255 .is_some_and(|ext| ext.eq_ignore_ascii_case("cram"))
1256 {
1257 Some(FileFormat::Cram)
1258 } else {
1259 None
1260 }
1261}
1262
1263fn parse_scoring_weights(weights: &HashMap<String, f64>) -> ScoringWeights {
1265 let contig_match = weights.get("contigMatch").unwrap_or(&70.0) / 100.0;
1268 let coverage = weights.get("coverage").unwrap_or(&20.0) / 100.0;
1269 let order = weights.get("orderScore").unwrap_or(&10.0) / 100.0;
1270 let conflict_penalty = weights.get("conflictPenalty").unwrap_or(&10.0) / 100.0;
1272
1273 ScoringWeights {
1274 contig_match,
1275 coverage,
1276 order,
1277 conflict_penalty,
1278 }
1279}
1280
/// `GET /api/catalog` — list every reference in the embedded catalog as a
/// JSON summary, plus the total count.
async fn catalog_handler(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
    let refs: Vec<serde_json::Value> = state
        .catalog
        .references
        .iter()
        .map(|r| {
            serde_json::json!({
                "id": r.id.0,
                "display_name": r.display_name,
                "assembly": format!("{}", r.assembly),
                "source": format!("{}", r.source),
                "contig_count": r.contigs.len(),
                "has_decoy": r.has_decoy(),
                "has_alt": r.has_alt(),
                "tags": r.tags,
            })
        })
        .collect();

    Json(serde_json::json!({
        "count": refs.len(),
        "references": refs,
    }))
}