1use axum::http::header;
2use axum::{
3 extract::{DefaultBodyLimit, Multipart, Query, State},
4 http::{HeaderName, HeaderValue, StatusCode},
5 response::{Html, IntoResponse, Json, Response},
6 routing::{get, post},
7 Router,
8};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::net::SocketAddr;
12use std::sync::Arc;
13use std::time::Duration;
14use tokio::net::TcpListener;
15use tower::limit::ConcurrencyLimitLayer;
16use tower::ServiceBuilder;
17use tower_governor::{governor::GovernorConfigBuilder, GovernorLayer};
18use tower_http::set_header::SetResponseHeaderLayer;
19use tower_http::timeout::TimeoutLayer;
20
21use crate::catalog::store::ReferenceCatalog;
22use crate::cli::ServeArgs;
23use crate::matching::engine::{MatchingConfig, MatchingEngine, ScoringWeights};
24use crate::matching::Suggestion;
25use crate::utils::validation::{validate_upload, ValidationError};
26use crate::web::format_detection::{
27 detect_format, parse_binary_file, parse_with_format, FileFormat,
28};
29
/// Limit on the number of multipart form fields processed per request.
pub const MAX_MULTIPART_FIELDS: usize = 10;

/// Largest accepted size, in bytes, of the uploaded `file` field (16 MiB).
pub const MAX_FILE_FIELD_SIZE: usize = 16 * 1024 * 1024;

/// Largest accepted size, in bytes, of the `header_text` field (1 MiB).
pub const MAX_TEXT_FIELD_SIZE: usize = 1024 * 1024;

/// Converts a count to `f64` for ratio arithmetic.
///
/// Counts above 2^53 lose precision in the conversion, which is why the
/// precision-loss lint is silenced here.
#[inline]
#[allow(clippy::cast_precision_loss)]
fn count_to_f64(count: usize) -> f64 {
    count as f64
}
43
/// Shared state handed to the request handlers through axum's `State` extractor.
pub struct AppState {
    /// Reference catalog read by the identify and catalog handlers.
    pub catalog: ReferenceCatalog,
}
48
/// Input collected from the multipart request before it is parsed into a query.
#[derive(Debug)]
struct InputData {
    /// Text to parse: the `header_text` field, or an uploaded file that was not
    /// classified as binary (decoded lossily as UTF-8).
    text_content: Option<String>,
    /// Raw bytes of an uploaded file that was classified as binary.
    binary_content: Option<Vec<u8>>,
    /// Filename returned by `validate_upload` for the uploaded file, if any.
    filename: Option<String>,
    /// Format picked from the filename extension; only set together with
    /// `binary_content`.
    format: Option<FileFormat>,
}
61
/// JSON body returned to the client when a request fails.
#[derive(Serialize)]
pub struct ErrorResponse {
    /// Human-readable message that is safe to show to the user.
    pub error: String,
    /// Machine-readable error category, e.g. `"file_too_large"`.
    pub error_type: String,
    /// Optional extra detail; every constructor in this file leaves it `None`.
    pub details: Option<String>,
}
69
/// Matching settings for one request; also echoed back in the response's
/// `processing_info.configuration` object.
#[derive(Serialize)]
struct ConfigurationInfo {
    /// Minimum score passed to the matching engine as `min_score`.
    score_threshold: f64,
    /// Maximum number of matches requested from the matching engine.
    result_limit: usize,
    /// Weights passed to the matching engine.
    scoring_weights: ScoringWeights,
}
76
/// Query-string parameters accepted by the identify endpoint.
#[derive(Deserialize)]
struct DetailedQueryParams {
    /// `"detailed"` selects the paginated per-contig response; any other value
    /// (or none) yields the summary response.
    mode: Option<String>,
    /// Index into the match list of the match to expand (0 when absent).
    match_id: Option<usize>,
    /// Zero-based page of query contigs to return (0 when absent).
    query_page: Option<usize>,
    /// Query contigs per page (100 when absent, capped at 500).
    query_page_size: Option<usize>,
    /// Zero-based page of reference contigs to return (0 when absent).
    ref_page: Option<usize>,
    /// Reference contigs per page (100 when absent, capped at 500).
    ref_page_size: Option<usize>,
}
93
94pub fn create_safe_error_response(
97 error_type: &str,
98 user_message: &str,
99 internal_error: Option<&str>,
100) -> ErrorResponse {
101 if let Some(internal_msg) = internal_error {
103 tracing::error!("Internal error ({}): {}", error_type, internal_msg);
104 }
105
106 ErrorResponse {
107 error: user_message.to_string(),
108 error_type: error_type.to_string(),
109 details: None, }
111}
112
113pub fn run(args: ServeArgs) -> anyhow::Result<()> {
119 let rt = tokio::runtime::Runtime::new()?;
121 rt.block_on(async move { run_server(args).await })
122}
123
124#[allow(clippy::missing_panics_doc)] pub fn create_router() -> anyhow::Result<Router> {
131 let catalog = ReferenceCatalog::load_embedded()?;
133 let state = Arc::new(AppState { catalog });
134
135 let governor_conf = GovernorConfigBuilder::default()
137 .per_second(10) .burst_size(50) .finish()
140 .unwrap();
141
142 let app = Router::new()
144 .route("/", get(index_handler))
145 .route("/api/identify", post(identify_handler))
146 .route("/api/catalog", get(catalog_handler))
147 .route("/static/css/styles.css", get(styles_css_handler))
149 .route("/static/js/main.js", get(main_js_handler))
150 .route("/static/js/utils/helpers.js", get(helpers_js_handler))
151 .route(
152 "/static/js/managers/ConfigurationManager.js",
153 get(config_manager_js_handler),
154 )
155 .route(
156 "/static/js/managers/TabManager.js",
157 get(tab_manager_js_handler),
158 )
159 .route(
160 "/static/js/managers/ResultsManager.js",
161 get(results_manager_js_handler),
162 )
163 .route(
164 "/static/js/managers/SplitViewManager.js",
165 get(split_view_manager_js_handler),
166 )
167 .with_state(state)
168 .layer(
169 ServiceBuilder::new()
170 .layer(SetResponseHeaderLayer::if_not_present(
172 HeaderName::from_static("x-content-type-options"),
173 HeaderValue::from_static("nosniff"),
174 ))
175 .layer(SetResponseHeaderLayer::if_not_present(
176 HeaderName::from_static("x-frame-options"),
177 HeaderValue::from_static("DENY"),
178 ))
179 .layer(SetResponseHeaderLayer::if_not_present(
180 HeaderName::from_static("x-xss-protection"),
181 HeaderValue::from_static("1; mode=block"),
182 ))
183 .layer(SetResponseHeaderLayer::if_not_present(
184 HeaderName::from_static("strict-transport-security"),
185 HeaderValue::from_static("max-age=31536000; includeSubDomains"),
186 ))
187 .layer(SetResponseHeaderLayer::if_not_present(
188 HeaderName::from_static("referrer-policy"),
189 HeaderValue::from_static("strict-origin-when-cross-origin"),
190 ))
191 .layer(GovernorLayer {
193 config: Arc::new(governor_conf),
194 })
195 .layer(TimeoutLayer::with_status_code(
197 StatusCode::REQUEST_TIMEOUT,
198 Duration::from_secs(30),
199 ))
200 .layer(ConcurrencyLimitLayer::new(100))
202 .layer(DefaultBodyLimit::max(20 * 1024 * 1024)), );
205
206 Ok(app)
207}
208
209async fn run_server(args: ServeArgs) -> anyhow::Result<()> {
210 let app = create_router()?;
211
212 let addr = format!("{}:{}", args.address, args.port);
213 println!("Starting ref-solver web server at http://{addr}");
214
215 if args.open {
216 let _ = open::that(format!("http://{addr}"));
217 }
218
219 let listener = TcpListener::bind(&addr).await?;
220 axum::serve(
221 listener,
222 app.into_make_service_with_connect_info::<SocketAddr>(),
223 )
224 .await?;
225
226 Ok(())
227}
228
229async fn index_handler() -> Html<&'static str> {
231 Html(include_str!("templates/index.html"))
232}
233
234async fn styles_css_handler() -> impl IntoResponse {
236 (
237 [(header::CONTENT_TYPE, "text/css; charset=utf-8")],
238 include_str!("static/css/styles.css"),
239 )
240}
241
242async fn main_js_handler() -> impl IntoResponse {
244 (
245 [(
246 header::CONTENT_TYPE,
247 "application/javascript; charset=utf-8",
248 )],
249 include_str!("static/js/main.js"),
250 )
251}
252
253async fn helpers_js_handler() -> impl IntoResponse {
254 (
255 [(
256 header::CONTENT_TYPE,
257 "application/javascript; charset=utf-8",
258 )],
259 include_str!("static/js/utils/helpers.js"),
260 )
261}
262
263async fn config_manager_js_handler() -> impl IntoResponse {
264 (
265 [(
266 header::CONTENT_TYPE,
267 "application/javascript; charset=utf-8",
268 )],
269 include_str!("static/js/managers/ConfigurationManager.js"),
270 )
271}
272
273async fn tab_manager_js_handler() -> impl IntoResponse {
274 (
275 [(
276 header::CONTENT_TYPE,
277 "application/javascript; charset=utf-8",
278 )],
279 include_str!("static/js/managers/TabManager.js"),
280 )
281}
282
283async fn results_manager_js_handler() -> impl IntoResponse {
284 (
285 [(
286 header::CONTENT_TYPE,
287 "application/javascript; charset=utf-8",
288 )],
289 include_str!("static/js/managers/ResultsManager.js"),
290 )
291}
292
293async fn split_view_manager_js_handler() -> impl IntoResponse {
294 (
295 [(
296 header::CONTENT_TYPE,
297 "application/javascript; charset=utf-8",
298 )],
299 include_str!("static/js/managers/SplitViewManager.js"),
300 )
301}
302
303#[allow(clippy::too_many_lines)] async fn identify_handler(
306 State(state): State<Arc<AppState>>,
307 Query(params): Query<DetailedQueryParams>,
308 mut multipart: Multipart,
309) -> impl IntoResponse {
310 let start_time = std::time::Instant::now();
311
312 let (input_data, config) = match extract_request_data(&mut multipart).await {
314 Ok(data) => data,
315 Err(error_response) => return error_response,
316 };
317
318 let query = match parse_input_data(&input_data) {
320 Ok(query) => query,
321 Err(error_response) => return *error_response,
322 };
323
324 let matching_config = MatchingConfig {
326 min_score: config.score_threshold,
327 scoring_weights: config.scoring_weights.clone(),
328 };
329
330 let engine = MatchingEngine::new(&state.catalog, matching_config);
331 let matches = engine.find_matches(&query, config.result_limit);
332
333 if params.mode.as_deref() == Some("detailed") {
335 return handle_detailed_response(¶ms, &matches, &query, start_time, &config).await;
336 }
337
338 let results: Vec<serde_json::Value> = matches
340 .iter()
341 .map(|m| {
342 serde_json::json!({
343 "reference": {
344 "id": m.reference.id.0,
345 "display_name": m.reference.display_name,
346 "assembly": format!("{}", m.reference.assembly),
347 "source": format!("{}", m.reference.source),
348 "download_url": m.reference.download_url,
349 },
350 "score": {
351 "composite": m.score.composite,
352 "confidence": format!("{:?}", m.score.confidence),
353 "detailed_scores": {
354 "md5_jaccard": m.score.md5_jaccard,
355 "name_length_jaccard": m.score.name_length_jaccard,
356 "md5_query_coverage": m.score.md5_query_coverage,
357 "order_score": m.score.order_score,
358 },
359 },
360 "match_type": format!("{:?}", m.diagnosis.match_type),
361 "reordered": m.diagnosis.reordered,
362 "exact_matches": m.diagnosis.exact_matches.len(),
363 "renamed_matches": m.diagnosis.renamed_matches.len(),
364 "conflicts": m.diagnosis.conflicts.len(),
365 "query_only": m.diagnosis.query_only.len(),
366 "diagnosis": {
367 "exact_matches": m.diagnosis.exact_matches.iter().map(|_| {
368 serde_json::json!({"type": "exact"})
369 }).collect::<Vec<_>>(),
370 "renamed_matches": m.diagnosis.renamed_matches.iter().map(|r| {
371 serde_json::json!({
372 "query_name": r.query_name,
373 "reference_name": r.reference_name
374 })
375 }).collect::<Vec<_>>(),
376 "conflicts": m.diagnosis.conflicts.iter().map(|c| {
377 serde_json::json!({
378 "query_contig": {
379 "name": c.query_contig.name,
380 "length": c.query_contig.length,
381 "md5": c.query_contig.md5
382 },
383 "conflict_type": format!("{:?}", c.conflict_type),
384 "description": c.description
385 })
386 }).collect::<Vec<_>>(),
387 },
388 "suggestions": m.diagnosis.suggestions.iter().map(|s| {
389 match s {
390 Suggestion::RenameContigs { command_hint, .. } => {
391 serde_json::json!({"type": "rename", "command": command_hint})
392 }
393 Suggestion::ReorderContigs { command_hint } => {
394 serde_json::json!({"type": "reorder", "command": command_hint})
395 }
396 Suggestion::ReplaceContig { contig_name, reason, source } => {
397 serde_json::json!({"type": "replace", "contig": contig_name, "reason": reason, "source": source})
398 }
399 Suggestion::UseAsIs { warnings } => {
400 serde_json::json!({"type": "use_as_is", "warnings": warnings})
401 }
402 Suggestion::Realign { reason, suggested_reference } => {
403 serde_json::json!({"type": "realign", "reason": reason, "reference": suggested_reference})
404 }
405 }
406 }).collect::<Vec<_>>(),
407 })
408 })
409 .collect();
410
411 #[allow(clippy::cast_possible_truncation)] let processing_time = start_time.elapsed().as_millis() as u64;
413
414 Json(serde_json::json!({
415 "query": {
416 "contig_count": query.contigs.len(),
417 "has_md5": query.has_md5s(),
418 "md5_coverage": query.md5_coverage(),
419 "naming_convention": format!("{:?}", query.naming_convention),
420 },
421 "matches": results,
422 "processing_info": {
423 "detected_format": input_data.format.as_ref().map_or("unknown", super::format_detection::FileFormat::display_name),
424 "processing_time_ms": processing_time,
425 "configuration": {
426 "score_threshold": config.score_threshold,
427 "result_limit": config.result_limit,
428 "scoring_weights": config.scoring_weights,
429 }
430 }
431 }))
432 .into_response()
433}
434
435#[allow(
437 clippy::cast_possible_truncation,
438 clippy::unused_async,
439 clippy::too_many_lines
440)] async fn handle_detailed_response(
442 params: &DetailedQueryParams,
443 matches: &[crate::matching::engine::MatchResult],
444 query: &crate::core::header::QueryHeader,
445 start_time: std::time::Instant,
446 config: &ConfigurationInfo,
447) -> Response {
448 use crate::core::contig::Contig;
449
450 let match_index = params.match_id.unwrap_or(0);
452 let Some(selected_match) = matches.get(match_index) else {
453 return (
454 StatusCode::BAD_REQUEST,
455 Json(create_safe_error_response(
456 "invalid_match_id",
457 "Invalid match ID specified",
458 Some("Match index out of bounds"),
459 )),
460 )
461 .into_response();
462 };
463
464 let query_page = params.query_page.unwrap_or(0);
466 let query_page_size = params.query_page_size.unwrap_or(100).min(500);
467 let ref_page = params.ref_page.unwrap_or(0);
468 let ref_page_size = params.ref_page_size.unwrap_or(100).min(500);
469
470 let total_query_contigs = query.contigs.len();
472 let query_start = query_page * query_page_size;
473 let query_end = (query_start + query_page_size).min(total_query_contigs);
474 let query_contigs_page: Vec<&Contig> = if query_start < total_query_contigs {
475 query.contigs[query_start..query_end].iter().collect()
476 } else {
477 Vec::new()
478 };
479
480 let total_ref_contigs = selected_match.reference.contigs.len();
482 let ref_start = ref_page * ref_page_size;
483 let ref_end = (ref_start + ref_page_size).min(total_ref_contigs);
484 let ref_contigs_page: Vec<&Contig> = if ref_start < total_ref_contigs {
485 selected_match.reference.contigs[ref_start..ref_end]
486 .iter()
487 .collect()
488 } else {
489 Vec::new()
490 };
491
492 let mut exact_match_mappings = Vec::new();
494 let mut renamed_match_mappings = Vec::new();
495 let mut conflict_mappings = Vec::new();
496 let mut query_only_indices = Vec::new();
497 let mut reference_only_indices = Vec::new();
498
499 let query_name_to_index: std::collections::HashMap<&str, usize> = query
501 .contigs
502 .iter()
503 .enumerate()
504 .map(|(i, c)| (c.name.as_str(), i))
505 .collect();
506
507 let ref_name_to_index: std::collections::HashMap<&str, usize> = selected_match
508 .reference
509 .contigs
510 .iter()
511 .enumerate()
512 .map(|(i, c)| (c.name.as_str(), i))
513 .collect();
514
515 for (i, _) in selected_match.diagnosis.exact_matches.iter().enumerate() {
517 exact_match_mappings.push(serde_json::json!({
521 "type": "exact",
522 "query_index": i, "reference_index": i }));
525 }
526
527 for rename in &selected_match.diagnosis.renamed_matches {
529 if let (Some(&query_idx), Some(&ref_idx)) = (
530 query_name_to_index.get(rename.query_name.as_str()),
531 ref_name_to_index.get(rename.reference_name.as_str()),
532 ) {
533 renamed_match_mappings.push(serde_json::json!({
534 "type": "renamed",
535 "query_index": query_idx,
536 "reference_index": ref_idx,
537 "query_name": rename.query_name,
538 "reference_name": rename.reference_name
539 }));
540 }
541 }
542
543 for conflict in &selected_match.diagnosis.conflicts {
545 if let Some(&query_idx) = query_name_to_index.get(conflict.query_contig.name.as_str()) {
546 let ref_idx = conflict
547 .expected
548 .as_ref()
549 .and_then(|expected| ref_name_to_index.get(expected.name.as_str()));
550
551 conflict_mappings.push(serde_json::json!({
552 "type": "conflict",
553 "query_index": query_idx,
554 "reference_index": ref_idx,
555 "conflict_type": format!("{:?}", conflict.conflict_type),
556 "description": conflict.description
557 }));
558 }
559 }
560
561 for contig in &selected_match.diagnosis.query_only {
563 if let Some(&index) = query_name_to_index.get(contig.name.as_str()) {
564 query_only_indices.push(index);
565 }
566 }
567
568 let mut matched_ref_indices = std::collections::HashSet::new();
570 #[allow(clippy::cast_possible_truncation)] for mapping in &exact_match_mappings {
572 if let Some(ref_idx) = mapping
573 .get("reference_index")
574 .and_then(serde_json::Value::as_u64)
575 {
576 matched_ref_indices.insert(ref_idx as usize);
577 }
578 }
579 #[allow(clippy::cast_possible_truncation)]
580 for mapping in &renamed_match_mappings {
581 if let Some(ref_idx) = mapping
582 .get("reference_index")
583 .and_then(serde_json::Value::as_u64)
584 {
585 matched_ref_indices.insert(ref_idx as usize);
586 }
587 }
588 #[allow(clippy::cast_possible_truncation)]
589 for mapping in &conflict_mappings {
590 if let Some(ref_idx) = mapping
591 .get("reference_index")
592 .and_then(serde_json::Value::as_u64)
593 {
594 matched_ref_indices.insert(ref_idx as usize);
595 }
596 }
597
598 for (i, _) in selected_match.reference.contigs.iter().enumerate() {
599 if !matched_ref_indices.contains(&i) {
600 reference_only_indices.push(i);
601 }
602 }
603
604 #[allow(clippy::cast_possible_truncation)] let processing_time = start_time.elapsed().as_millis() as u64;
607
608 Json(serde_json::json!({
609 "mode": "detailed",
610 "match_id": match_index,
611 "query": {
612 "contigs": query_contigs_page.iter().enumerate().map(|(page_idx, contig)| {
613 let global_idx = query_start + page_idx;
614 let match_status = if query_only_indices.contains(&global_idx) {
616 "missing"
617 } else if conflict_mappings.iter().any(|c| c.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
618 "conflict"
619 } else if renamed_match_mappings.iter().any(|r| r.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
620 "renamed"
621 } else if exact_match_mappings.iter().any(|e| e.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
622 "exact"
623 } else {
624 "unknown"
625 };
626
627 serde_json::json!({
628 "index": global_idx,
629 "name": contig.name,
630 "length": contig.length,
631 "md5": contig.md5,
632 "sha512t24u": contig.sha512t24u,
633 "sequence_role": format!("{:?}", contig.sequence_role),
634 "aliases": contig.aliases,
635 "match_status": match_status
636 })
637 }).collect::<Vec<_>>(),
638 "pagination": {
639 "page": query_page,
640 "page_size": query_page_size,
641 "total_count": total_query_contigs,
642 "total_pages": total_query_contigs.div_ceil(query_page_size)
643 }
644 },
645 "reference": {
646 "id": selected_match.reference.id.0,
647 "display_name": selected_match.reference.display_name,
648 "assembly": format!("{}", selected_match.reference.assembly),
649 "contigs": ref_contigs_page.iter().enumerate().map(|(page_idx, contig)| {
650 let global_idx = ref_start + page_idx;
651 let match_status = if reference_only_indices.contains(&global_idx) {
653 "missing"
654 } else if conflict_mappings.iter().any(|c| c.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
655 "conflict"
656 } else if renamed_match_mappings.iter().any(|r| r.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
657 "renamed"
658 } else if exact_match_mappings.iter().any(|e| e.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
659 "exact"
660 } else {
661 "unknown"
662 };
663
664 serde_json::json!({
665 "index": global_idx,
666 "name": contig.name,
667 "length": contig.length,
668 "md5": contig.md5,
669 "sha512t24u": contig.sha512t24u,
670 "sequence_role": format!("{:?}", contig.sequence_role),
671 "aliases": contig.aliases,
672 "match_status": match_status
673 })
674 }).collect::<Vec<_>>(),
675 "pagination": {
676 "page": ref_page,
677 "page_size": ref_page_size,
678 "total_count": total_ref_contigs,
679 "total_pages": total_ref_contigs.div_ceil(ref_page_size)
680 }
681 },
682 "mappings": {
683 "exact_matches": exact_match_mappings,
684 "renamed_matches": renamed_match_mappings,
685 "conflicts": conflict_mappings,
686 "query_only": query_only_indices,
687 "reference_only": reference_only_indices
688 },
689 "match_summary": {
690 "match_type": format!("{:?}", selected_match.diagnosis.match_type),
691 "reordered": selected_match.diagnosis.reordered,
692 "score": {
693 "composite": selected_match.score.composite,
694 "confidence": format!("{:?}", selected_match.score.confidence)
695 }
696 },
697 "processing_info": {
698 "processing_time_ms": processing_time,
699 "configuration": {
700 "score_threshold": config.score_threshold,
701 "result_limit": config.result_limit,
702 "scoring_weights": config.scoring_weights,
703 }
704 }
705 }))
706 .into_response()
707}
708
709#[allow(clippy::too_many_lines)] async fn extract_request_data(
712 multipart: &mut Multipart,
713) -> Result<(InputData, ConfigurationInfo), Response> {
714 let mut input_data = InputData {
715 text_content: None,
716 binary_content: None,
717 filename: None,
718 format: None,
719 };
720
721 let mut config = ConfigurationInfo {
722 score_threshold: 0.1, result_limit: 10,
724 scoring_weights: ScoringWeights::default(),
725 };
726
727 let mut fields_received = 0usize;
728 let mut had_parse_error = false;
729
730 loop {
732 if fields_received >= MAX_MULTIPART_FIELDS {
734 return Err((
735 StatusCode::BAD_REQUEST,
736 Json(ErrorResponse {
737 error: "Too many form fields".to_string(),
738 error_type: "field_limit_exceeded".to_string(),
739 details: None, }),
741 )
742 .into_response());
743 }
744
745 match multipart.next_field().await {
746 Ok(Some(field)) => {
747 fields_received += 1;
748 let name = field.name().unwrap_or_default().to_string();
749
750 match name.as_str() {
751 "file" => {
752 let filename = field.file_name().map(std::string::ToString::to_string);
753
754 match field.bytes().await {
755 Ok(bytes) => {
756 if bytes.len() > MAX_FILE_FIELD_SIZE {
758 return Err((
759 StatusCode::PAYLOAD_TOO_LARGE,
760 Json(ErrorResponse {
761 error: "File size exceeds limit".to_string(),
762 error_type: "file_too_large".to_string(),
763 details: None,
764 }),
765 )
766 .into_response());
767 }
768
769 let detected_format = if let Some(ref name) = filename {
771 detect_binary_format(name).unwrap_or(FileFormat::Auto)
772 } else {
773 FileFormat::Auto
774 };
775
776 match validate_upload(filename.as_deref(), &bytes, detected_format)
778 {
779 Ok(validated_filename) => {
780 input_data.filename = validated_filename;
781
782 if is_binary_content(&bytes) {
784 input_data.binary_content = Some(bytes.to_vec());
785 input_data.format = Some(detected_format);
786 } else {
787 input_data.text_content =
788 Some(String::from_utf8_lossy(&bytes).to_string());
789 }
790 }
791 Err(ValidationError::FilenameTooLong) => {
792 return Err((
793 StatusCode::BAD_REQUEST,
794 Json(create_safe_error_response(
795 "filename_too_long",
796 "Filename exceeds maximum length limit",
797 Some("Filename validation failed due to length constraints")
798 )),
799 ).into_response());
800 }
801 Err(ValidationError::InvalidFilename) => {
802 return Err((
803 StatusCode::BAD_REQUEST,
804 Json(create_safe_error_response(
805 "invalid_filename",
806 "Filename contains invalid or dangerous characters",
807 Some("Filename validation failed due to invalid characters")
808 )),
809 ).into_response());
810 }
811 Err(ValidationError::FormatValidationFailed) => {
812 return Err((
813 StatusCode::BAD_REQUEST,
814 Json(create_safe_error_response(
815 "format_mismatch",
816 "File content does not match the expected format based on filename",
817 Some("Format validation failed")
818 )),
819 ).into_response());
820 }
821 Err(ValidationError::InvalidFileContent) => {
822 return Err((
823 StatusCode::BAD_REQUEST,
824 Json(create_safe_error_response(
825 "invalid_content",
826 "File content appears malformed or corrupted",
827 None,
828 )),
829 )
830 .into_response());
831 }
832 Err(_) => {
833 return Err((
834 StatusCode::BAD_REQUEST,
835 Json(create_safe_error_response(
836 "validation_failed",
837 "File validation failed",
838 None,
839 )),
840 )
841 .into_response());
842 }
843 }
844 }
845 Err(_) => had_parse_error = true,
846 }
847 }
848 "header_text" => match field.text().await {
849 Ok(text) => {
850 if text.len() > MAX_TEXT_FIELD_SIZE {
852 return Err((
853 StatusCode::PAYLOAD_TOO_LARGE,
854 Json(ErrorResponse {
855 error: "Text field size exceeds limit".to_string(),
856 error_type: "text_too_large".to_string(),
857 details: None,
858 }),
859 )
860 .into_response());
861 }
862
863 if !text.trim().is_empty() {
864 input_data.text_content = Some(text);
865 }
866 }
867 Err(_) => had_parse_error = true,
868 },
869 "score_threshold" => {
870 if let Ok(text) = field.text().await {
871 if let Ok(threshold) = text.parse::<f64>() {
872 config.score_threshold = threshold.clamp(0.0, 1.0);
873 }
874 }
875 }
876 "result_limit" => {
877 if let Ok(text) = field.text().await {
878 if let Ok(limit) = text.parse::<usize>() {
879 config.result_limit = limit.clamp(1, 50); }
881 }
882 }
883 "scoring_weights" => {
884 if let Ok(text) = field.text().await {
885 if let Ok(weights) = serde_json::from_str::<HashMap<String, f64>>(&text)
886 {
887 config.scoring_weights = parse_scoring_weights(&weights);
888 }
889 }
890 }
891 _ => {} }
893 }
894 Ok(None) => break, Err(_) => {
896 had_parse_error = true;
897 break;
898 }
899 }
900 }
901
902 if input_data.text_content.is_none() && input_data.binary_content.is_none() {
904 let error_msg = if had_parse_error {
905 "Failed to parse upload. Please check the file format."
906 } else if fields_received == 0 {
907 "No data received. Please upload a file or paste header text."
908 } else {
909 "No valid header data found in upload."
910 };
911
912 return Err((
913 StatusCode::BAD_REQUEST,
914 Json(create_safe_error_response(
915 "missing_input",
916 error_msg,
917 None, )),
919 )
920 .into_response());
921 }
922
923 Ok((input_data, config))
924}
925
926fn parse_input_data(
928 input_data: &InputData,
929) -> Result<crate::core::header::QueryHeader, Box<Response>> {
930 if let Some(text_content) = &input_data.text_content {
931 let Ok(detected_format) = detect_format(text_content, input_data.filename.as_deref())
933 else {
934 return Err(Box::new(
935 (
936 StatusCode::BAD_REQUEST,
937 Json(create_safe_error_response(
938 "format_detection_failed",
939 "Unable to detect file format. Please check the file type and try again.",
940 Some("Format detection failed during parsing"),
941 )),
942 )
943 .into_response(),
944 ));
945 };
946
947 match parse_with_format(text_content, detected_format) {
948 Ok(query) => Ok(query),
949 Err(_) => Err(Box::new((
950 StatusCode::BAD_REQUEST,
951 Json(create_safe_error_response(
952 "parse_failed",
953 "Unable to process file content. Please check the file format and try again.",
954 Some("File parsing failed during content processing"),
955 )),
956 )
957 .into_response())),
958 }
959 } else if let Some(binary_content) = &input_data.binary_content {
960 let format = input_data.format.unwrap_or(FileFormat::Bam);
962
963 match parse_binary_file(binary_content, format) {
964 Ok(query) => Ok(query),
965 Err(_) => Err(Box::new((
966 StatusCode::BAD_REQUEST,
967 Json(create_safe_error_response(
968 "binary_parse_failed",
969 "Unable to process binary file. Please verify the file format and try again.",
970 Some("Binary file parsing failed during processing"),
971 )),
972 )
973 .into_response())),
974 }
975 } else {
976 Err(Box::new(
977 (
978 StatusCode::INTERNAL_SERVER_ERROR,
979 Json(ErrorResponse {
980 error: "Internal error: no input data".to_string(),
981 error_type: "internal_error".to_string(),
982 details: None,
983 }),
984 )
985 .into_response(),
986 ))
987 }
988}
989
990fn is_binary_content(bytes: &[u8]) -> bool {
992 let sample_size = std::cmp::min(bytes.len(), 1024);
994
995 if sample_size < 10 {
997 return false; }
999
1000 let non_printable_count = bytes[..sample_size]
1001 .iter()
1002 .filter(|&&b| b < 9 || (b > 13 && b < 32) || b > 126)
1003 .count();
1004
1005 count_to_f64(non_printable_count) > (count_to_f64(sample_size) * 0.01)
1007}
1008
1009fn detect_binary_format(filename: &str) -> Option<FileFormat> {
1011 let lower = filename.to_lowercase();
1012 if std::path::Path::new(&lower)
1013 .extension()
1014 .is_some_and(|ext| ext.eq_ignore_ascii_case("bam"))
1015 {
1016 Some(FileFormat::Bam)
1017 } else if std::path::Path::new(&lower)
1018 .extension()
1019 .is_some_and(|ext| ext.eq_ignore_ascii_case("cram"))
1020 {
1021 Some(FileFormat::Cram)
1022 } else {
1023 None
1024 }
1025}
1026
1027fn parse_scoring_weights(weights: &HashMap<String, f64>) -> ScoringWeights {
1029 let contig_match = weights.get("contigMatch").unwrap_or(&70.0) / 100.0;
1032 let coverage = weights.get("coverage").unwrap_or(&20.0) / 100.0;
1033 let order = weights.get("orderScore").unwrap_or(&10.0) / 100.0;
1034 let conflict_penalty = weights.get("conflictPenalty").unwrap_or(&10.0) / 100.0;
1036
1037 ScoringWeights {
1038 contig_match,
1039 coverage,
1040 order,
1041 conflict_penalty,
1042 }
1043}
1044
1045async fn catalog_handler(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
1047 let refs: Vec<serde_json::Value> = state
1048 .catalog
1049 .references
1050 .iter()
1051 .map(|r| {
1052 serde_json::json!({
1053 "id": r.id.0,
1054 "display_name": r.display_name,
1055 "assembly": format!("{}", r.assembly),
1056 "source": format!("{}", r.source),
1057 "contig_count": r.contigs.len(),
1058 "has_decoy": r.has_decoy(),
1059 "has_alt": r.has_alt(),
1060 "tags": r.tags,
1061 })
1062 })
1063 .collect();
1064
1065 Json(serde_json::json!({
1066 "count": refs.len(),
1067 "references": refs,
1068 }))
1069}