Skip to main content

ref_solver/web/
server.rs

1use axum::http::header;
2use axum::{
3    extract::{DefaultBodyLimit, Multipart, Query, State},
4    http::{HeaderName, HeaderValue, StatusCode},
5    response::{Html, IntoResponse, Json, Response},
6    routing::{get, post},
7    Router,
8};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::net::SocketAddr;
12use std::sync::Arc;
13use std::time::Duration;
14use tokio::net::TcpListener;
15use tower::limit::ConcurrencyLimitLayer;
16use tower::ServiceBuilder;
17use tower_governor::{governor::GovernorConfigBuilder, GovernorLayer};
18use tower_http::set_header::SetResponseHeaderLayer;
19use tower_http::timeout::TimeoutLayer;
20
21use crate::catalog::store::ReferenceCatalog;
22use crate::cli::ServeArgs;
23use crate::matching::engine::{MatchingConfig, MatchingEngine, ScoringWeights};
24use crate::matching::Suggestion;
25use crate::utils::validation::{validate_upload, ValidationError};
26use crate::web::format_detection::{
27    detect_format, parse_binary_file, parse_with_format, FileFormat,
28};
29
/// Security configuration constants to prevent `DoS` attacks
///
/// Maximum number of multipart form fields accepted per request; the
/// multipart extraction loop answers `400 Bad Request` once this many
/// fields have been received.
pub const MAX_MULTIPART_FIELDS: usize = 10;
/// Maximum size in bytes of the uploaded `file` field; larger uploads are
/// answered with `413 Payload Too Large`.
pub const MAX_FILE_FIELD_SIZE: usize = 16 * 1024 * 1024; // 16MB
/// Maximum size in bytes of a text form field.
// NOTE(review): the enforcement site is not visible in this part of the file — confirm where it is applied.
pub const MAX_TEXT_FIELD_SIZE: usize = 1024 * 1024; // 1MB
34
/// Converts a `usize` count into an `f64`.
///
/// The cast is deliberately lossy for very large counts; the clippy
/// allowance documents that the precision loss is accepted here.
#[inline]
#[allow(clippy::cast_precision_loss)]
fn count_to_f64(count: usize) -> f64 {
    count as f64
}
43
/// Shared application state
///
/// Built once in `create_router`, wrapped in an `Arc` and attached to the
/// router with `with_state`, so handlers receive it via the `State` extractor.
pub struct AppState {
    /// Reference catalog loaded through `ReferenceCatalog::load_embedded`;
    /// handed to the matching engine for each identify request.
    pub catalog: ReferenceCatalog,
}
48
/// Input data extracted from multipart form
///
/// For an uploaded `file` field, the payload is stored in `binary_content`
/// when `is_binary_content` reports binary data and in `text_content`
/// (lossily decoded as UTF-8) otherwise.
#[derive(Debug)]
struct InputData {
    /// Text content (if provided via textarea or text file)
    text_content: Option<String>,
    /// Binary file content (if provided)
    binary_content: Option<Vec<u8>>,
    /// Original filename, as returned by upload validation
    filename: Option<String>,
    /// Detected or specified format; reported as `"unknown"` in the
    /// response's `detected_format` when absent
    format: Option<FileFormat>,
}
61
62/// Enhanced error response
63#[derive(Serialize)]
64pub struct ErrorResponse {
65    pub error: String,
66    pub error_type: String,
67    pub details: Option<String>,
68}
69
/// Per-request matching configuration.
///
/// Populated while reading the multipart form and echoed back to the client
/// under `processing_info.configuration` in the JSON responses.
#[derive(Serialize)]
struct ConfigurationInfo {
    /// Minimum score passed to the matching engine as `min_score`
    /// (defaults to `0.1` when the form is read).
    score_threshold: f64,
    /// Limit passed to `find_matches` (defaults to `10`).
    result_limit: usize,
    /// Weights forwarded to the matching engine
    /// (defaults to `ScoringWeights::default()`).
    scoring_weights: ScoringWeights,
}
76
/// Query parameters for detailed mode
///
/// Deserialized from the URL query string of the identify endpoint; every
/// field is optional, so a request without a query string is still valid.
#[derive(Deserialize)]
struct DetailedQueryParams {
    /// Mode: "detailed" for detailed contig breakdown, omit for summary
    mode: Option<String>,
    /// Match index to get details for (0-based); the first match is used
    /// when omitted
    match_id: Option<usize>,
    /// Page number for query contigs (0-based)
    query_page: Option<usize>,
    /// Page size for query contigs (default: 100, max: 500)
    query_page_size: Option<usize>,
    /// Page number for reference contigs (0-based)
    ref_page: Option<usize>,
    /// Page size for reference contigs (default: 100, max: 500)
    ref_page_size: Option<usize>,
}
93
94/// Create a safe error response that prevents information disclosure
95/// while logging detailed errors server-side for debugging
96pub fn create_safe_error_response(
97    error_type: &str,
98    user_message: &str,
99    internal_error: Option<&str>,
100) -> ErrorResponse {
101    // Log detailed error server-side for debugging (not exposed to client)
102    if let Some(internal_msg) = internal_error {
103        tracing::error!("Internal error ({}): {}", error_type, internal_msg);
104    }
105
106    ErrorResponse {
107        error: user_message.to_string(),
108        error_type: error_type.to_string(),
109        details: None, // Never expose internal details to prevent information disclosure
110    }
111}
112
113/// Run the web server
114///
115/// # Errors
116///
117/// Returns an error if the tokio runtime cannot be created or the server fails to start.
118pub fn run(args: ServeArgs) -> anyhow::Result<()> {
119    // Build tokio runtime
120    let rt = tokio::runtime::Runtime::new()?;
121    rt.block_on(async move { run_server(args).await })
122}
123
124/// Create the application router with all routes and middleware configured.
125///
126/// # Errors
127///
128/// Returns an error if the catalog cannot be loaded.
129#[allow(clippy::missing_panics_doc)] // Panics only on invalid governor config (constants are valid)
130pub fn create_router() -> anyhow::Result<Router> {
131    // Load catalog
132    let catalog = ReferenceCatalog::load_embedded()?;
133    let state = Arc::new(AppState { catalog });
134
135    // Configure IP-based rate limiting
136    let governor_conf = GovernorConfigBuilder::default()
137        .per_second(10) // 10 requests per second per IP
138        .burst_size(50) // Allow bursts of 50 requests
139        .finish()
140        .unwrap();
141
142    // Build router with comprehensive security layers
143    let app = Router::new()
144        .route("/", get(index_handler))
145        .route("/api/identify", post(identify_handler))
146        .route("/api/catalog", get(catalog_handler))
147        // Static file routes
148        .route("/static/css/styles.css", get(styles_css_handler))
149        .route("/static/js/main.js", get(main_js_handler))
150        .route("/static/js/utils/helpers.js", get(helpers_js_handler))
151        .route(
152            "/static/js/managers/ConfigurationManager.js",
153            get(config_manager_js_handler),
154        )
155        .route(
156            "/static/js/managers/TabManager.js",
157            get(tab_manager_js_handler),
158        )
159        .route(
160            "/static/js/managers/ResultsManager.js",
161            get(results_manager_js_handler),
162        )
163        .route(
164            "/static/js/managers/SplitViewManager.js",
165            get(split_view_manager_js_handler),
166        )
167        .with_state(state)
168        .layer(
169            ServiceBuilder::new()
170                // Security headers for browser protection
171                .layer(SetResponseHeaderLayer::if_not_present(
172                    HeaderName::from_static("x-content-type-options"),
173                    HeaderValue::from_static("nosniff"),
174                ))
175                .layer(SetResponseHeaderLayer::if_not_present(
176                    HeaderName::from_static("x-frame-options"),
177                    HeaderValue::from_static("DENY"),
178                ))
179                .layer(SetResponseHeaderLayer::if_not_present(
180                    HeaderName::from_static("x-xss-protection"),
181                    HeaderValue::from_static("1; mode=block"),
182                ))
183                .layer(SetResponseHeaderLayer::if_not_present(
184                    HeaderName::from_static("strict-transport-security"),
185                    HeaderValue::from_static("max-age=31536000; includeSubDomains"),
186                ))
187                .layer(SetResponseHeaderLayer::if_not_present(
188                    HeaderName::from_static("referrer-policy"),
189                    HeaderValue::from_static("strict-origin-when-cross-origin"),
190                ))
191                // IP-based rate limiting to prevent abuse
192                .layer(GovernorLayer {
193                    config: Arc::new(governor_conf),
194                })
195                // Request timeout to prevent slow client attacks
196                .layer(TimeoutLayer::with_status_code(
197                    StatusCode::REQUEST_TIMEOUT,
198                    Duration::from_secs(30),
199                ))
200                // Limit concurrent requests to prevent DOS
201                .layer(ConcurrencyLimitLayer::new(100))
202                // Limit request body size (accommodate largest file + multipart overhead)
203                .layer(DefaultBodyLimit::max(20 * 1024 * 1024)), // 20MB limit
204        );
205
206    Ok(app)
207}
208
209async fn run_server(args: ServeArgs) -> anyhow::Result<()> {
210    let app = create_router()?;
211
212    let addr = format!("{}:{}", args.address, args.port);
213    println!("Starting ref-solver web server at http://{addr}");
214
215    if args.open {
216        let _ = open::that(format!("http://{addr}"));
217    }
218
219    let listener = TcpListener::bind(&addr).await?;
220    axum::serve(
221        listener,
222        app.into_make_service_with_connect_info::<SocketAddr>(),
223    )
224    .await?;
225
226    Ok(())
227}
228
229/// Main page handler
230async fn index_handler() -> Html<&'static str> {
231    Html(include_str!("templates/index.html"))
232}
233
234/// Static CSS handler
235async fn styles_css_handler() -> impl IntoResponse {
236    (
237        [(header::CONTENT_TYPE, "text/css; charset=utf-8")],
238        include_str!("static/css/styles.css"),
239    )
240}
241
242/// Static JS handlers for ES6 modules
243async fn main_js_handler() -> impl IntoResponse {
244    (
245        [(
246            header::CONTENT_TYPE,
247            "application/javascript; charset=utf-8",
248        )],
249        include_str!("static/js/main.js"),
250    )
251}
252
253async fn helpers_js_handler() -> impl IntoResponse {
254    (
255        [(
256            header::CONTENT_TYPE,
257            "application/javascript; charset=utf-8",
258        )],
259        include_str!("static/js/utils/helpers.js"),
260    )
261}
262
263async fn config_manager_js_handler() -> impl IntoResponse {
264    (
265        [(
266            header::CONTENT_TYPE,
267            "application/javascript; charset=utf-8",
268        )],
269        include_str!("static/js/managers/ConfigurationManager.js"),
270    )
271}
272
273async fn tab_manager_js_handler() -> impl IntoResponse {
274    (
275        [(
276            header::CONTENT_TYPE,
277            "application/javascript; charset=utf-8",
278        )],
279        include_str!("static/js/managers/TabManager.js"),
280    )
281}
282
283async fn results_manager_js_handler() -> impl IntoResponse {
284    (
285        [(
286            header::CONTENT_TYPE,
287            "application/javascript; charset=utf-8",
288        )],
289        include_str!("static/js/managers/ResultsManager.js"),
290    )
291}
292
293async fn split_view_manager_js_handler() -> impl IntoResponse {
294    (
295        [(
296            header::CONTENT_TYPE,
297            "application/javascript; charset=utf-8",
298        )],
299        include_str!("static/js/managers/SplitViewManager.js"),
300    )
301}
302
303/// API endpoint for identifying references
304#[allow(clippy::too_many_lines)] // TODO: Refactor into smaller functions
305async fn identify_handler(
306    State(state): State<Arc<AppState>>,
307    Query(params): Query<DetailedQueryParams>,
308    mut multipart: Multipart,
309) -> impl IntoResponse {
310    let start_time = std::time::Instant::now();
311
312    // Extract input data and configuration from multipart form
313    let (input_data, config) = match extract_request_data(&mut multipart).await {
314        Ok(data) => data,
315        Err(error_response) => return error_response,
316    };
317
318    // Parse input using intelligent format detection
319    let query = match parse_input_data(&input_data) {
320        Ok(query) => query,
321        Err(error_response) => return *error_response,
322    };
323
324    // Create matching engine with configuration
325    let matching_config = MatchingConfig {
326        min_score: config.score_threshold,
327        scoring_weights: config.scoring_weights.clone(),
328    };
329
330    let engine = MatchingEngine::new(&state.catalog, matching_config);
331    let matches = engine.find_matches(&query, config.result_limit);
332
333    // Check if detailed mode is requested
334    if params.mode.as_deref() == Some("detailed") {
335        return handle_detailed_response(&params, &matches, &query, start_time, &config).await;
336    }
337
338    // Build enhanced response
339    let results: Vec<serde_json::Value> = matches
340        .iter()
341        .map(|m| {
342            serde_json::json!({
343                "reference": {
344                    "id": m.reference.id.0,
345                    "display_name": m.reference.display_name,
346                    "assembly": format!("{}", m.reference.assembly),
347                    "source": format!("{}", m.reference.source),
348                    "download_url": m.reference.download_url,
349                },
350                "score": {
351                    "composite": m.score.composite,
352                    "confidence": format!("{:?}", m.score.confidence),
353                    "detailed_scores": {
354                        "md5_jaccard": m.score.md5_jaccard,
355                        "name_length_jaccard": m.score.name_length_jaccard,
356                        "md5_query_coverage": m.score.md5_query_coverage,
357                        "order_score": m.score.order_score,
358                    },
359                },
360                "match_type": format!("{:?}", m.diagnosis.match_type),
361                "reordered": m.diagnosis.reordered,
362                "exact_matches": m.diagnosis.exact_matches.len(),
363                "renamed_matches": m.diagnosis.renamed_matches.len(),
364                "conflicts": m.diagnosis.conflicts.len(),
365                "query_only": m.diagnosis.query_only.len(),
366                "diagnosis": {
367                    "exact_matches": m.diagnosis.exact_matches.iter().map(|_| {
368                        serde_json::json!({"type": "exact"})
369                    }).collect::<Vec<_>>(),
370                    "renamed_matches": m.diagnosis.renamed_matches.iter().map(|r| {
371                        serde_json::json!({
372                            "query_name": r.query_name,
373                            "reference_name": r.reference_name
374                        })
375                    }).collect::<Vec<_>>(),
376                    "conflicts": m.diagnosis.conflicts.iter().map(|c| {
377                        serde_json::json!({
378                            "query_contig": {
379                                "name": c.query_contig.name,
380                                "length": c.query_contig.length,
381                                "md5": c.query_contig.md5
382                            },
383                            "conflict_type": format!("{:?}", c.conflict_type),
384                            "description": c.description
385                        })
386                    }).collect::<Vec<_>>(),
387                },
388                "suggestions": m.diagnosis.suggestions.iter().map(|s| {
389                    match s {
390                        Suggestion::RenameContigs { command_hint, .. } => {
391                            serde_json::json!({"type": "rename", "command": command_hint})
392                        }
393                        Suggestion::ReorderContigs { command_hint } => {
394                            serde_json::json!({"type": "reorder", "command": command_hint})
395                        }
396                        Suggestion::ReplaceContig { contig_name, reason, source } => {
397                            serde_json::json!({"type": "replace", "contig": contig_name, "reason": reason, "source": source})
398                        }
399                        Suggestion::UseAsIs { warnings } => {
400                            serde_json::json!({"type": "use_as_is", "warnings": warnings})
401                        }
402                        Suggestion::Realign { reason, suggested_reference } => {
403                            serde_json::json!({"type": "realign", "reason": reason, "reference": suggested_reference})
404                        }
405                    }
406                }).collect::<Vec<_>>(),
407            })
408        })
409        .collect();
410
411    #[allow(clippy::cast_possible_truncation)] // Processing time won't exceed u64
412    let processing_time = start_time.elapsed().as_millis() as u64;
413
414    Json(serde_json::json!({
415        "query": {
416            "contig_count": query.contigs.len(),
417            "has_md5": query.has_md5s(),
418            "md5_coverage": query.md5_coverage(),
419            "naming_convention": format!("{:?}", query.naming_convention),
420        },
421        "matches": results,
422        "processing_info": {
423            "detected_format": input_data.format.as_ref().map_or("unknown", super::format_detection::FileFormat::display_name),
424            "processing_time_ms": processing_time,
425            "configuration": {
426                "score_threshold": config.score_threshold,
427                "result_limit": config.result_limit,
428                "scoring_weights": config.scoring_weights,
429            }
430        }
431    }))
432    .into_response()
433}
434
435/// Handle detailed response mode for contig breakdown
436#[allow(
437    clippy::cast_possible_truncation,
438    clippy::unused_async,
439    clippy::too_many_lines
440)] // JSON indices; TODO: refactor
441async fn handle_detailed_response(
442    params: &DetailedQueryParams,
443    matches: &[crate::matching::engine::MatchResult],
444    query: &crate::core::header::QueryHeader,
445    start_time: std::time::Instant,
446    config: &ConfigurationInfo,
447) -> Response {
448    use crate::core::contig::Contig;
449
450    // Get the specific match or default to first match
451    let match_index = params.match_id.unwrap_or(0);
452    let Some(selected_match) = matches.get(match_index) else {
453        return (
454            StatusCode::BAD_REQUEST,
455            Json(create_safe_error_response(
456                "invalid_match_id",
457                "Invalid match ID specified",
458                Some("Match index out of bounds"),
459            )),
460        )
461            .into_response();
462    };
463
464    // Set up pagination parameters
465    let query_page = params.query_page.unwrap_or(0);
466    let query_page_size = params.query_page_size.unwrap_or(100).min(500);
467    let ref_page = params.ref_page.unwrap_or(0);
468    let ref_page_size = params.ref_page_size.unwrap_or(100).min(500);
469
470    // Extract query contigs with pagination
471    let total_query_contigs = query.contigs.len();
472    let query_start = query_page * query_page_size;
473    let query_end = (query_start + query_page_size).min(total_query_contigs);
474    let query_contigs_page: Vec<&Contig> = if query_start < total_query_contigs {
475        query.contigs[query_start..query_end].iter().collect()
476    } else {
477        Vec::new()
478    };
479
480    // Extract reference contigs with pagination
481    let total_ref_contigs = selected_match.reference.contigs.len();
482    let ref_start = ref_page * ref_page_size;
483    let ref_end = (ref_start + ref_page_size).min(total_ref_contigs);
484    let ref_contigs_page: Vec<&Contig> = if ref_start < total_ref_contigs {
485        selected_match.reference.contigs[ref_start..ref_end]
486            .iter()
487            .collect()
488    } else {
489        Vec::new()
490    };
491
492    // Build detailed mapping information
493    let mut exact_match_mappings = Vec::new();
494    let mut renamed_match_mappings = Vec::new();
495    let mut conflict_mappings = Vec::new();
496    let mut query_only_indices = Vec::new();
497    let mut reference_only_indices = Vec::new();
498
499    // Create lookup maps for efficient indexing
500    let query_name_to_index: std::collections::HashMap<&str, usize> = query
501        .contigs
502        .iter()
503        .enumerate()
504        .map(|(i, c)| (c.name.as_str(), i))
505        .collect();
506
507    let ref_name_to_index: std::collections::HashMap<&str, usize> = selected_match
508        .reference
509        .contigs
510        .iter()
511        .enumerate()
512        .map(|(i, c)| (c.name.as_str(), i))
513        .collect();
514
515    // Process exact matches (need to map back to contigs since ContigMatch is empty)
516    for (i, _) in selected_match.diagnosis.exact_matches.iter().enumerate() {
517        // Since ContigMatch doesn't contain contig data, we need to reconstruct
518        // the mapping by analyzing the query and reference contigs
519        // This is a limitation of the current data structure
520        exact_match_mappings.push(serde_json::json!({
521            "type": "exact",
522            "query_index": i, // This is approximate - we'd need better data structure
523            "reference_index": i // This is approximate - we'd need better data structure
524        }));
525    }
526
527    // Process renamed matches
528    for rename in &selected_match.diagnosis.renamed_matches {
529        if let (Some(&query_idx), Some(&ref_idx)) = (
530            query_name_to_index.get(rename.query_name.as_str()),
531            ref_name_to_index.get(rename.reference_name.as_str()),
532        ) {
533            renamed_match_mappings.push(serde_json::json!({
534                "type": "renamed",
535                "query_index": query_idx,
536                "reference_index": ref_idx,
537                "query_name": rename.query_name,
538                "reference_name": rename.reference_name
539            }));
540        }
541    }
542
543    // Process conflicts
544    for conflict in &selected_match.diagnosis.conflicts {
545        if let Some(&query_idx) = query_name_to_index.get(conflict.query_contig.name.as_str()) {
546            let ref_idx = conflict
547                .expected
548                .as_ref()
549                .and_then(|expected| ref_name_to_index.get(expected.name.as_str()));
550
551            conflict_mappings.push(serde_json::json!({
552                "type": "conflict",
553                "query_index": query_idx,
554                "reference_index": ref_idx,
555                "conflict_type": format!("{:?}", conflict.conflict_type),
556                "description": conflict.description
557            }));
558        }
559    }
560
561    // Process query-only contigs
562    for contig in &selected_match.diagnosis.query_only {
563        if let Some(&index) = query_name_to_index.get(contig.name.as_str()) {
564            query_only_indices.push(index);
565        }
566    }
567
568    // Identify reference-only contigs (those not matched by any query contig)
569    let mut matched_ref_indices = std::collections::HashSet::new();
570    #[allow(clippy::cast_possible_truncation)] // Contig indices bounded by MAX_CONTIGS
571    for mapping in &exact_match_mappings {
572        if let Some(ref_idx) = mapping
573            .get("reference_index")
574            .and_then(serde_json::Value::as_u64)
575        {
576            matched_ref_indices.insert(ref_idx as usize);
577        }
578    }
579    #[allow(clippy::cast_possible_truncation)]
580    for mapping in &renamed_match_mappings {
581        if let Some(ref_idx) = mapping
582            .get("reference_index")
583            .and_then(serde_json::Value::as_u64)
584        {
585            matched_ref_indices.insert(ref_idx as usize);
586        }
587    }
588    #[allow(clippy::cast_possible_truncation)]
589    for mapping in &conflict_mappings {
590        if let Some(ref_idx) = mapping
591            .get("reference_index")
592            .and_then(serde_json::Value::as_u64)
593        {
594            matched_ref_indices.insert(ref_idx as usize);
595        }
596    }
597
598    for (i, _) in selected_match.reference.contigs.iter().enumerate() {
599        if !matched_ref_indices.contains(&i) {
600            reference_only_indices.push(i);
601        }
602    }
603
604    // Build response
605    #[allow(clippy::cast_possible_truncation)] // Processing time won't exceed u64
606    let processing_time = start_time.elapsed().as_millis() as u64;
607
608    Json(serde_json::json!({
609        "mode": "detailed",
610        "match_id": match_index,
611        "query": {
612            "contigs": query_contigs_page.iter().enumerate().map(|(page_idx, contig)| {
613                let global_idx = query_start + page_idx;
614                // Determine match status for this contig
615                let match_status = if query_only_indices.contains(&global_idx) {
616                    "missing"
617                } else if conflict_mappings.iter().any(|c| c.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
618                    "conflict"
619                } else if renamed_match_mappings.iter().any(|r| r.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
620                    "renamed"
621                } else if exact_match_mappings.iter().any(|e| e.get("query_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
622                    "exact"
623                } else {
624                    "unknown"
625                };
626
627                serde_json::json!({
628                    "index": global_idx,
629                    "name": contig.name,
630                    "length": contig.length,
631                    "md5": contig.md5,
632                    "sequence_role": format!("{:?}", contig.sequence_role),
633                    "aliases": contig.aliases,
634                    "match_status": match_status
635                })
636            }).collect::<Vec<_>>(),
637            "pagination": {
638                "page": query_page,
639                "page_size": query_page_size,
640                "total_count": total_query_contigs,
641                "total_pages": total_query_contigs.div_ceil(query_page_size)
642            }
643        },
644        "reference": {
645            "id": selected_match.reference.id.0,
646            "display_name": selected_match.reference.display_name,
647            "assembly": format!("{}", selected_match.reference.assembly),
648            "contigs": ref_contigs_page.iter().enumerate().map(|(page_idx, contig)| {
649                let global_idx = ref_start + page_idx;
650                // Determine match status for this reference contig
651                let match_status = if reference_only_indices.contains(&global_idx) {
652                    "missing"
653                } else if conflict_mappings.iter().any(|c| c.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
654                    "conflict"
655                } else if renamed_match_mappings.iter().any(|r| r.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
656                    "renamed"
657                } else if exact_match_mappings.iter().any(|e| e.get("reference_index").and_then(serde_json::Value::as_u64).map(|i| i as usize) == Some(global_idx)) {
658                    "exact"
659                } else {
660                    "unknown"
661                };
662
663                serde_json::json!({
664                    "index": global_idx,
665                    "name": contig.name,
666                    "length": contig.length,
667                    "md5": contig.md5,
668                    "sequence_role": format!("{:?}", contig.sequence_role),
669                    "aliases": contig.aliases,
670                    "match_status": match_status
671                })
672            }).collect::<Vec<_>>(),
673            "pagination": {
674                "page": ref_page,
675                "page_size": ref_page_size,
676                "total_count": total_ref_contigs,
677                "total_pages": total_ref_contigs.div_ceil(ref_page_size)
678            }
679        },
680        "mappings": {
681            "exact_matches": exact_match_mappings,
682            "renamed_matches": renamed_match_mappings,
683            "conflicts": conflict_mappings,
684            "query_only": query_only_indices,
685            "reference_only": reference_only_indices
686        },
687        "match_summary": {
688            "match_type": format!("{:?}", selected_match.diagnosis.match_type),
689            "reordered": selected_match.diagnosis.reordered,
690            "score": {
691                "composite": selected_match.score.composite,
692                "confidence": format!("{:?}", selected_match.score.confidence)
693            }
694        },
695        "processing_info": {
696            "processing_time_ms": processing_time,
697            "configuration": {
698                "score_threshold": config.score_threshold,
699                "result_limit": config.result_limit,
700                "scoring_weights": config.scoring_weights,
701            }
702        }
703    }))
704    .into_response()
705}
706
707/// Extract input data and configuration from multipart form
708#[allow(clippy::too_many_lines)] // TODO: Refactor into smaller functions
709async fn extract_request_data(
710    multipart: &mut Multipart,
711) -> Result<(InputData, ConfigurationInfo), Response> {
712    let mut input_data = InputData {
713        text_content: None,
714        binary_content: None,
715        filename: None,
716        format: None,
717    };
718
719    let mut config = ConfigurationInfo {
720        score_threshold: 0.1, // Default 10%
721        result_limit: 10,
722        scoring_weights: ScoringWeights::default(),
723    };
724
725    let mut fields_received = 0usize;
726    let mut had_parse_error = false;
727
728    // Process multipart fields
729    loop {
730        // Check field count limit before processing
731        if fields_received >= MAX_MULTIPART_FIELDS {
732            return Err((
733                StatusCode::BAD_REQUEST,
734                Json(ErrorResponse {
735                    error: "Too many form fields".to_string(),
736                    error_type: "field_limit_exceeded".to_string(),
737                    details: None, // No internal details for security
738                }),
739            )
740                .into_response());
741        }
742
743        match multipart.next_field().await {
744            Ok(Some(field)) => {
745                fields_received += 1;
746                let name = field.name().unwrap_or_default().to_string();
747
748                match name.as_str() {
749                    "file" => {
750                        let filename = field.file_name().map(std::string::ToString::to_string);
751
752                        match field.bytes().await {
753                            Ok(bytes) => {
754                                // Validate field size before processing
755                                if bytes.len() > MAX_FILE_FIELD_SIZE {
756                                    return Err((
757                                        StatusCode::PAYLOAD_TOO_LARGE,
758                                        Json(ErrorResponse {
759                                            error: "File size exceeds limit".to_string(),
760                                            error_type: "file_too_large".to_string(),
761                                            details: None,
762                                        }),
763                                    )
764                                        .into_response());
765                                }
766
767                                // Detect format from filename for validation
768                                let detected_format = if let Some(ref name) = filename {
769                                    detect_binary_format(name).unwrap_or(FileFormat::Auto)
770                                } else {
771                                    FileFormat::Auto
772                                };
773
774                                // Use comprehensive validation function for security
775                                match validate_upload(filename.as_deref(), &bytes, detected_format)
776                                {
777                                    Ok(validated_filename) => {
778                                        input_data.filename = validated_filename;
779
780                                        // Detect if content is binary or text
781                                        if is_binary_content(&bytes) {
782                                            input_data.binary_content = Some(bytes.to_vec());
783                                            input_data.format = Some(detected_format);
784                                        } else {
785                                            input_data.text_content =
786                                                Some(String::from_utf8_lossy(&bytes).to_string());
787                                        }
788                                    }
789                                    Err(ValidationError::FilenameTooLong) => {
790                                        return Err((
791                                            StatusCode::BAD_REQUEST,
792                                            Json(create_safe_error_response(
793                                                "filename_too_long",
794                                                "Filename exceeds maximum length limit",
795                                                Some("Filename validation failed due to length constraints")
796                                            )),
797                                        ).into_response());
798                                    }
799                                    Err(ValidationError::InvalidFilename) => {
800                                        return Err((
801                                            StatusCode::BAD_REQUEST,
802                                            Json(create_safe_error_response(
803                                                "invalid_filename",
804                                                "Filename contains invalid or dangerous characters",
805                                                Some("Filename validation failed due to invalid characters")
806                                            )),
807                                        ).into_response());
808                                    }
809                                    Err(ValidationError::FormatValidationFailed) => {
810                                        return Err((
811                                            StatusCode::BAD_REQUEST,
812                                            Json(create_safe_error_response(
813                                                "format_mismatch",
814                                                "File content does not match the expected format based on filename",
815                                                Some("Format validation failed")
816                                            )),
817                                        ).into_response());
818                                    }
819                                    Err(ValidationError::InvalidFileContent) => {
820                                        return Err((
821                                            StatusCode::BAD_REQUEST,
822                                            Json(create_safe_error_response(
823                                                "invalid_content",
824                                                "File content appears malformed or corrupted",
825                                                None,
826                                            )),
827                                        )
828                                            .into_response());
829                                    }
830                                    Err(_) => {
831                                        return Err((
832                                            StatusCode::BAD_REQUEST,
833                                            Json(create_safe_error_response(
834                                                "validation_failed",
835                                                "File validation failed",
836                                                None,
837                                            )),
838                                        )
839                                            .into_response());
840                                    }
841                                }
842                            }
843                            Err(_) => had_parse_error = true,
844                        }
845                    }
846                    "header_text" => match field.text().await {
847                        Ok(text) => {
848                            // Validate text field size
849                            if text.len() > MAX_TEXT_FIELD_SIZE {
850                                return Err((
851                                    StatusCode::PAYLOAD_TOO_LARGE,
852                                    Json(ErrorResponse {
853                                        error: "Text field size exceeds limit".to_string(),
854                                        error_type: "text_too_large".to_string(),
855                                        details: None,
856                                    }),
857                                )
858                                    .into_response());
859                            }
860
861                            if !text.trim().is_empty() {
862                                input_data.text_content = Some(text);
863                            }
864                        }
865                        Err(_) => had_parse_error = true,
866                    },
867                    "score_threshold" => {
868                        if let Ok(text) = field.text().await {
869                            if let Ok(threshold) = text.parse::<f64>() {
870                                config.score_threshold = threshold.clamp(0.0, 1.0);
871                            }
872                        }
873                    }
874                    "result_limit" => {
875                        if let Ok(text) = field.text().await {
876                            if let Ok(limit) = text.parse::<usize>() {
877                                config.result_limit = limit.clamp(1, 50); // Reasonable limits
878                            }
879                        }
880                    }
881                    "scoring_weights" => {
882                        if let Ok(text) = field.text().await {
883                            if let Ok(weights) = serde_json::from_str::<HashMap<String, f64>>(&text)
884                            {
885                                config.scoring_weights = parse_scoring_weights(&weights);
886                            }
887                        }
888                    }
889                    _ => {} // Ignore unknown fields
890                }
891            }
892            Ok(None) => break, // No more fields
893            Err(_) => {
894                had_parse_error = true;
895                break;
896            }
897        }
898    }
899
900    // Validate that we have some input
901    if input_data.text_content.is_none() && input_data.binary_content.is_none() {
902        let error_msg = if had_parse_error {
903            "Failed to parse upload. Please check the file format."
904        } else if fields_received == 0 {
905            "No data received. Please upload a file or paste header text."
906        } else {
907            "No valid header data found in upload."
908        };
909
910        return Err((
911            StatusCode::BAD_REQUEST,
912            Json(create_safe_error_response(
913                "missing_input",
914                error_msg,
915                None, // Never include details for consistency
916            )),
917        )
918            .into_response());
919    }
920
921    Ok((input_data, config))
922}
923
924/// Parse input data using intelligent format detection
925fn parse_input_data(
926    input_data: &InputData,
927) -> Result<crate::core::header::QueryHeader, Box<Response>> {
928    if let Some(text_content) = &input_data.text_content {
929        // Text-based parsing with format detection
930        let Ok(detected_format) = detect_format(text_content, input_data.filename.as_deref())
931        else {
932            return Err(Box::new(
933                (
934                    StatusCode::BAD_REQUEST,
935                    Json(create_safe_error_response(
936                        "format_detection_failed",
937                        "Unable to detect file format. Please check the file type and try again.",
938                        Some("Format detection failed during parsing"),
939                    )),
940                )
941                    .into_response(),
942            ));
943        };
944
945        match parse_with_format(text_content, detected_format) {
946            Ok(query) => Ok(query),
947            Err(_) => Err(Box::new((
948                StatusCode::BAD_REQUEST,
949                Json(create_safe_error_response(
950                    "parse_failed",
951                    "Unable to process file content. Please check the file format and try again.",
952                    Some("File parsing failed during content processing"),
953                )),
954            )
955                .into_response())),
956        }
957    } else if let Some(binary_content) = &input_data.binary_content {
958        // Binary file parsing
959        let format = input_data.format.unwrap_or(FileFormat::Bam);
960
961        match parse_binary_file(binary_content, format) {
962            Ok(query) => Ok(query),
963            Err(_) => Err(Box::new((
964                StatusCode::BAD_REQUEST,
965                Json(create_safe_error_response(
966                    "binary_parse_failed",
967                    "Unable to process binary file. Please verify the file format and try again.",
968                    Some("Binary file parsing failed during processing"),
969                )),
970            )
971                .into_response())),
972        }
973    } else {
974        Err(Box::new(
975            (
976                StatusCode::INTERNAL_SERVER_ERROR,
977                Json(ErrorResponse {
978                    error: "Internal error: no input data".to_string(),
979                    error_type: "internal_error".to_string(),
980                    details: None,
981                }),
982            )
983                .into_response(),
984        ))
985    }
986}
987
988/// Check if content appears to be binary
989fn is_binary_content(bytes: &[u8]) -> bool {
990    // Simple heuristic: if more than 1% of first 1024 bytes are non-printable, consider binary
991    let sample_size = std::cmp::min(bytes.len(), 1024);
992
993    // For very small samples, use a minimum threshold to avoid false positives
994    if sample_size < 10 {
995        return false; // Assume text for very small samples
996    }
997
998    let non_printable_count = bytes[..sample_size]
999        .iter()
1000        .filter(|&&b| b < 9 || (b > 13 && b < 32) || b > 126)
1001        .count();
1002
1003    // Use floating-point math to maintain consistent 1% threshold
1004    count_to_f64(non_printable_count) > (count_to_f64(sample_size) * 0.01)
1005}
1006
1007/// Detect binary format from filename
1008fn detect_binary_format(filename: &str) -> Option<FileFormat> {
1009    let lower = filename.to_lowercase();
1010    if std::path::Path::new(&lower)
1011        .extension()
1012        .is_some_and(|ext| ext.eq_ignore_ascii_case("bam"))
1013    {
1014        Some(FileFormat::Bam)
1015    } else if std::path::Path::new(&lower)
1016        .extension()
1017        .is_some_and(|ext| ext.eq_ignore_ascii_case("cram"))
1018    {
1019        Some(FileFormat::Cram)
1020    } else {
1021        None
1022    }
1023}
1024
1025/// Parse scoring weights from frontend format
1026fn parse_scoring_weights(weights: &HashMap<String, f64>) -> ScoringWeights {
1027    // Note: The frontend sends percentages (0-100), but the backend expects ratios (0-1)
1028    // New scoring model: contig_match, coverage, order, and conflict_penalty
1029    let contig_match = weights.get("contigMatch").unwrap_or(&70.0) / 100.0;
1030    let coverage = weights.get("coverage").unwrap_or(&20.0) / 100.0;
1031    let order = weights.get("orderScore").unwrap_or(&10.0) / 100.0;
1032    // Conflict penalty is a multiplier (0-1), not a weight percentage
1033    let conflict_penalty = weights.get("conflictPenalty").unwrap_or(&10.0) / 100.0;
1034
1035    ScoringWeights {
1036        contig_match,
1037        coverage,
1038        order,
1039        conflict_penalty,
1040    }
1041}
1042
1043/// Return list of references in catalog
1044async fn catalog_handler(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
1045    let refs: Vec<serde_json::Value> = state
1046        .catalog
1047        .references
1048        .iter()
1049        .map(|r| {
1050            serde_json::json!({
1051                "id": r.id.0,
1052                "display_name": r.display_name,
1053                "assembly": format!("{}", r.assembly),
1054                "source": format!("{}", r.source),
1055                "contig_count": r.contigs.len(),
1056                "has_decoy": r.has_decoy(),
1057                "has_alt": r.has_alt(),
1058                "tags": r.tags,
1059            })
1060        })
1061        .collect();
1062
1063    Json(serde_json::json!({
1064        "count": refs.len(),
1065        "references": refs,
1066    }))
1067}