1use std::collections::HashMap;
10use std::sync::Arc;
11use std::time::Duration;
12
13use chrono::{NaiveDate, Utc};
14use reqwest::{Client, StatusCode};
15use serde::Deserialize;
16use tokio::time::sleep;
17
18use crate::api_clients::SimpleEmbedder;
19use crate::ruvector_native::{Domain, SemanticVector};
20use crate::{FrameworkError, Result};
21
/// Pause, in milliseconds, inserted before each USPTO request.
const USPTO_RATE_LIMIT_MS: u64 = 200;

/// Pause, in milliseconds, configured for the EPO client.
const EPO_RATE_LIMIT_MS: u64 = 1000;

/// Upper bound on retry attempts for a single HTTP request.
const MAX_RETRIES: u32 = 3;

/// Base back-off in milliseconds; the retry helpers multiply it by the attempt number.
const RETRY_DELAY_MS: u64 = 1000;
27
28#[derive(Debug, Deserialize)]
34struct UsptoPatentsResponse {
35 #[serde(default)]
36 patents: Vec<UsptoPatent>,
37 #[serde(default)]
38 count: i32,
39 #[serde(default)]
40 total_patent_count: Option<i32>,
41}
42
43#[derive(Debug, Deserialize)]
45struct UsptoPatent {
46 patent_number: String,
48 #[serde(default)]
50 patent_title: Option<String>,
51 #[serde(default)]
53 patent_abstract: Option<String>,
54 #[serde(default)]
56 patent_date: Option<String>,
57 #[serde(default)]
59 app_date: Option<String>,
60 #[serde(default)]
62 assignees: Vec<UsptoAssignee>,
63 #[serde(default)]
65 inventors: Vec<UsptoInventor>,
66 #[serde(default)]
68 cpcs: Vec<UsptoCpc>,
69 #[serde(default)]
71 cited_patent_count: Option<i32>,
72 #[serde(default)]
73 citedby_patent_count: Option<i32>,
74}
75
76#[derive(Debug, Deserialize)]
77struct UsptoAssignee {
78 #[serde(default)]
79 assignee_organization: Option<String>,
80 #[serde(default)]
81 assignee_individual_name_first: Option<String>,
82 #[serde(default)]
83 assignee_individual_name_last: Option<String>,
84}
85
86#[derive(Debug, Deserialize)]
87struct UsptoInventor {
88 #[serde(default)]
89 inventor_name_first: Option<String>,
90 #[serde(default)]
91 inventor_name_last: Option<String>,
92}
93
94#[derive(Debug, Deserialize)]
95struct UsptoCpc {
96 #[serde(default)]
98 cpc_section_id: Option<String>,
99 #[serde(default)]
101 cpc_subclass_id: Option<String>,
102 #[serde(default)]
104 cpc_group_id: Option<String>,
105}
106
107#[derive(Debug, Deserialize)]
109struct UsptoCitationsResponse {
110 #[serde(default)]
111 patents: Vec<UsptoCitation>,
112}
113
114#[derive(Debug, Deserialize)]
115struct UsptoCitation {
116 patent_number: String,
117 #[serde(default)]
118 patent_title: Option<String>,
119}
120
121pub struct UsptoPatentClient {
127 client: Client,
128 base_url: String,
129 rate_limit_delay: Duration,
130 embedder: Arc<SimpleEmbedder>,
131}
132
133impl UsptoPatentClient {
134 pub fn new() -> Result<Self> {
139 let client = Client::builder()
140 .timeout(Duration::from_secs(30))
141 .user_agent("RuVector-Discovery/1.0")
142 .build()
143 .map_err(FrameworkError::Network)?;
144
145 Ok(Self {
146 client,
147 base_url: "https://search.patentsview.org/api/v1".to_string(),
148 rate_limit_delay: Duration::from_millis(USPTO_RATE_LIMIT_MS),
149 embedder: Arc::new(SimpleEmbedder::new(512)), })
151 }
152
153 pub async fn search_patents(
165 &self,
166 query: &str,
167 max_results: usize,
168 ) -> Result<Vec<SemanticVector>> {
169 let per_page = max_results.min(100);
170 let encoded_query = urlencoding::encode(query);
171
172 let url = format!(
175 "{}/patent/?q=patent_title:*{}*%20OR%20patent_abstract:*{}*&f=patent_id,patent_title,patent_abstract,patent_date,assignees,inventors,cpcs&o={{\"size\":{},\"matched_subentities_only\":true}}",
176 self.base_url, encoded_query, encoded_query, per_page
177 );
178
179 sleep(self.rate_limit_delay).await;
180
181 let response = self.fetch_with_retry(&url).await?;
182 let uspto_response: UsptoPatentsResponse = response.json().await?;
183
184 self.convert_patents_to_vectors(uspto_response.patents)
185 }
186
187 pub async fn search_by_assignee(
198 &self,
199 company_name: &str,
200 max_results: usize,
201 ) -> Result<Vec<SemanticVector>> {
202 let per_page = max_results.min(100);
203 let encoded_name = urlencoding::encode(company_name);
204
205 let url = format!(
207 "{}/patent/?q=assignees.assignee_organization:*{}*&f=patent_id,patent_title,patent_abstract,patent_date,assignees,inventors,cpcs&o={{\"size\":{},\"matched_subentities_only\":true}}",
208 self.base_url, encoded_name, per_page
209 );
210
211 sleep(self.rate_limit_delay).await;
212
213 let response = self.fetch_with_retry(&url).await?;
214 let uspto_response: UsptoPatentsResponse = response.json().await?;
215
216 self.convert_patents_to_vectors(uspto_response.patents)
217 }
218
219 pub async fn search_by_cpc(
237 &self,
238 cpc_class: &str,
239 max_results: usize,
240 ) -> Result<Vec<SemanticVector>> {
241 let per_page = max_results.min(100);
242 let encoded_cpc = urlencoding::encode(cpc_class);
243
244 let url = format!(
246 "{}/patent/?q=cpcs.cpc_group:{}*&f=patent_id,patent_title,patent_abstract,patent_date,assignees,inventors,cpcs&o={{\"size\":{},\"matched_subentities_only\":true}}",
247 self.base_url, encoded_cpc, per_page
248 );
249
250 sleep(self.rate_limit_delay).await;
251
252 let response = self.fetch_with_retry(&url).await?;
253 let uspto_response: UsptoPatentsResponse = response.json().await?;
254
255 self.convert_patents_to_vectors(uspto_response.patents)
256 }
257
258 pub async fn get_patent(&self, patent_number: &str) -> Result<Option<SemanticVector>> {
268 let url = format!(
270 "{}/patent/?q=patent_id:{}&f=patent_id,patent_title,patent_abstract,patent_date,assignees,inventors,cpcs&o={{\"size\":1}}",
271 self.base_url, patent_number
272 );
273
274 sleep(self.rate_limit_delay).await;
275
276 let response = self.fetch_with_retry(&url).await?;
277 let uspto_response: UsptoPatentsResponse = response.json().await?;
278
279 let mut vectors = self.convert_patents_to_vectors(uspto_response.patents)?;
280 Ok(vectors.pop())
281 }
282
283 pub async fn get_citations(
291 &self,
292 patent_number: &str,
293 ) -> Result<(Vec<SemanticVector>, Vec<SemanticVector>)> {
294 let citing = self.get_citing_patents(patent_number).await?;
296
297 let cited = self.get_cited_patents(patent_number).await?;
299
300 Ok((citing, cited))
301 }
302
303 async fn get_citing_patents(&self, _patent_number: &str) -> Result<Vec<SemanticVector>> {
306 Ok(Vec::new())
310 }
311
312 async fn get_cited_patents(&self, _patent_number: &str) -> Result<Vec<SemanticVector>> {
315 Ok(Vec::new())
319 }
320
321 fn convert_patents_to_vectors(&self, patents: Vec<UsptoPatent>) -> Result<Vec<SemanticVector>> {
323 let mut vectors = Vec::new();
324
325 for patent in patents {
326 let title = patent.patent_title.unwrap_or_else(|| "Untitled Patent".to_string());
327 let abstract_text = patent.patent_abstract.unwrap_or_default();
328
329 let text = format!("{} {}", title, abstract_text);
331 let embedding = self.embedder.embed_text(&text);
332
333 let timestamp = patent
335 .patent_date
336 .or(patent.app_date)
337 .as_ref()
338 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
339 .and_then(|d| d.and_hms_opt(0, 0, 0))
340 .map(|dt| dt.and_utc())
341 .unwrap_or_else(Utc::now);
342
343 let assignees = patent
345 .assignees
346 .iter()
347 .map(|a| {
348 a.assignee_organization
349 .clone()
350 .or_else(|| {
351 let first = a.assignee_individual_name_first.as_deref().unwrap_or("");
352 let last = a.assignee_individual_name_last.as_deref().unwrap_or("");
353 if !first.is_empty() || !last.is_empty() {
354 Some(format!("{} {}", first, last).trim().to_string())
355 } else {
356 None
357 }
358 })
359 .unwrap_or_default()
360 })
361 .filter(|s| !s.is_empty())
362 .collect::<Vec<_>>()
363 .join(", ");
364
365 let inventors = patent
367 .inventors
368 .iter()
369 .filter_map(|i| {
370 let first = i.inventor_name_first.as_deref().unwrap_or("");
371 let last = i.inventor_name_last.as_deref().unwrap_or("");
372 if !first.is_empty() || !last.is_empty() {
373 Some(format!("{} {}", first, last).trim().to_string())
374 } else {
375 None
376 }
377 })
378 .collect::<Vec<_>>()
379 .join(", ");
380
381 let cpc_codes = patent
383 .cpcs
384 .iter()
385 .filter_map(|cpc| {
386 cpc.cpc_group_id
387 .clone()
388 .or_else(|| cpc.cpc_subclass_id.clone())
389 .or_else(|| cpc.cpc_section_id.clone())
390 })
391 .collect::<Vec<_>>()
392 .join(", ");
393
394 let mut metadata = HashMap::new();
396 metadata.insert("patent_number".to_string(), patent.patent_number.clone());
397 metadata.insert("title".to_string(), title);
398 metadata.insert("abstract".to_string(), abstract_text);
399 metadata.insert("assignee".to_string(), assignees);
400 metadata.insert("inventors".to_string(), inventors);
401 metadata.insert("cpc_codes".to_string(), cpc_codes);
402 metadata.insert(
403 "citations_count".to_string(),
404 patent.citedby_patent_count.unwrap_or(0).to_string(),
405 );
406 metadata.insert(
407 "cited_count".to_string(),
408 patent.cited_patent_count.unwrap_or(0).to_string(),
409 );
410 metadata.insert("source".to_string(), "uspto".to_string());
411
412 vectors.push(SemanticVector {
413 id: format!("US{}", patent.patent_number),
414 embedding,
415 domain: Domain::Research, timestamp,
417 metadata,
418 });
419 }
420
421 Ok(vectors)
422 }
423
424 async fn fetch_with_retry(&self, url: &str) -> Result<reqwest::Response> {
426 let mut retries = 0;
427 loop {
428 match self.client.get(url).send().await {
429 Ok(response) => {
430 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES
431 {
432 retries += 1;
433 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
434 continue;
435 }
436 if !response.status().is_success() {
437 return Err(FrameworkError::Network(
438 reqwest::Error::from(response.error_for_status().unwrap_err()),
439 ));
440 }
441 return Ok(response);
442 }
443 Err(_) if retries < MAX_RETRIES => {
444 retries += 1;
445 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
446 }
447 Err(e) => return Err(FrameworkError::Network(e)),
448 }
449 }
450 }
451
452 #[allow(dead_code)]
454 async fn post_with_retry(
455 &self,
456 url: &str,
457 json: &serde_json::Value,
458 ) -> Result<reqwest::Response> {
459 let mut retries = 0;
460 loop {
461 match self.client.post(url).json(json).send().await {
462 Ok(response) => {
463 if response.status() == StatusCode::TOO_MANY_REQUESTS && retries < MAX_RETRIES
464 {
465 retries += 1;
466 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
467 continue;
468 }
469 if !response.status().is_success() {
470 return Err(FrameworkError::Network(
471 reqwest::Error::from(response.error_for_status().unwrap_err()),
472 ));
473 }
474 return Ok(response);
475 }
476 Err(_) if retries < MAX_RETRIES => {
477 retries += 1;
478 sleep(Duration::from_millis(RETRY_DELAY_MS * retries as u64)).await;
479 }
480 Err(e) => return Err(FrameworkError::Network(e)),
481 }
482 }
483 }
484}
485
486impl Default for UsptoPatentClient {
487 fn default() -> Self {
488 Self::new().expect("Failed to create USPTO client")
489 }
490}
491
492pub struct EpoClient {
502 client: Client,
503 base_url: String,
504 consumer_key: Option<String>,
505 consumer_secret: Option<String>,
506 rate_limit_delay: Duration,
507 embedder: Arc<SimpleEmbedder>,
508}
509
510impl EpoClient {
511 pub fn new(consumer_key: Option<String>, consumer_secret: Option<String>) -> Result<Self> {
519 let client = Client::builder()
520 .timeout(Duration::from_secs(30))
521 .build()
522 .map_err(FrameworkError::Network)?;
523
524 Ok(Self {
525 client,
526 base_url: "https://ops.epo.org/3.2/rest-services".to_string(),
527 consumer_key,
528 consumer_secret,
529 rate_limit_delay: Duration::from_millis(EPO_RATE_LIMIT_MS),
530 embedder: Arc::new(SimpleEmbedder::new(512)),
531 })
532 }
533
534 pub async fn search_patents(
539 &self,
540 _query: &str,
541 _max_results: usize,
542 ) -> Result<Vec<SemanticVector>> {
543 Err(FrameworkError::Config(
544 "EPO client not yet implemented. Requires OAuth authentication.".to_string(),
545 ))
546 }
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// Constructing the USPTO client succeeds.
    #[tokio::test]
    async fn test_uspto_client_creation() {
        assert!(UsptoPatentClient::new().is_ok());
    }

    /// Constructing the EPO client without credentials succeeds.
    #[tokio::test]
    async fn test_epo_client_creation() {
        assert!(EpoClient::new(None, None).is_ok());
    }

    /// `Default` yields a client with the USPTO rate-limit delay.
    #[test]
    fn test_default_client() {
        let expected = Duration::from_millis(USPTO_RATE_LIMIT_MS);
        let client = UsptoPatentClient::default();
        assert_eq!(client.rate_limit_delay, expected);
    }

    /// `new` yields a client with the USPTO rate-limit delay.
    #[test]
    fn test_rate_limiting() {
        let expected = Duration::from_millis(USPTO_RATE_LIMIT_MS);
        let client = UsptoPatentClient::new().unwrap();
        assert_eq!(client.rate_limit_delay, expected);
    }

    /// Checks the length-based mapping from CPC code to field name defined in this test.
    #[test]
    fn test_cpc_classification_mapping() {
        let test_cases = vec![
            ("Y02", "cpc_section_id"),
            ("G06N", "cpc_subclass_id"),
            ("Y02E10/50", "cpc_group_id"),
        ];

        for (code, expected_field) in test_cases {
            let field = match code.len() {
                0..=3 => "cpc_section_id",
                4 => "cpc_subclass_id",
                _ => "cpc_group_id",
            };
            assert_eq!(field, expected_field, "Failed for CPC code: {}", code);
        }
    }

    /// Live search; ignored by default and tolerant of network failures.
    #[tokio::test]
    #[ignore]
    async fn test_search_patents_integration() {
        let client = UsptoPatentClient::new().unwrap();

        let patents = match client.search_patents("quantum computing", 5).await {
            Ok(patents) => patents,
            Err(e) => {
                println!("Network test skipped: {}", e);
                return;
            }
        };

        assert!(patents.len() <= 5);
        for patent in patents {
            assert!(patent.id.starts_with("US"));
            assert_eq!(patent.domain, Domain::Research);
            assert!(!patent.metadata.is_empty());
        }
    }

    /// Live CPC search; ignored by default and tolerant of network failures.
    #[tokio::test]
    #[ignore]
    async fn test_search_by_cpc_integration() {
        let client = UsptoPatentClient::new().unwrap();

        let patents = match client.search_by_cpc("G06N", 5).await {
            Ok(patents) => patents,
            Err(e) => {
                println!("Network test skipped: {}", e);
                return;
            }
        };

        assert!(patents.len() <= 5);
        for patent in patents {
            let cpc_codes = patent
                .metadata
                .get("cpc_codes")
                .map_or("", |codes| codes.as_str());
            assert!(
                cpc_codes.contains("G06N") || cpc_codes.is_empty(),
                "Expected G06N in CPC codes, got: {}",
                cpc_codes
            );
        }
    }

    /// The client's embedder produces 512-element embeddings.
    #[test]
    fn test_embedding_dimension() {
        let client = UsptoPatentClient::new().unwrap();
        let vector = client.embedder.embed_text("test patent description");
        assert_eq!(vector.len(), 512);
    }
}