use std::collections::HashSet;
use std::time::Duration;
use futures::future::join_all;
use crate::model::{Match, Query};
use crate::Result;
pub mod crates_io;
pub mod docker_hub;
pub mod github;
pub mod go;
pub mod hacker_news;
pub mod maven;
pub mod npm;
pub mod nuget;
pub mod pypi;
pub mod rubygems;
pub mod vscode;
/// A single searchable source that can be queried for matches.
///
/// Implementors must be `Send + Sync`. The `async_trait` attribute is what
/// allows the trait to declare an `async fn`.
#[async_trait::async_trait]
pub trait SourceAdapter: Send + Sync {
/// Returns the `Source` value that identifies this adapter.
fn id(&self) -> crate::model::Source;
/// Searches this source for `query`, yielding the matches found or an error.
async fn search(&self, query: &Query) -> Result<Vec<Match>>;
}
use crate::model::Source as S;
/// Builds the `reqwest` client that `sources_for` clones into each adapter.
///
/// The client uses a 10-second request timeout, a 5-second connect timeout,
/// and a user agent of the form `patent/<crate version> (prior-art search)`.
///
/// # Panics
///
/// Panics if the client builder fails.
fn http_client() -> reqwest::Client {
    const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
    const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
    const USER_AGENT: &str = concat!(
        "patent/",
        env!("CARGO_PKG_VERSION"),
        " (prior-art search)"
    );

    let builder = reqwest::Client::builder()
        .timeout(REQUEST_TIMEOUT)
        .connect_timeout(CONNECT_TIMEOUT)
        .user_agent(USER_AGENT);

    builder.build().expect("failed to build HTTP client")
}
/// Reports whether `idea` mentions any of `terms` as a standalone word or phrase.
///
/// Only `idea` is lowercased, so every entry in `terms` must already be
/// lowercase. Every occurrence of a term is examined, not just the first one.
///
/// An occurrence is rejected when it is glued to a neighbouring ASCII letter or
/// digit. That rule is enforced only on the sides where the term itself starts
/// or ends with an ASCII alphanumeric character: a punctuation edge (the `.` of
/// `.net`, the `#` of `c#`) already delimits the term, so `asp.net` matches
/// `.net` while `internet` still does not match `net`.
///
/// Empty terms never match, and an empty `terms` slice yields `false`.
fn idea_contains(idea: &str, terms: &[&str]) -> bool {
    let lower = idea.to_lowercase();
    let haystack = lower.as_bytes();

    terms.iter().any(|&term| {
        let edges = term.as_bytes();
        // Decide once per term which sides need a word boundary.
        let (guard_before, guard_after) = match (edges.first(), edges.last()) {
            (Some(first), Some(last)) => {
                (first.is_ascii_alphanumeric(), last.is_ascii_alphanumeric())
            }
            // An empty term carries no signal; never let it match.
            _ => return false,
        };

        lower.match_indices(term).any(|(start, _)| {
            let end = start + term.len();
            // `start` and `end` are byte offsets produced by `match_indices`,
            // so both are within `haystack` (with `end` possibly one past it).
            let glued_before = guard_before
                && haystack[..start]
                    .last()
                    .map_or(false, u8::is_ascii_alphanumeric);
            let glued_after = guard_after
                && haystack
                    .get(end)
                    .map_or(false, u8::is_ascii_alphanumeric);
            !(glued_before || glued_after)
        })
    })
}
/// Inserts each entry of `sources` into `set`; entries already present stay as they are.
fn add(set: &mut HashSet<S>, sources: &[S]) {
    for &source in sources {
        set.insert(source);
    }
}
fn detect_sources(idea: &str) -> HashSet<S> {
let mut s = HashSet::new();
s.insert(S::GitHub);
s.insert(S::HackerNews);
if idea_contains(idea, &["rust", "crate", "cargo"]) {
s.insert(S::CratesIo);
}
if idea_contains(
idea,
&["npm", "node", "javascript", "typescript", "deno", "bun"],
) {
s.insert(S::Npm);
}
if idea_contains(
idea,
&["python", "pip", "django", "flask", "pytorch", "pandas"],
) {
s.insert(S::PyPI);
}
if idea_contains(idea, &["go", "golang", "goroutine"]) {
s.insert(S::Go);
}
if idea_contains(
idea,
&["java", "kotlin", "spring", "maven", "gradle", "scala"],
) {
s.insert(S::Maven);
}
if idea_contains(idea, &["ruby", "rails", "sinatra", "gem"]) {
s.insert(S::RubyGems);
}
if idea_contains(
idea,
&["c#", ".net", "csharp", "dotnet", "nuget", "blazor", "unity"],
) {
s.insert(S::NuGet);
}
if idea_contains(
idea,
&[
"ai",
"llm",
"machine learning",
"deep learning",
"neural",
"model training",
"inference",
"embedding",
"nlp",
"computer vision",
"data science",
"data pipeline",
],
) {
add(&mut s, &[S::PyPI, S::Npm]);
}
if idea_contains(idea, &["cli", "command line", "terminal tool", "shell"]) {
add(&mut s, &[S::CratesIo, S::Go, S::Npm, S::PyPI]);
}
if idea_contains(
idea,
&[
"frontend",
"react",
"vue",
"angular",
"svelte",
"browser",
"css",
"ui component",
"web component",
"spa",
],
) {
s.insert(S::Npm);
}
if idea_contains(
idea,
&[
"api",
"backend",
"rest",
"graphql",
"microservice",
"web server",
],
) {
add(&mut s, &[S::Npm, S::PyPI, S::Go]);
}
if idea_contains(
idea,
&[
"mobile",
"ios",
"android",
"react native",
"flutter",
"swift",
"swiftui",
],
) {
add(&mut s, &[S::Npm, S::Maven]);
}
if idea_contains(
idea,
&[
"game",
"graphics",
"rendering",
"opengl",
"vulkan",
"bevy",
"godot",
],
) {
add(&mut s, &[S::CratesIo, S::NuGet]);
}
if idea_contains(idea, &["embedded", "firmware", "microcontroller", "rtos"]) {
s.insert(S::CratesIo);
}
if idea_contains(
idea,
&[
"docker",
"container",
"kubernetes",
"k8s",
"helm",
"deploy",
"infrastructure",
],
) {
add(&mut s, &[S::DockerHub, S::Go]);
}
if idea_contains(idea, &["vscode", "extension", "plugin", "ide", "editor"]) {
add(&mut s, &[S::VsCodeMarketplace, S::Npm]);
}
const ALWAYS_ON: usize = 2; if s.len() <= ALWAYS_ON {
add(&mut s, &[S::Npm, S::PyPI, S::CratesIo]);
}
s
}
/// Constructs the boxed adapter for the source `id`, handing it `client`.
///
/// The match has no wildcard arm, so a new `Source` variant will not compile
/// until it is given an adapter here.
fn build_source(id: S, client: reqwest::Client) -> Box<dyn SourceAdapter> {
    // Arms follow the order of the `mod` declarations in this file.
    match id {
        S::CratesIo => Box::new(crates_io::CratesIo::new(client)),
        S::DockerHub => Box::new(docker_hub::DockerHub::new(client)),
        S::GitHub => Box::new(github::GitHub::new(client)),
        S::Go => Box::new(go::GoPkgDev::new(client)),
        S::HackerNews => Box::new(hacker_news::HackerNews::new(client)),
        S::Maven => Box::new(maven::Maven::new(client)),
        S::Npm => Box::new(npm::Npm::new(client)),
        S::NuGet => Box::new(nuget::NuGet::new(client)),
        S::PyPI => Box::new(pypi::PyPI::new(client)),
        S::RubyGems => Box::new(rubygems::RubyGems::new(client)),
        S::VsCodeMarketplace => Box::new(vscode::VsCodeMarketplace::new(client)),
    }
}
/// Builds one adapter for every source that `detect_sources` selects for
/// `query.idea`, each receiving a clone of a single client from `http_client`.
///
/// The adapters come out in the iteration order of a `HashSet`, so their order
/// is unspecified.
fn sources_for(query: &Query) -> Vec<Box<dyn SourceAdapter>> {
    let shared_client = http_client();
    let selected = detect_sources(&query.idea);

    let mut adapters = Vec::with_capacity(selected.len());
    for id in selected {
        adapters.push(build_source(id, shared_client.clone()));
    }
    adapters
}
/// Combined result of searching a set of sources.
///
/// The field descriptions below describe how `search_sources` fills the struct.
pub struct SearchOutcome {
/// Matches collected from every source that answered, de-duplicated by URL.
pub matches: Vec<Match>,
/// Sources whose search returned `Ok`, on the first attempt or on the retry.
pub reached: Vec<crate::model::Source>,
/// Sources whose search still returned an error after the retry.
pub failed: Vec<crate::model::Source>,
}
/// Searches every source that `sources_for` selects for `query` and returns
/// the combined outcome.
pub async fn search_all(query: &Query) -> SearchOutcome {
    let adapters = sources_for(query);
    search_sources(&adapters, query).await
}
/// Runs `query` against all of `sources` concurrently and merges the results.
///
/// A source whose search fails is tried once more after an 800 ms pause. If
/// that second attempt fails too, a warning is printed to stderr and the
/// source is listed in `failed`; otherwise it is listed in `reached` and its
/// matches are kept. The collected matches are passed through `dedup`.
pub async fn search_sources(sources: &[Box<dyn SourceAdapter>], query: &Query) -> SearchOutcome {
    const RETRY_DELAY: Duration = Duration::from_millis(800);

    let attempts = sources.iter().map(|adapter| {
        // Read the id up front so it is available even when the search fails.
        let id = adapter.id();
        async move {
            let first = adapter.search(query).await;
            let outcome = if first.is_err() {
                tokio::time::sleep(RETRY_DELAY).await;
                adapter.search(query).await
            } else {
                first
            };
            (id, outcome)
        }
    });

    let mut collected = Vec::new();
    let mut reached = Vec::new();
    let mut failed = Vec::new();
    for (id, outcome) in join_all(attempts).await {
        let found = match outcome {
            Ok(found) => found,
            Err(e) => {
                eprintln!("⚠ {id} not reached: {e}");
                failed.push(id);
                continue;
            }
        };
        reached.push(id);
        collected.extend(found);
    }

    SearchOutcome {
        matches: dedup(collected),
        reached,
        failed,
    }
}
/// Removes every match whose `url` already appeared earlier in `matches`.
///
/// The first match for each URL is kept and the relative order of the
/// survivors is unchanged.
pub fn dedup(matches: Vec<Match>) -> Vec<Match> {
    let mut seen_urls = HashSet::new();
    let mut unique = matches;
    // `insert` returns `false` for a URL that is already in the set.
    unique.retain(|m| seen_urls.insert(m.url.clone()));
    unique
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: does `idea` contain the single `term`?
    fn has(idea: &str, term: &str) -> bool {
        idea_contains(idea, &[term])
    }

    /// Shorthand: does `detect_sources` pick `source` for `idea`?
    fn selects(idea: &str, source: S) -> bool {
        detect_sources(idea).contains(&source)
    }

    #[test]
    fn idea_contains_respects_word_boundaries() {
        // A whole word is found.
        assert!(has("a fast async runtime", "async"));
        // A term buried inside a longer word is not.
        assert!(!has("rainbow trains", "ai"));
        assert!(!has("googol", "go"));
        assert!(!has("django framework", "go"));
    }

    #[test]
    fn idea_contains_checks_all_occurrences_not_just_the_first() {
        // "cargo" contains a non-matching "go" before the real one.
        assert!(has("a tool for cargo packages written in go", "go"));
        // "email" contains a non-matching "ai" before the real one.
        assert!(has("email summarizer that uses ai", "ai"));
        // "good" contains a non-matching "go" before the real one.
        assert!(has("a good way to go fast", "go"));
    }

    #[test]
    fn github_and_hacker_news_are_always_selected() {
        let ideas = ["a ruby gem for parsing csv", "asdf qwer zxcv", "rust crate"];
        for idea in ideas {
            let picked = detect_sources(idea);
            assert!(picked.contains(&S::GitHub), "GitHub missing for {idea:?}");
            assert!(
                picked.contains(&S::HackerNews),
                "Hacker News missing for {idea:?}"
            );
        }
    }

    #[test]
    fn every_built_source_is_reachable_from_some_idea() {
        let ideas = [
            "rust crate for embedded firmware",
            "a python pandas data pipeline",
            "a typescript react frontend component",
            "a golang microservice",
            "a java spring boot service",
            "a ruby on rails gem",
            "a c# dotnet unity game",
            "a docker container for kubernetes",
            "a vscode extension for editors",
            "anything at all with no signal",
        ];
        // Union of everything selected across all sample ideas.
        let seen: HashSet<S> = ideas
            .iter()
            .flat_map(|idea| detect_sources(idea))
            .collect();

        let every_variant = [
            S::CratesIo,
            S::GitHub,
            S::Npm,
            S::PyPI,
            S::HackerNews,
            S::Go,
            S::Maven,
            S::RubyGems,
            S::DockerHub,
            S::VsCodeMarketplace,
            S::NuGet,
        ];
        for variant in every_variant {
            assert!(
                seen.contains(&variant),
                "{variant} is built but never selected by detect_sources"
            );
        }
    }

    #[test]
    fn language_mentions_select_their_registry() {
        assert!(selects("a rust crate for parsing", S::CratesIo));
        assert!(selects("a python library for parsing", S::PyPI));
        assert!(selects("a docker image for caching", S::DockerHub));
        assert!(selects("a ruby gem for parsing", S::RubyGems));
    }

    #[test]
    fn go_and_ai_match_natural_phrasings() {
        assert!(selects("a fast Go library for parsing json", S::Go));
        assert!(selects("a library that uses AI to summarize text", S::PyPI));
        assert!(selects("a cargo workspace tool also written in go", S::Go));
    }

    #[test]
    fn port_killer_demo_searches_npm() {
        let ideas = [
            "interactive cli to kill whatever's on a port",
            "CLI tool that kills whatever's on a port",
        ];
        for idea in ideas {
            let s = detect_sources(idea);
            assert!(s.contains(&S::Npm), "npm missing for {idea:?}: {s:?}");
        }
    }

    #[test]
    fn no_signal_falls_back_to_broad_sweep() {
        let fallback = detect_sources("asdf qwer zxcv hjkl");
        assert!(fallback.contains(&S::Npm));
        assert!(fallback.contains(&S::PyPI));
        assert!(fallback.contains(&S::CratesIo));
    }
}