Skip to main content

nika_init/
showcase_fetch.rs

1//! Showcase Fetch — 15 workflows demonstrating ALL 9 extract modes + 3 response modes
2//!
3//! Complete coverage of the `fetch:` verb's extraction and response capabilities:
4//!
5//! **Extract modes (9):**
6//! - `markdown` — Full HTML to clean Markdown (htmd)
7//! - `article` — Main article content only (Readability/dom_smoothie)
8//! - `text` — Visible text, optionally filtered by `selector:`
9//! - `selector` — Raw HTML matching a CSS selector
10//! - `metadata` — OG tags, Twitter Cards, JSON-LD, SEO tags
11//! - `links` — Classified link list (internal/external, nav/content/footer)
12//! - `jsonpath` — JSONPath query on JSON API responses
13//! - `feed` — RSS/Atom/JSON Feed parsing (feed-rs)
14//! - `llm_txt` — AI-era content discovery (/.well-known/llm.txt, /llms.txt)
15//!
16//! **Response modes (3):**
17//! - `full` — JSON envelope: { status, headers, body, url }
18//! - `binary` — Store in CAS, return hash for media pipeline
19//! - (default) — Raw body text
20//!
21//! Workflows 01-09: one extract mode each
22//! Workflows 10-12: one response mode each
23//! Workflows 13-15: combos (multi-extract, feed-to-newsletter, scrape+analyze)
24//!
25//! All use `{{PROVIDER}}`/`{{MODEL}}` for LLM tasks. Pure-fetch workflows set
26//! `requires_llm: false` in comments.
27
28use super::WorkflowTemplate;
29
30/// Return all 15 showcase fetch workflows.
31pub fn get_showcase_fetch_workflows() -> Vec<WorkflowTemplate> {
32    vec![
33        WorkflowTemplate {
34            filename: "01-fetch-markdown.nika.yaml",
35            tier_dir: "showcase-fetch",
36            content: FETCH_01_MARKDOWN,
37        },
38        WorkflowTemplate {
39            filename: "02-fetch-article.nika.yaml",
40            tier_dir: "showcase-fetch",
41            content: FETCH_02_ARTICLE,
42        },
43        WorkflowTemplate {
44            filename: "03-fetch-text-selector.nika.yaml",
45            tier_dir: "showcase-fetch",
46            content: FETCH_03_TEXT,
47        },
48        WorkflowTemplate {
49            filename: "04-fetch-selector-html.nika.yaml",
50            tier_dir: "showcase-fetch",
51            content: FETCH_04_SELECTOR,
52        },
53        WorkflowTemplate {
54            filename: "05-fetch-metadata.nika.yaml",
55            tier_dir: "showcase-fetch",
56            content: FETCH_05_METADATA,
57        },
58        WorkflowTemplate {
59            filename: "06-fetch-links.nika.yaml",
60            tier_dir: "showcase-fetch",
61            content: FETCH_06_LINKS,
62        },
63        WorkflowTemplate {
64            filename: "07-fetch-jsonpath.nika.yaml",
65            tier_dir: "showcase-fetch",
66            content: FETCH_07_JSONPATH,
67        },
68        WorkflowTemplate {
69            filename: "08-fetch-feed.nika.yaml",
70            tier_dir: "showcase-fetch",
71            content: FETCH_08_FEED,
72        },
73        WorkflowTemplate {
74            filename: "09-fetch-llm-txt.nika.yaml",
75            tier_dir: "showcase-fetch",
76            content: FETCH_09_LLM_TXT,
77        },
78        WorkflowTemplate {
79            filename: "10-response-full.nika.yaml",
80            tier_dir: "showcase-fetch",
81            content: FETCH_10_RESPONSE_FULL,
82        },
83        WorkflowTemplate {
84            filename: "11-response-binary.nika.yaml",
85            tier_dir: "showcase-fetch",
86            content: FETCH_11_RESPONSE_BINARY,
87        },
88        WorkflowTemplate {
89            filename: "12-response-default.nika.yaml",
90            tier_dir: "showcase-fetch",
91            content: FETCH_12_RESPONSE_DEFAULT,
92        },
93        WorkflowTemplate {
94            filename: "13-multi-extract-comparison.nika.yaml",
95            tier_dir: "showcase-fetch",
96            content: FETCH_13_MULTI_EXTRACT,
97        },
98        WorkflowTemplate {
99            filename: "14-rss-to-newsletter.nika.yaml",
100            tier_dir: "showcase-fetch",
101            content: FETCH_14_RSS_NEWSLETTER,
102        },
103        WorkflowTemplate {
104            filename: "15-scrape-and-analyze.nika.yaml",
105            tier_dir: "showcase-fetch",
106            content: FETCH_15_SCRAPE_ANALYZE,
107        },
108    ]
109}
110
111// =============================================================================
112// 01 — Markdown Extraction
113// fetch: blog URL -> extract:markdown -> artifact
114// =============================================================================
115
/// Workflow 01 — `extract: markdown`: convert a whole page (Rust Blog) to
/// Markdown, save it as an artifact, then log its byte length via `exec:`.
/// Pure fetch — no LLM required.
const FETCH_01_MARKDOWN: &str = r##"# =============================================================================
# SHOWCASE FETCH 01 — Markdown Extraction
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-markdown
#
# Fetches the Rust Blog and converts the entire HTML page to clean Markdown.
# The htmd library strips navigation, scripts, styles, and produces
# LLM-ready content. Artifact saves the result to disk.
#
# Run: nika run workflows/showcase-fetch/01-fetch-markdown.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-markdown-showcase
description: "Convert a blog homepage to clean Markdown via extract: markdown"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: fetch_blog
    description: "Fetch Rust Blog and convert to Markdown"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: markdown
      timeout: 20
    artifact:
      path: rust-blog-markdown.md

  - id: log_size
    depends_on: [fetch_blog]
    with:
      content: $fetch_blog
    exec:
      command: |
        echo "Markdown extraction complete. Content length: $(echo '{{with.content}}' | wc -c | tr -d ' ') bytes"
      shell: true
"##;
155
156// =============================================================================
157// 02 — Article Extraction
158// fetch: news URL -> extract:article -> artifact
159// =============================================================================
160
/// Workflow 02 — `extract: article`: Readability-style main-content
/// extraction from the Rust Blog, saved as an artifact, then a `nika:log`
/// confirmation. Pure fetch — no LLM required.
const FETCH_02_ARTICLE: &str = r##"# =============================================================================
# SHOWCASE FETCH 02 — Article Extraction (Readability)
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-article
#
# Extracts only the main article content from a webpage using the
# Readability algorithm (dom_smoothie). Strips navigation, ads, sidebars,
# cookie banners — leaving just the primary reading content.
#
# Run: nika run workflows/showcase-fetch/02-fetch-article.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-article-showcase
description: "Extract main article content with Readability via extract: article"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: fetch_article
    description: "Extract article content from Rust Blog"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: article
      timeout: 20
    artifact:
      path: rust-blog-article.md

  - id: log_result
    depends_on: [fetch_article]
    with:
      article: $fetch_article
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Article extraction complete — content ready for LLM consumption"
"##;
201
202// =============================================================================
203// 03 — Text Extraction with CSS Selector
204// fetch: URL -> extract:text -> selector: "article p"
205// =============================================================================
206
/// Workflow 03 — `extract: text`: two fetches of the same httpbin page,
/// one filtered by `selector: "p"` and one unfiltered, then a `nika:log`
/// task depending on both. Pure fetch — no LLM required.
const FETCH_03_TEXT: &str = r##"# =============================================================================
# SHOWCASE FETCH 03 — Text Extraction with CSS Selector
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-html
#
# Extracts visible text from a webpage. When combined with selector:,
# only text from matching CSS elements is returned. Without selector:,
# returns all visible text (no HTML tags).
#
# Run: nika run workflows/showcase-fetch/03-fetch-text-selector.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-text-selector-showcase
description: "Extract visible text filtered by CSS selector via extract: text"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Text from specific elements only
  - id: fetch_paragraphs
    description: "Extract paragraph text from httpbin HTML page"
    fetch:
      url: "https://httpbin.org/html"
      extract: text
      selector: "p"
      timeout: 15
    artifact:
      path: httpbin-paragraphs.txt

  # All visible text (no selector)
  - id: fetch_all_text
    description: "Extract all visible text from httpbin HTML page"
    fetch:
      url: "https://httpbin.org/html"
      extract: text
      timeout: 15
    artifact:
      path: httpbin-all-text.txt

  - id: compare_sizes
    depends_on: [fetch_paragraphs, fetch_all_text]
    with:
      filtered: $fetch_paragraphs
      full: $fetch_all_text
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Filtered paragraphs vs full text extracted successfully"
"##;
260
261// =============================================================================
262// 04 — Raw HTML Selector
// fetch: URL -> extract:selector -> selector: "h1" / "p"
264// =============================================================================
265
/// Workflow 04 — `extract: selector`: raw HTML fragments for `"h1"` and
/// `"p"` selectors from httpbin, each saved as an .html artifact, then a
/// `nika:log` confirmation. Pure fetch — no LLM required.
const FETCH_04_SELECTOR: &str = r##"# =============================================================================
# SHOWCASE FETCH 04 — Raw HTML Selector Extraction
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-html
#
# Returns the raw HTML of elements matching a CSS selector. Unlike
# extract: text (which strips tags), this preserves the HTML structure.
# Useful for scraping specific DOM fragments.
#
# Run: nika run workflows/showcase-fetch/04-fetch-selector-html.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-selector-html-showcase
description: "Extract raw HTML matching CSS selectors via extract: selector"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: fetch_headings
    description: "Extract all heading elements from httpbin HTML"
    fetch:
      url: "https://httpbin.org/html"
      extract: selector
      selector: "h1"
      timeout: 15
    artifact:
      path: httpbin-headings.html

  - id: fetch_paragraphs_html
    description: "Extract paragraph HTML from httpbin"
    fetch:
      url: "https://httpbin.org/html"
      extract: selector
      selector: "p"
      timeout: 15
    artifact:
      path: httpbin-paragraphs.html

  - id: log_done
    depends_on: [fetch_headings, fetch_paragraphs_html]
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Raw HTML selector extraction complete — headings and paragraphs captured"
"##;
315
316// =============================================================================
317// 05 — Metadata Extraction
318// fetch: URL -> extract:metadata -> structured OG/Twitter/JSON-LD
319// =============================================================================
320
/// Workflow 05 — `extract: metadata`: structured page metadata (OG /
/// Twitter Cards / JSON-LD / SEO) from GitHub and the Rust Blog, saved as
/// JSON artifacts, then a `nika:log` confirmation. Pure fetch — no LLM.
const FETCH_05_METADATA: &str = r##"# =============================================================================
# SHOWCASE FETCH 05 — Metadata Extraction (OG / Twitter / JSON-LD / SEO)
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-html
#
# Extracts structured metadata from a webpage: Open Graph tags,
# Twitter Cards, JSON-LD structured data, and basic SEO tags
# (title, description, canonical URL, etc.). Returns JSON.
#
# Run: nika run workflows/showcase-fetch/05-fetch-metadata.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-metadata-showcase
description: "Extract OG, Twitter Cards, JSON-LD, and SEO metadata via extract: metadata"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: github_metadata
    description: "Extract metadata from GitHub homepage"
    fetch:
      url: "https://github.com"
      extract: metadata
      timeout: 15
    artifact:
      path: github-metadata.json
      format: json

  - id: rust_blog_metadata
    description: "Extract metadata from Rust Blog"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: metadata
      timeout: 15
    artifact:
      path: rust-blog-metadata.json
      format: json

  - id: log_metadata
    depends_on: [github_metadata, rust_blog_metadata]
    with:
      gh: $github_metadata
      rust: $rust_blog_metadata
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Metadata extraction complete for GitHub and Rust Blog"
"##;
373
374// =============================================================================
375// 06 — Link Extraction
376// fetch: URL -> extract:links -> internal/external classification
377// =============================================================================
378
/// Workflow 06 — `extract: links`: classified link list from Hacker News
/// saved as a JSON artifact, then a `nika:log` confirmation. Pure fetch —
/// no LLM required.
const FETCH_06_LINKS: &str = r##"# =============================================================================
# SHOWCASE FETCH 06 — Link Extraction and Classification
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-html
#
# Extracts all links from a webpage and classifies them:
# - Internal vs external
# - Navigation vs content vs footer
# Returns structured JSON with URL, text, type, and zone.
#
# Run: nika run workflows/showcase-fetch/06-fetch-links.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-links-showcase
description: "Extract and classify links via extract: links"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: extract_links
    description: "Extract and classify all links from Hacker News"
    fetch:
      url: "https://news.ycombinator.com"
      extract: links
      timeout: 15
    artifact:
      path: hn-links.json
      format: json

  - id: log_links
    depends_on: [extract_links]
    with:
      links: $extract_links
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Link extraction complete — internal/external classification ready"
"##;
421
422// =============================================================================
423// 07 — JSONPath Extraction
424// fetch: API -> extract:jsonpath -> selector: "$.data[*].name"
425// =============================================================================
426
/// Workflow 07 — `extract: jsonpath`: three JSONPath queries (two against
/// httpbin's /json fixture, one against the HN Algolia search API), each
/// saved as a JSON artifact, then a `nika:log` summary. Pure fetch — no LLM.
const FETCH_07_JSONPATH: &str = r##"# =============================================================================
# SHOWCASE FETCH 07 — JSONPath Extraction
# =============================================================================
# requires_llm: false
# category: fetch-extract
#
# Queries JSON APIs using JSONPath expressions. Zero external dependencies —
# JSONPath is always available. The selector: field holds the JSONPath query.
# Surgical extraction from massive JSON payloads.
#
# Run: nika run workflows/showcase-fetch/07-fetch-jsonpath.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-jsonpath-showcase
description: "Extract specific fields from JSON APIs via extract: jsonpath"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # JSONPath on httpbin structured JSON
  - id: slideshow_title
    description: "Extract slideshow title from httpbin JSON"
    fetch:
      url: "https://httpbin.org/json"
      extract: jsonpath
      selector: "$.slideshow.title"
      timeout: 10
    artifact:
      path: slideshow-title.json
      format: json

  # JSONPath on nested array
  - id: slide_titles
    description: "Extract all slide titles from httpbin JSON"
    fetch:
      url: "https://httpbin.org/json"
      extract: jsonpath
      selector: "$.slideshow.slides[*].title"
      timeout: 10
    artifact:
      path: slide-titles.json
      format: json

  # JSONPath on Hacker News Algolia API
  - id: hn_search
    description: "Search Hacker News and extract story titles"
    fetch:
      url: "https://hn.algolia.com/api/v1/search?query=rust&tags=story&hitsPerPage=5"
      extract: jsonpath
      selector: "$.hits[*].title"
      timeout: 15
    artifact:
      path: hn-rust-titles.json
      format: json

  - id: log_results
    depends_on: [slideshow_title, slide_titles, hn_search]
    with:
      title: $slideshow_title
      slides: $slide_titles
      hn: $hn_search
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "JSONPath extraction complete — 3 queries across 2 APIs"
"##;
495
496// =============================================================================
497// 08 — RSS/Atom Feed Parsing
498// fetch: RSS URL -> extract:feed -> structured entries
499// =============================================================================
500
/// Workflow 08 — `extract: feed`: parse the Rust Blog Atom feed into
/// structured JSON, save it as an artifact, then log via `nika:log`.
/// Pure fetch — no LLM required.
const FETCH_08_FEED: &str = r##"# =============================================================================
# SHOWCASE FETCH 08 — RSS/Atom Feed Parsing
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-feed
#
# Parses RSS, Atom, and JSON Feed formats using the feed-rs library.
# Returns structured JSON with title, entries, dates, authors, and links.
# Works with any standard syndication feed.
#
# Run: nika run workflows/showcase-fetch/08-fetch-feed.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-feed-showcase
description: "Parse RSS/Atom feeds into structured JSON via extract: feed"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: rust_feed
    description: "Parse the Rust Blog Atom feed"
    fetch:
      url: "https://blog.rust-lang.org/feed.xml"
      extract: feed
      timeout: 15
    artifact:
      path: rust-feed.json
      format: json

  - id: log_feed
    depends_on: [rust_feed]
    with:
      feed: $rust_feed
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "RSS feed parsed — entries extracted and structured as JSON"
"##;
542
543// =============================================================================
544// 09 — LLM.txt Discovery
545// fetch: URL -> extract:llm_txt -> display content
546// =============================================================================
547
/// Workflow 09 — `extract: llm_txt`: llms.txt discovery against
/// docs.anthropic.com, saved as a Markdown artifact, then a `nika:log`
/// confirmation. Pure fetch — no LLM required.
const FETCH_09_LLM_TXT: &str = r##"# =============================================================================
# SHOWCASE FETCH 09 — LLM.txt Content Discovery
# =============================================================================
# requires_llm: false
# category: fetch-extract
#
# AI-era content discovery. Checks for /.well-known/llm.txt and /llms.txt
# files that websites publish to help LLMs understand their content.
# Part of the llms.txt standard for AI-friendly web content.
#
# Run: nika run workflows/showcase-fetch/09-fetch-llm-txt.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-llm-txt-showcase
description: "Discover AI content via extract: llm_txt"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: check_anthropic
    description: "Check Anthropic docs for llm.txt"
    fetch:
      url: "https://docs.anthropic.com"
      extract: llm_txt
      timeout: 15
    artifact:
      path: anthropic-llm-txt.md

  - id: log_discovery
    depends_on: [check_anthropic]
    with:
      result: $check_anthropic
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "LLM.txt discovery complete"
"##;
587
588// =============================================================================
589// 10 — Full Response Envelope
590// fetch: URL -> response:full -> status + headers + body + url
591// =============================================================================
592
/// Workflow 10 — `response: full`: two httpbin fetches returning the
/// complete JSON envelope (status/headers/body/url), each saved as a JSON
/// artifact, then a `nika:log` summary. Pure fetch — no LLM required.
const FETCH_10_RESPONSE_FULL: &str = r##"# =============================================================================
# SHOWCASE FETCH 10 — Full Response Envelope
# =============================================================================
# requires_llm: false
# category: fetch-response
#
# Returns the complete HTTP response as a JSON envelope containing:
# - status: HTTP status code
# - headers: all response headers
# - body: response body text
# - url: after redirect resolution
#
# Perfect for debugging redirects, checking security headers, API monitoring.
#
# Run: nika run workflows/showcase-fetch/10-response-full.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-response-full-showcase
description: "Inspect complete HTTP response via response: full"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Full response from a simple GET
  - id: get_full
    description: "Fetch httpbin GET with full response envelope"
    fetch:
      url: "https://httpbin.org/get"
      response: full
      timeout: 10
    artifact:
      path: httpbin-full-response.json
      format: json

  # Full response showing headers
  - id: inspect_headers
    description: "Fetch httpbin headers with full envelope"
    fetch:
      url: "https://httpbin.org/headers"
      response: full
      timeout: 10
    artifact:
      path: httpbin-headers-full.json
      format: json

  - id: log_responses
    depends_on: [get_full, inspect_headers]
    with:
      get: $get_full
      headers: $inspect_headers
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Full response envelopes captured — status, headers, body, and url available"
"##;
650
651// =============================================================================
652// 11 — Binary Response (CAS storage)
// fetch: image URL -> response:binary -> nika:dimensions + nika:thumbhash
654// =============================================================================
655
/// Workflow 11 — `response: binary`: download a PNG into CAS, then pipe the
/// resulting hash into `nika:dimensions` and `nika:thumbhash`, and finish
/// with a `nika:log` summary. Pure fetch + tools — no LLM required.
const FETCH_11_RESPONSE_BINARY: &str = r##"# =============================================================================
# SHOWCASE FETCH 11 — Binary Response + Media Pipeline
# =============================================================================
# requires_llm: false
# category: fetch-response
#
# Downloads a binary file (image) into content-addressable storage (CAS).
# The task output is the CAS hash, which can be piped into media tools
# like nika:dimensions and nika:thumbhash for further processing.
#
# Run: nika run workflows/showcase-fetch/11-response-binary.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-response-binary-showcase
description: "Download binary into CAS and extract dimensions via response: binary"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Download a PNG image into CAS
  - id: download_image
    description: "Download a PNG image into content-addressable storage"
    fetch:
      url: "https://httpbin.org/image/png"
      response: binary
      timeout: 15
    artifact:
      path: downloaded-image.png
      format: binary

  # Extract dimensions from the downloaded image
  - id: get_dimensions
    depends_on: [download_image]
    with:
      img: $download_image
    invoke:
      tool: "nika:dimensions"
      params:
        hash: "{{with.img.hash}}"

  # Generate a thumbhash placeholder
  - id: get_thumbhash
    depends_on: [download_image]
    with:
      img: $download_image
    invoke:
      tool: "nika:thumbhash"
      params:
        hash: "{{with.img.hash}}"

  - id: log_media
    depends_on: [get_dimensions, get_thumbhash]
    with:
      dims: $get_dimensions
      hash: $get_thumbhash
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Binary download + media pipeline complete — dimensions and thumbhash extracted"
"##;
718
719// =============================================================================
720// 12 — Default Text Response
721// fetch: URL -> (no response: field) -> display raw body
722// =============================================================================
723
/// Workflow 12 — default response mode (no `response:` field): three raw
/// body-text fetches from httpbin (/ip, /uuid, /json) saved as .txt
/// artifacts, then a `nika:log` summary. Pure fetch — no LLM required.
const FETCH_12_RESPONSE_DEFAULT: &str = r##"# =============================================================================
# SHOWCASE FETCH 12 — Default Text Response
# =============================================================================
# requires_llm: false
# category: fetch-response
#
# When no response: field is specified, fetch returns the raw body text.
# This is the simplest mode — no JSON envelope, no CAS storage.
# Just the HTTP response body as a string.
#
# Run: nika run workflows/showcase-fetch/12-response-default.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-response-default-showcase
description: "Fetch raw body text with default response mode (no response: field)"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Default response — just the body text
  - id: fetch_ip
    description: "Fetch public IP as raw JSON text"
    fetch:
      url: "https://httpbin.org/ip"
      timeout: 10
    artifact:
      path: public-ip.txt

  # Another default fetch — UUID
  - id: fetch_uuid
    description: "Fetch a random UUID as raw text"
    fetch:
      url: "https://httpbin.org/uuid"
      timeout: 10
    artifact:
      path: random-uuid.txt

  # Default fetch from a JSON API
  - id: fetch_json_raw
    description: "Fetch httpbin JSON as raw text (no extraction)"
    fetch:
      url: "https://httpbin.org/json"
      timeout: 10
    artifact:
      path: raw-json-body.txt

  - id: log_defaults
    depends_on: [fetch_ip, fetch_uuid, fetch_json_raw]
    with:
      ip: $fetch_ip
      uuid: $fetch_uuid
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Default response mode — raw body text captured for all 3 endpoints"
"##;
782
783// =============================================================================
784// 13 — Multi-Extract Comparison
785// fetch: same URL -> markdown vs article vs text -> LLM compare
786// =============================================================================
787
/// Workflow 13 — combo: fetch the same URL with markdown, article, and text
/// extract modes, then have an LLM (`{{PROVIDER}}`/`{{MODEL}}`) compare the
/// three outputs in a templated Markdown report. Requires an LLM.
const FETCH_13_MULTI_EXTRACT: &str = r##"# =============================================================================
# SHOWCASE FETCH 13 — Multi-Extract Comparison
# =============================================================================
# requires_llm: true
# category: fetch-combo
# features: fetch-markdown, fetch-article, fetch-html
#
# Fetches the SAME URL with 3 different extract modes (markdown, article,
# text) and asks an LLM to compare the results. Shows how each mode
# produces different output from identical source HTML.
#
# Run: nika run workflows/showcase-fetch/13-multi-extract-comparison.nika.yaml

schema: "nika/workflow@0.12"
workflow: multi-extract-comparison
description: "Compare markdown vs article vs text extraction on the same URL"
provider: "{{PROVIDER}}"
model: "{{MODEL}}"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Same URL, three extraction modes
  - id: as_markdown
    description: "Full Markdown extraction"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: markdown
      timeout: 20
    artifact:
      path: comparison-markdown.md

  - id: as_article
    description: "Article-only extraction (Readability)"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: article
      timeout: 20
    artifact:
      path: comparison-article.md

  - id: as_text
    description: "Plain text extraction"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: text
      timeout: 20
    artifact:
      path: comparison-text.txt

  # LLM compares all three outputs
  - id: compare
    description: "LLM analysis of extraction mode differences"
    depends_on: [as_markdown, as_article, as_text]
    with:
      md: $as_markdown
      article: $as_article
      text: $as_text
    infer:
      prompt: |
        Compare these 3 extraction modes applied to the same URL (blog.rust-lang.org):

        ## 1. extract: markdown (first 1500 chars)
        {{with.md | first(1500)}}

        ## 2. extract: article (first 1500 chars)
        {{with.article | first(1500)}}

        ## 3. extract: text (first 1500 chars)
        {{with.text | first(1500)}}

        Analyze:
        1. What does each mode preserve vs strip?
        2. Which is best for LLM summarization?
        3. Which is best for data extraction?
        4. Which is best for human reading?
        5. When would you pick each one?

        Be specific about the structural differences you observe.
      max_tokens: 600
    artifact:
      path: extraction-comparison-report.md
      template: |
        # Multi-Extract Comparison Report

        {{output}}
"##;
876
877// =============================================================================
878// 14 — RSS to Newsletter
// fetch: one feed -> extract:feed -> infer: summarize into newsletter digest
880// =============================================================================
881
/// Workflow 14 — combo: parse the Rust Blog Atom feed (`extract: feed`),
/// pass the JSON into an `infer:` task that writes a newsletter digest, and
/// finish with a `nika:log` confirmation. Requires an LLM.
const FETCH_14_RSS_NEWSLETTER: &str = r##"# =============================================================================
# SHOWCASE FETCH 14 — RSS Feed to Newsletter Pipeline
# =============================================================================
# requires_llm: true
# category: fetch-combo
# features: fetch-feed
#
# Fetches an RSS feed, then uses an LLM to summarize the entries into
# a newsletter-style digest. Demonstrates extract: feed piped into
# infer: for AI-powered content curation.
#
# Run: nika run workflows/showcase-fetch/14-rss-to-newsletter.nika.yaml

schema: "nika/workflow@0.12"
workflow: rss-to-newsletter
description: "Fetch RSS feed and generate an AI-curated newsletter digest"
provider: "{{PROVIDER}}"
model: "{{MODEL}}"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Phase 1: Fetch the Rust Blog feed
  - id: rust_feed
    description: "Parse Rust Blog Atom feed"
    fetch:
      url: "https://blog.rust-lang.org/feed.xml"
      extract: feed
      timeout: 15
    artifact:
      path: newsletter-rust-feed.json
      format: json

  # Phase 2: Summarize into a newsletter digest
  - id: create_digest
    description: "Generate newsletter digest from feed entries"
    depends_on: [rust_feed]
    with:
      feed: $rust_feed
    infer:
      prompt: |
        You are a tech newsletter curator. Create a concise weekly digest
        from this RSS feed data.

        FEED DATA:
        {{with.feed}}

        Format as a newsletter with:
        1. A catchy header with the feed name
        2. Top 5 most recent entries, each with:
           - Title (as a heading)
           - 2-sentence summary of what the post covers
           - Why it matters for Rust developers
        3. A "Quick Links" section with remaining entry titles
        4. A brief editorial closing paragraph

        Write in an engaging but professional tone.
      max_tokens: 800
    artifact:
      path: rust-newsletter-digest.md
      template: |
        {{output}}

  - id: log_done
    depends_on: [create_digest]
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Newsletter digest generated from RSS feed"
"##;
954
955// =============================================================================
956// 15 — Scrape + Analyze
957// fetch: metadata -> infer: SEO analysis -> structured report -> artifact
958// =============================================================================
959
960const FETCH_15_SCRAPE_ANALYZE: &str = r##"# =============================================================================
961# SHOWCASE FETCH 15 — Scrape + SEO Analysis Pipeline
962# =============================================================================
963# requires_llm: true
964# category: fetch-combo
965# features: fetch-html
966#
967# Fetches metadata and links from a website, then uses an LLM to produce
968# a structured SEO analysis report. Combines extract: metadata and
969# extract: links with structured output and artifact generation.
970#
971# Run: nika run workflows/showcase-fetch/15-scrape-and-analyze.nika.yaml
972
973schema: "nika/workflow@0.12"
974workflow: scrape-and-analyze
975description: "Scrape metadata + links, then generate a structured SEO report"
976provider: "{{PROVIDER}}"
977model: "{{MODEL}}"
978
979artifacts:
980  dir: .output/showcase-fetch
981
982tasks:
983  # Phase 1: Extract metadata
984  - id: scrape_metadata
985    description: "Extract OG, Twitter Cards, JSON-LD, and SEO tags"
986    fetch:
987      url: "https://github.com"
988      extract: metadata
989      timeout: 15
990    artifact:
991      path: seo-metadata.json
992      format: json
993
994  # Phase 2: Extract and classify links
995  - id: scrape_links
996    description: "Extract and classify all links"
997    fetch:
998      url: "https://github.com"
999      extract: links
1000      timeout: 15
1001    artifact:
1002      path: seo-links.json
1003      format: json
1004
1005  # Phase 3: Fetch full response for header analysis
1006  - id: check_headers
1007    description: "Inspect HTTP response headers for security and caching"
1008    fetch:
1009      url: "https://github.com"
1010      response: full
1011      timeout: 15
1012
1013  # Phase 4: LLM analyzes everything
1014  - id: seo_analysis
1015    description: "AI-powered SEO analysis from scraped data"
1016    depends_on: [scrape_metadata, scrape_links, check_headers]
1017    with:
1018      metadata: $scrape_metadata
1019      links: $scrape_links
1020      resp_status: $check_headers.status
1021      resp_headers: $check_headers.headers
1022    infer:
1023      prompt: |
1024        You are an SEO expert. Analyze this website's SEO posture from the scraped data.
1025
1026        ## Metadata (OG, Twitter Cards, JSON-LD, SEO tags)
1027        {{with.metadata}}
1028
1029        ## Link Classification (internal/external, nav/content/footer)
1030        {{with.links}}
1031
1032        ## HTTP Headers (security, caching, performance)
1033        Status: {{with.resp_status}}
1034        {{with.resp_headers | to_json}}
1035
1036        Produce a structured SEO report with:
1037        1. Overall SEO Score (0-100)
1038        2. Metadata Quality: title, description, OG completeness, Twitter Cards
1039        3. Link Health: internal/external ratio, broken-link risk areas
1040        4. Security Headers: CSP, HSTS, X-Frame-Options presence
1041        5. Top 5 Issues (ranked by impact)
1042        6. Top 5 Quick Wins (easy to fix, high impact)
1043
1044        Return as JSON with fields: score, metadata_quality, link_health,
1045        security_headers, top_issues (array), quick_wins (array).
1046      max_tokens: 800
1047      temperature: 0.2
1048    structured:
1049      schema:
1050        type: object
1051        properties:
1052          score:
1053            type: integer
1054            description: "Overall SEO score 0-100"
1055          metadata_quality:
1056            type: object
1057            properties:
1058              title_present:
1059                type: boolean
1060              description_present:
1061                type: boolean
1062              og_completeness:
1063                type: string
1064              twitter_cards:
1065                type: string
1066          link_health:
1067            type: object
1068            properties:
1069              internal_count:
1070                type: integer
1071              external_count:
1072                type: integer
1073              assessment:
1074                type: string
1075          security_headers:
1076            type: object
1077            properties:
1078              csp:
1079                type: boolean
1080              hsts:
1081                type: boolean
1082              x_frame_options:
1083                type: boolean
1084          top_issues:
1085            type: array
1086            items:
1087              type: string
1088          quick_wins:
1089            type: array
1090            items:
1091              type: string
1092        required: [score, top_issues, quick_wins]
1093    artifact:
1094      path: seo-analysis-report.json
1095      format: json
1096
1097  # Phase 5: Generate human-readable report
1098  - id: final_report
1099    description: "Generate formatted SEO report from structured analysis"
1100    depends_on: [seo_analysis]
1101    with:
1102      analysis: $seo_analysis
1103    infer:
1104      prompt: |
1105        Convert this structured SEO analysis into a professional Markdown report.
1106
1107        ANALYSIS DATA:
1108        {{with.analysis}}
1109
1110        Include:
1111        - Executive summary with the overall score
1112        - Detailed breakdown of each category
1113        - Prioritized action items
1114        - A summary table of findings
1115
1116        Format as clean, well-structured Markdown.
1117      max_tokens: 600
1118    artifact:
1119      path: seo-report-final.md
1120      template: |
1121        # SEO Analysis Report — github.com
1122
1123        {{output}}
1124"##;
1125
1126// =============================================================================
1127// Tests
1128// =============================================================================
1129
#[cfg(test)]
mod tests {
    use super::*;

    /// Join every showcase fetch workflow's content into one newline-separated
    /// string, for the cross-workflow coverage assertions below.
    fn joined_content() -> String {
        let mut all = String::new();
        for (i, w) in get_showcase_fetch_workflows().iter().enumerate() {
            if i > 0 {
                all.push('\n');
            }
            all.push_str(w.content);
        }
        all
    }

    #[test]
    fn test_showcase_fetch_workflow_count() {
        assert_eq!(
            get_showcase_fetch_workflows().len(),
            15,
            "Should have exactly 15 showcase fetch workflows"
        );
    }

    #[test]
    fn test_showcase_fetch_filenames_unique() {
        let workflows = get_showcase_fetch_workflows();
        // A set collapses duplicates, so equal sizes <=> no filename repeats.
        let distinct: std::collections::HashSet<&str> =
            workflows.iter().map(|w| w.filename).collect();
        assert_eq!(distinct.len(), workflows.len(), "All filenames must be unique");
    }

    #[test]
    fn test_showcase_fetch_all_have_schema() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.content.contains("schema: \"nika/workflow@0.12\""),
                "Workflow {} must declare schema",
                w.filename
            );
        }
    }

    #[test]
    fn test_showcase_fetch_all_have_workflow_name() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.content.contains("workflow:"),
                "Workflow {} must have workflow: declaration",
                w.filename
            );
        }
    }

    #[test]
    fn test_showcase_fetch_all_have_tasks() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.content.contains("tasks:"),
                "Workflow {} must have tasks section",
                w.filename
            );
        }
    }

    #[test]
    fn test_showcase_fetch_all_nika_yaml_extension() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.filename.ends_with(".nika.yaml"),
                "Workflow {} must end with .nika.yaml",
                w.filename
            );
        }
    }

    #[test]
    fn test_showcase_fetch_all_in_showcase_fetch_dir() {
        for w in get_showcase_fetch_workflows() {
            assert_eq!(
                w.tier_dir, "showcase-fetch",
                "Workflow {} must be in showcase-fetch directory",
                w.filename
            );
        }
    }

    #[test]
    fn test_showcase_fetch_valid_yaml() {
        for w in get_showcase_fetch_workflows() {
            // Skip YAML validation for templates with placeholders
            let has_placeholders =
                w.content.contains("{{PROVIDER}}") || w.content.contains("{{MODEL}}");
            if has_placeholders {
                continue;
            }
            let parsed: Result<serde_json::Value, _> = serde_saphyr::from_str(w.content);
            assert!(
                parsed.is_ok(),
                "Workflow {} should be valid YAML: {:?}",
                w.filename,
                parsed.err()
            );
        }
    }

    #[test]
    fn test_showcase_fetch_all_use_fetch_verb() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.content.contains("fetch:"),
                "Workflow {} must use the fetch: verb (it's a fetch showcase)",
                w.filename
            );
        }
    }

    #[test]
    fn test_showcase_fetch_extract_modes_coverage() {
        let all_content = joined_content();

        let modes = [
            "extract: markdown",
            "extract: article",
            "extract: text",
            "extract: selector",
            "extract: metadata",
            "extract: links",
            "extract: jsonpath",
            "extract: feed",
            "extract: llm_txt",
        ];

        for mode in &modes {
            assert!(all_content.contains(mode), "Missing extract mode: {}", mode);
        }
    }

    #[test]
    fn test_showcase_fetch_response_modes_coverage() {
        let all_content = joined_content();

        assert!(
            all_content.contains("response: full"),
            "Missing response mode: full"
        );
        assert!(
            all_content.contains("response: binary"),
            "Missing response mode: binary"
        );
        // Default mode = no response: field, verified by workflow 12 existing
        // and NOT having response: in its main fetch tasks
    }

    #[test]
    fn test_showcase_fetch_all_have_artifacts_dir() {
        for w in get_showcase_fetch_workflows() {
            let produces_artifacts =
                w.content.contains("artifacts:") || w.content.contains("artifact:");
            assert!(
                produces_artifacts,
                "Workflow {} should produce artifacts",
                w.filename
            );
        }
    }
}