1use super::WorkflowTemplate;
29
30pub fn get_showcase_fetch_workflows() -> Vec<WorkflowTemplate> {
32 vec![
33 WorkflowTemplate {
34 filename: "01-fetch-markdown.nika.yaml",
35 tier_dir: "showcase-fetch",
36 content: FETCH_01_MARKDOWN,
37 },
38 WorkflowTemplate {
39 filename: "02-fetch-article.nika.yaml",
40 tier_dir: "showcase-fetch",
41 content: FETCH_02_ARTICLE,
42 },
43 WorkflowTemplate {
44 filename: "03-fetch-text-selector.nika.yaml",
45 tier_dir: "showcase-fetch",
46 content: FETCH_03_TEXT,
47 },
48 WorkflowTemplate {
49 filename: "04-fetch-selector-html.nika.yaml",
50 tier_dir: "showcase-fetch",
51 content: FETCH_04_SELECTOR,
52 },
53 WorkflowTemplate {
54 filename: "05-fetch-metadata.nika.yaml",
55 tier_dir: "showcase-fetch",
56 content: FETCH_05_METADATA,
57 },
58 WorkflowTemplate {
59 filename: "06-fetch-links.nika.yaml",
60 tier_dir: "showcase-fetch",
61 content: FETCH_06_LINKS,
62 },
63 WorkflowTemplate {
64 filename: "07-fetch-jsonpath.nika.yaml",
65 tier_dir: "showcase-fetch",
66 content: FETCH_07_JSONPATH,
67 },
68 WorkflowTemplate {
69 filename: "08-fetch-feed.nika.yaml",
70 tier_dir: "showcase-fetch",
71 content: FETCH_08_FEED,
72 },
73 WorkflowTemplate {
74 filename: "09-fetch-llm-txt.nika.yaml",
75 tier_dir: "showcase-fetch",
76 content: FETCH_09_LLM_TXT,
77 },
78 WorkflowTemplate {
79 filename: "10-response-full.nika.yaml",
80 tier_dir: "showcase-fetch",
81 content: FETCH_10_RESPONSE_FULL,
82 },
83 WorkflowTemplate {
84 filename: "11-response-binary.nika.yaml",
85 tier_dir: "showcase-fetch",
86 content: FETCH_11_RESPONSE_BINARY,
87 },
88 WorkflowTemplate {
89 filename: "12-response-default.nika.yaml",
90 tier_dir: "showcase-fetch",
91 content: FETCH_12_RESPONSE_DEFAULT,
92 },
93 WorkflowTemplate {
94 filename: "13-multi-extract-comparison.nika.yaml",
95 tier_dir: "showcase-fetch",
96 content: FETCH_13_MULTI_EXTRACT,
97 },
98 WorkflowTemplate {
99 filename: "14-rss-to-newsletter.nika.yaml",
100 tier_dir: "showcase-fetch",
101 content: FETCH_14_RSS_NEWSLETTER,
102 },
103 WorkflowTemplate {
104 filename: "15-scrape-and-analyze.nika.yaml",
105 tier_dir: "showcase-fetch",
106 content: FETCH_15_SCRAPE_ANALYZE,
107 },
108 ]
109}
110
/// Showcase 01 — `extract: markdown`: converts a full HTML page to Markdown and logs the size via `exec:`.
const FETCH_01_MARKDOWN: &str = r##"# =============================================================================
# SHOWCASE FETCH 01 — Markdown Extraction
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-markdown
#
# Fetches the Rust Blog and converts the entire HTML page to clean Markdown.
# The htmd library strips navigation, scripts, styles, and produces
# LLM-ready content. Artifact saves the result to disk.
#
# Run: nika run workflows/showcase-fetch/01-fetch-markdown.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-markdown-showcase
description: "Convert a blog homepage to clean Markdown via extract: markdown"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: fetch_blog
    description: "Fetch Rust Blog and convert to Markdown"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: markdown
      timeout: 20
    artifact:
      path: rust-blog-markdown.md

  - id: log_size
    depends_on: [fetch_blog]
    with:
      content: $fetch_blog
    exec:
      command: |
        echo "Markdown extraction complete. Content length: $(echo '{{with.content}}' | wc -c | tr -d ' ') bytes"
      shell: true
"##;
155
/// Showcase 02 — `extract: article`: Readability-style main-content extraction, then a `nika:log` invoke.
const FETCH_02_ARTICLE: &str = r##"# =============================================================================
# SHOWCASE FETCH 02 — Article Extraction (Readability)
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-article
#
# Extracts only the main article content from a webpage using the
# Readability algorithm (dom_smoothie). Strips navigation, ads, sidebars,
# cookie banners — leaving just the primary reading content.
#
# Run: nika run workflows/showcase-fetch/02-fetch-article.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-article-showcase
description: "Extract main article content with Readability via extract: article"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: fetch_article
    description: "Extract article content from Rust Blog"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: article
      timeout: 20
    artifact:
      path: rust-blog-article.md

  - id: log_result
    depends_on: [fetch_article]
    with:
      article: $fetch_article
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Article extraction complete — content ready for LLM consumption"
"##;
201
/// Showcase 03 — `extract: text`: visible-text extraction, with and without a CSS `selector:` filter.
const FETCH_03_TEXT: &str = r##"# =============================================================================
# SHOWCASE FETCH 03 — Text Extraction with CSS Selector
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-html
#
# Extracts visible text from a webpage. When combined with selector:,
# only text from matching CSS elements is returned. Without selector:,
# returns all visible text (no HTML tags).
#
# Run: nika run workflows/showcase-fetch/03-fetch-text-selector.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-text-selector-showcase
description: "Extract visible text filtered by CSS selector via extract: text"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Text from specific elements only
  - id: fetch_paragraphs
    description: "Extract paragraph text from httpbin HTML page"
    fetch:
      url: "https://httpbin.org/html"
      extract: text
      selector: "p"
      timeout: 15
    artifact:
      path: httpbin-paragraphs.txt

  # All visible text (no selector)
  - id: fetch_all_text
    description: "Extract all visible text from httpbin HTML page"
    fetch:
      url: "https://httpbin.org/html"
      extract: text
      timeout: 15
    artifact:
      path: httpbin-all-text.txt

  - id: compare_sizes
    depends_on: [fetch_paragraphs, fetch_all_text]
    with:
      filtered: $fetch_paragraphs
      full: $fetch_all_text
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Filtered paragraphs vs full text extracted successfully"
"##;
260
/// Showcase 04 — `extract: selector`: raw HTML fragments for matching CSS selectors (tags preserved).
const FETCH_04_SELECTOR: &str = r##"# =============================================================================
# SHOWCASE FETCH 04 — Raw HTML Selector Extraction
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-html
#
# Returns the raw HTML of elements matching a CSS selector. Unlike
# extract: text (which strips tags), this preserves the HTML structure.
# Useful for scraping specific DOM fragments.
#
# Run: nika run workflows/showcase-fetch/04-fetch-selector-html.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-selector-html-showcase
description: "Extract raw HTML matching CSS selectors via extract: selector"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: fetch_headings
    description: "Extract all heading elements from httpbin HTML"
    fetch:
      url: "https://httpbin.org/html"
      extract: selector
      selector: "h1"
      timeout: 15
    artifact:
      path: httpbin-headings.html

  - id: fetch_paragraphs_html
    description: "Extract paragraph HTML from httpbin"
    fetch:
      url: "https://httpbin.org/html"
      extract: selector
      selector: "p"
      timeout: 15
    artifact:
      path: httpbin-paragraphs.html

  - id: log_done
    depends_on: [fetch_headings, fetch_paragraphs_html]
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Raw HTML selector extraction complete — headings and paragraphs captured"
"##;
315
/// Showcase 05 — `extract: metadata`: OG / Twitter Cards / JSON-LD / SEO tags as JSON artifacts.
const FETCH_05_METADATA: &str = r##"# =============================================================================
# SHOWCASE FETCH 05 — Metadata Extraction (OG / Twitter / JSON-LD / SEO)
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-html
#
# Extracts structured metadata from a webpage: Open Graph tags,
# Twitter Cards, JSON-LD structured data, and basic SEO tags
# (title, description, canonical URL, etc.). Returns JSON.
#
# Run: nika run workflows/showcase-fetch/05-fetch-metadata.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-metadata-showcase
description: "Extract OG, Twitter Cards, JSON-LD, and SEO metadata via extract: metadata"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: github_metadata
    description: "Extract metadata from GitHub homepage"
    fetch:
      url: "https://github.com"
      extract: metadata
      timeout: 15
    artifact:
      path: github-metadata.json
      format: json

  - id: rust_blog_metadata
    description: "Extract metadata from Rust Blog"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: metadata
      timeout: 15
    artifact:
      path: rust-blog-metadata.json
      format: json

  - id: log_metadata
    depends_on: [github_metadata, rust_blog_metadata]
    with:
      gh: $github_metadata
      rust: $rust_blog_metadata
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Metadata extraction complete for GitHub and Rust Blog"
"##;
373
/// Showcase 06 — `extract: links`: link extraction with internal/external + zone classification.
const FETCH_06_LINKS: &str = r##"# =============================================================================
# SHOWCASE FETCH 06 — Link Extraction and Classification
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-html
#
# Extracts all links from a webpage and classifies them:
# - Internal vs external
# - Navigation vs content vs footer
# Returns structured JSON with URL, text, type, and zone.
#
# Run: nika run workflows/showcase-fetch/06-fetch-links.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-links-showcase
description: "Extract and classify links via extract: links"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: extract_links
    description: "Extract and classify all links from Hacker News"
    fetch:
      url: "https://news.ycombinator.com"
      extract: links
      timeout: 15
    artifact:
      path: hn-links.json
      format: json

  - id: log_links
    depends_on: [extract_links]
    with:
      links: $extract_links
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Link extraction complete — internal/external classification ready"
"##;
421
/// Showcase 07 — `extract: jsonpath`: JSONPath queries (via `selector:`) against JSON APIs.
const FETCH_07_JSONPATH: &str = r##"# =============================================================================
# SHOWCASE FETCH 07 — JSONPath Extraction
# =============================================================================
# requires_llm: false
# category: fetch-extract
#
# Queries JSON APIs using JSONPath expressions. Zero external dependencies —
# JSONPath is always available. The selector: field holds the JSONPath query.
# Surgical extraction from massive JSON payloads.
#
# Run: nika run workflows/showcase-fetch/07-fetch-jsonpath.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-jsonpath-showcase
description: "Extract specific fields from JSON APIs via extract: jsonpath"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # JSONPath on httpbin structured JSON
  - id: slideshow_title
    description: "Extract slideshow title from httpbin JSON"
    fetch:
      url: "https://httpbin.org/json"
      extract: jsonpath
      selector: "$.slideshow.title"
      timeout: 10
    artifact:
      path: slideshow-title.json
      format: json

  # JSONPath on nested array
  - id: slide_titles
    description: "Extract all slide titles from httpbin JSON"
    fetch:
      url: "https://httpbin.org/json"
      extract: jsonpath
      selector: "$.slideshow.slides[*].title"
      timeout: 10
    artifact:
      path: slide-titles.json
      format: json

  # JSONPath on Hacker News Algolia API
  - id: hn_search
    description: "Search Hacker News and extract story titles"
    fetch:
      url: "https://hn.algolia.com/api/v1/search?query=rust&tags=story&hitsPerPage=5"
      extract: jsonpath
      selector: "$.hits[*].title"
      timeout: 15
    artifact:
      path: hn-rust-titles.json
      format: json

  - id: log_results
    depends_on: [slideshow_title, slide_titles, hn_search]
    with:
      title: $slideshow_title
      slides: $slide_titles
      hn: $hn_search
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "JSONPath extraction complete — 3 queries across 2 APIs"
"##;
495
/// Showcase 08 — `extract: feed`: RSS/Atom/JSON-Feed parsing into structured JSON.
const FETCH_08_FEED: &str = r##"# =============================================================================
# SHOWCASE FETCH 08 — RSS/Atom Feed Parsing
# =============================================================================
# requires_llm: false
# category: fetch-extract
# features: fetch-feed
#
# Parses RSS, Atom, and JSON Feed formats using the feed-rs library.
# Returns structured JSON with title, entries, dates, authors, and links.
# Works with any standard syndication feed.
#
# Run: nika run workflows/showcase-fetch/08-fetch-feed.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-feed-showcase
description: "Parse RSS/Atom feeds into structured JSON via extract: feed"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: rust_feed
    description: "Parse the Rust Blog Atom feed"
    fetch:
      url: "https://blog.rust-lang.org/feed.xml"
      extract: feed
      timeout: 15
    artifact:
      path: rust-feed.json
      format: json

  - id: log_feed
    depends_on: [rust_feed]
    with:
      feed: $rust_feed
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "RSS feed parsed — entries extracted and structured as JSON"
"##;
542
/// Showcase 09 — `extract: llm_txt`: llms.txt / .well-known/llm.txt discovery for AI-friendly content.
const FETCH_09_LLM_TXT: &str = r##"# =============================================================================
# SHOWCASE FETCH 09 — LLM.txt Content Discovery
# =============================================================================
# requires_llm: false
# category: fetch-extract
#
# AI-era content discovery. Checks for /.well-known/llm.txt and /llms.txt
# files that websites publish to help LLMs understand their content.
# Part of the llms.txt standard for AI-friendly web content.
#
# Run: nika run workflows/showcase-fetch/09-fetch-llm-txt.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-llm-txt-showcase
description: "Discover AI content via extract: llm_txt"

artifacts:
  dir: .output/showcase-fetch

tasks:
  - id: check_anthropic
    description: "Check Anthropic docs for llm.txt"
    fetch:
      url: "https://docs.anthropic.com"
      extract: llm_txt
      timeout: 15
    artifact:
      path: anthropic-llm-txt.md

  - id: log_discovery
    depends_on: [check_anthropic]
    with:
      result: $check_anthropic
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "LLM.txt discovery complete"
"##;
587
/// Showcase 10 — `response: full`: complete HTTP envelope (status, headers, body, final URL) as JSON.
const FETCH_10_RESPONSE_FULL: &str = r##"# =============================================================================
# SHOWCASE FETCH 10 — Full Response Envelope
# =============================================================================
# requires_llm: false
# category: fetch-response
#
# Returns the complete HTTP response as a JSON envelope containing:
# - status: HTTP status code
# - headers: all response headers
# - body: response body text
# - url: after redirect resolution
#
# Perfect for debugging redirects, checking security headers, API monitoring.
#
# Run: nika run workflows/showcase-fetch/10-response-full.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-response-full-showcase
description: "Inspect complete HTTP response via response: full"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Full response from a simple GET
  - id: get_full
    description: "Fetch httpbin GET with full response envelope"
    fetch:
      url: "https://httpbin.org/get"
      response: full
      timeout: 10
    artifact:
      path: httpbin-full-response.json
      format: json

  # Full response showing headers
  - id: inspect_headers
    description: "Fetch httpbin headers with full envelope"
    fetch:
      url: "https://httpbin.org/headers"
      response: full
      timeout: 10
    artifact:
      path: httpbin-headers-full.json
      format: json

  - id: log_responses
    depends_on: [get_full, inspect_headers]
    with:
      get: $get_full
      headers: $inspect_headers
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Full response envelopes captured — status, headers, body, and url available"
"##;
650
/// Showcase 11 — `response: binary`: CAS download piped into `nika:dimensions` and `nika:thumbhash`.
const FETCH_11_RESPONSE_BINARY: &str = r##"# =============================================================================
# SHOWCASE FETCH 11 — Binary Response + Media Pipeline
# =============================================================================
# requires_llm: false
# category: fetch-response
#
# Downloads a binary file (image) into content-addressable storage (CAS).
# The task output is the CAS hash, which can be piped into media tools
# like nika:dimensions and nika:thumbhash for further processing.
#
# Run: nika run workflows/showcase-fetch/11-response-binary.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-response-binary-showcase
description: "Download binary into CAS and extract dimensions via response: binary"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Download a PNG image into CAS
  - id: download_image
    description: "Download a PNG image into content-addressable storage"
    fetch:
      url: "https://httpbin.org/image/png"
      response: binary
      timeout: 15
    artifact:
      path: downloaded-image.png
      format: binary

  # Extract dimensions from the downloaded image
  - id: get_dimensions
    depends_on: [download_image]
    with:
      img: $download_image
    invoke:
      tool: "nika:dimensions"
      params:
        hash: "{{with.img.hash}}"

  # Generate a thumbhash placeholder
  - id: get_thumbhash
    depends_on: [download_image]
    with:
      img: $download_image
    invoke:
      tool: "nika:thumbhash"
      params:
        hash: "{{with.img.hash}}"

  - id: log_media
    depends_on: [get_dimensions, get_thumbhash]
    with:
      dims: $get_dimensions
      hash: $get_thumbhash
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Binary download + media pipeline complete — dimensions and thumbhash extracted"
"##;
718
/// Showcase 12 — default response mode: raw body text when no `response:` field is given.
const FETCH_12_RESPONSE_DEFAULT: &str = r##"# =============================================================================
# SHOWCASE FETCH 12 — Default Text Response
# =============================================================================
# requires_llm: false
# category: fetch-response
#
# When no response: field is specified, fetch returns the raw body text.
# This is the simplest mode — no JSON envelope, no CAS storage.
# Just the HTTP response body as a string.
#
# Run: nika run workflows/showcase-fetch/12-response-default.nika.yaml

schema: "nika/workflow@0.12"
workflow: fetch-response-default-showcase
description: "Fetch raw body text with default response mode (no response: field)"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Default response — just the body text
  - id: fetch_ip
    description: "Fetch public IP as raw JSON text"
    fetch:
      url: "https://httpbin.org/ip"
      timeout: 10
    artifact:
      path: public-ip.txt

  # Another default fetch — UUID
  - id: fetch_uuid
    description: "Fetch a random UUID as raw text"
    fetch:
      url: "https://httpbin.org/uuid"
      timeout: 10
    artifact:
      path: random-uuid.txt

  # Default fetch from a JSON API
  - id: fetch_json_raw
    description: "Fetch httpbin JSON as raw text (no extraction)"
    fetch:
      url: "https://httpbin.org/json"
      timeout: 10
    artifact:
      path: raw-json-body.txt

  - id: log_defaults
    depends_on: [fetch_ip, fetch_uuid, fetch_json_raw]
    with:
      ip: $fetch_ip
      uuid: $fetch_uuid
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Default response mode — raw body text captured for all 3 endpoints"
"##;
782
/// Showcase 13 (requires LLM) — same URL fetched with markdown/article/text, then compared by `infer:`.
const FETCH_13_MULTI_EXTRACT: &str = r##"# =============================================================================
# SHOWCASE FETCH 13 — Multi-Extract Comparison
# =============================================================================
# requires_llm: true
# category: fetch-combo
# features: fetch-markdown, fetch-article, fetch-html
#
# Fetches the SAME URL with 3 different extract modes (markdown, article,
# text) and asks an LLM to compare the results. Shows how each mode
# produces different output from identical source HTML.
#
# Run: nika run workflows/showcase-fetch/13-multi-extract-comparison.nika.yaml

schema: "nika/workflow@0.12"
workflow: multi-extract-comparison
description: "Compare markdown vs article vs text extraction on the same URL"
provider: "{{PROVIDER}}"
model: "{{MODEL}}"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Same URL, three extraction modes
  - id: as_markdown
    description: "Full Markdown extraction"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: markdown
      timeout: 20
    artifact:
      path: comparison-markdown.md

  - id: as_article
    description: "Article-only extraction (Readability)"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: article
      timeout: 20
    artifact:
      path: comparison-article.md

  - id: as_text
    description: "Plain text extraction"
    fetch:
      url: "https://blog.rust-lang.org/"
      extract: text
      timeout: 20
    artifact:
      path: comparison-text.txt

  # LLM compares all three outputs
  - id: compare
    description: "LLM analysis of extraction mode differences"
    depends_on: [as_markdown, as_article, as_text]
    with:
      md: $as_markdown
      article: $as_article
      text: $as_text
    infer:
      prompt: |
        Compare these 3 extraction modes applied to the same URL (blog.rust-lang.org):

        ## 1. extract: markdown (first 1500 chars)
        {{with.md | first(1500)}}

        ## 2. extract: article (first 1500 chars)
        {{with.article | first(1500)}}

        ## 3. extract: text (first 1500 chars)
        {{with.text | first(1500)}}

        Analyze:
        1. What does each mode preserve vs strip?
        2. Which is best for LLM summarization?
        3. Which is best for data extraction?
        4. Which is best for human reading?
        5. When would you pick each one?

        Be specific about the structural differences you observe.
      max_tokens: 600
    artifact:
      path: extraction-comparison-report.md
      template: |
        # Multi-Extract Comparison Report

        {{output}}
"##;
876
/// Showcase 14 (requires LLM) — `extract: feed` piped into `infer:` to produce a newsletter digest.
const FETCH_14_RSS_NEWSLETTER: &str = r##"# =============================================================================
# SHOWCASE FETCH 14 — RSS Feed to Newsletter Pipeline
# =============================================================================
# requires_llm: true
# category: fetch-combo
# features: fetch-feed
#
# Fetches an RSS feed, then uses an LLM to summarize the entries into
# a newsletter-style digest. Demonstrates extract: feed piped into
# infer: for AI-powered content curation.
#
# Run: nika run workflows/showcase-fetch/14-rss-to-newsletter.nika.yaml

schema: "nika/workflow@0.12"
workflow: rss-to-newsletter
description: "Fetch RSS feed and generate an AI-curated newsletter digest"
provider: "{{PROVIDER}}"
model: "{{MODEL}}"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Phase 1: Fetch the Rust Blog feed
  - id: rust_feed
    description: "Parse Rust Blog Atom feed"
    fetch:
      url: "https://blog.rust-lang.org/feed.xml"
      extract: feed
      timeout: 15
    artifact:
      path: newsletter-rust-feed.json
      format: json

  # Phase 2: Summarize into a newsletter digest
  - id: create_digest
    description: "Generate newsletter digest from feed entries"
    depends_on: [rust_feed]
    with:
      feed: $rust_feed
    infer:
      prompt: |
        You are a tech newsletter curator. Create a concise weekly digest
        from this RSS feed data.

        FEED DATA:
        {{with.feed}}

        Format as a newsletter with:
        1. A catchy header with the feed name
        2. Top 5 most recent entries, each with:
           - Title (as a heading)
           - 2-sentence summary of what the post covers
           - Why it matters for Rust developers
        3. A "Quick Links" section with remaining entry titles
        4. A brief editorial closing paragraph

        Write in an engaging but professional tone.
      max_tokens: 800
    artifact:
      path: rust-newsletter-digest.md
      template: |
        {{output}}

  - id: log_done
    depends_on: [create_digest]
    invoke:
      tool: "nika:log"
      params:
        level: "info"
        message: "Newsletter digest generated from RSS feed"
"##;
954
/// Showcase 15 (requires LLM) — metadata + links + full response fed into a structured-output SEO
/// analysis, then rendered into a Markdown report. Deepest template: nested JSON schema under
/// `structured:`. NOTE(review): nesting of the schema block reconstructed conventionally — verify
/// against the canonical workflow file.
const FETCH_15_SCRAPE_ANALYZE: &str = r##"# =============================================================================
# SHOWCASE FETCH 15 — Scrape + SEO Analysis Pipeline
# =============================================================================
# requires_llm: true
# category: fetch-combo
# features: fetch-html
#
# Fetches metadata and links from a website, then uses an LLM to produce
# a structured SEO analysis report. Combines extract: metadata and
# extract: links with structured output and artifact generation.
#
# Run: nika run workflows/showcase-fetch/15-scrape-and-analyze.nika.yaml

schema: "nika/workflow@0.12"
workflow: scrape-and-analyze
description: "Scrape metadata + links, then generate a structured SEO report"
provider: "{{PROVIDER}}"
model: "{{MODEL}}"

artifacts:
  dir: .output/showcase-fetch

tasks:
  # Phase 1: Extract metadata
  - id: scrape_metadata
    description: "Extract OG, Twitter Cards, JSON-LD, and SEO tags"
    fetch:
      url: "https://github.com"
      extract: metadata
      timeout: 15
    artifact:
      path: seo-metadata.json
      format: json

  # Phase 2: Extract and classify links
  - id: scrape_links
    description: "Extract and classify all links"
    fetch:
      url: "https://github.com"
      extract: links
      timeout: 15
    artifact:
      path: seo-links.json
      format: json

  # Phase 3: Fetch full response for header analysis
  - id: check_headers
    description: "Inspect HTTP response headers for security and caching"
    fetch:
      url: "https://github.com"
      response: full
      timeout: 15

  # Phase 4: LLM analyzes everything
  - id: seo_analysis
    description: "AI-powered SEO analysis from scraped data"
    depends_on: [scrape_metadata, scrape_links, check_headers]
    with:
      metadata: $scrape_metadata
      links: $scrape_links
      resp_status: $check_headers.status
      resp_headers: $check_headers.headers
    infer:
      prompt: |
        You are an SEO expert. Analyze this website's SEO posture from the scraped data.

        ## Metadata (OG, Twitter Cards, JSON-LD, SEO tags)
        {{with.metadata}}

        ## Link Classification (internal/external, nav/content/footer)
        {{with.links}}

        ## HTTP Headers (security, caching, performance)
        Status: {{with.resp_status}}
        {{with.resp_headers | to_json}}

        Produce a structured SEO report with:
        1. Overall SEO Score (0-100)
        2. Metadata Quality: title, description, OG completeness, Twitter Cards
        3. Link Health: internal/external ratio, broken-link risk areas
        4. Security Headers: CSP, HSTS, X-Frame-Options presence
        5. Top 5 Issues (ranked by impact)
        6. Top 5 Quick Wins (easy to fix, high impact)

        Return as JSON with fields: score, metadata_quality, link_health,
        security_headers, top_issues (array), quick_wins (array).
      max_tokens: 800
      temperature: 0.2
      structured:
        schema:
          type: object
          properties:
            score:
              type: integer
              description: "Overall SEO score 0-100"
            metadata_quality:
              type: object
              properties:
                title_present:
                  type: boolean
                description_present:
                  type: boolean
                og_completeness:
                  type: string
                twitter_cards:
                  type: string
            link_health:
              type: object
              properties:
                internal_count:
                  type: integer
                external_count:
                  type: integer
                assessment:
                  type: string
            security_headers:
              type: object
              properties:
                csp:
                  type: boolean
                hsts:
                  type: boolean
                x_frame_options:
                  type: boolean
            top_issues:
              type: array
              items:
                type: string
            quick_wins:
              type: array
              items:
                type: string
          required: [score, top_issues, quick_wins]
    artifact:
      path: seo-analysis-report.json
      format: json

  # Phase 5: Generate human-readable report
  - id: final_report
    description: "Generate formatted SEO report from structured analysis"
    depends_on: [seo_analysis]
    with:
      analysis: $seo_analysis
    infer:
      prompt: |
        Convert this structured SEO analysis into a professional Markdown report.

        ANALYSIS DATA:
        {{with.analysis}}

        Include:
        - Executive summary with the overall score
        - Detailed breakdown of each category
        - Prioritized action items
        - A summary table of findings

        Format as clean, well-structured Markdown.
      max_tokens: 600
    artifact:
      path: seo-report-final.md
      template: |
        # SEO Analysis Report — github.com

        {{output}}
"##;
1125
#[cfg(test)]
mod tests {
    use super::*;

    /// The showcase set is fixed at 15 workflows; additions must update this count.
    #[test]
    fn test_showcase_fetch_workflow_count() {
        assert_eq!(
            get_showcase_fetch_workflows().len(),
            15,
            "Should have exactly 15 showcase fetch workflows"
        );
    }

    /// Duplicate filenames would collide on disk; every name must be distinct.
    #[test]
    fn test_showcase_fetch_filenames_unique() {
        let mut seen = std::collections::HashSet::new();
        for w in get_showcase_fetch_workflows() {
            // `insert` returns false on a repeat, flagging the duplicate directly.
            assert!(seen.insert(w.filename), "All filenames must be unique");
        }
    }

    /// Every template must pin the workflow schema version.
    #[test]
    fn test_showcase_fetch_all_have_schema() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.content.contains("schema: \"nika/workflow@0.12\""),
                "Workflow {} must declare schema",
                w.filename
            );
        }
    }

    /// Every template must carry a `workflow:` name declaration.
    #[test]
    fn test_showcase_fetch_all_have_workflow_name() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.content.contains("workflow:"),
                "Workflow {} must have workflow: declaration",
                w.filename
            );
        }
    }

    /// A workflow without a `tasks:` section does nothing — reject it.
    #[test]
    fn test_showcase_fetch_all_have_tasks() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.content.contains("tasks:"),
                "Workflow {} must have tasks section",
                w.filename
            );
        }
    }

    /// Filenames must follow the `.nika.yaml` naming convention.
    #[test]
    fn test_showcase_fetch_all_nika_yaml_extension() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.filename.ends_with(".nika.yaml"),
                "Workflow {} must end with .nika.yaml",
                w.filename
            );
        }
    }

    /// All entries belong to the showcase-fetch tier directory.
    #[test]
    fn test_showcase_fetch_all_in_showcase_fetch_dir() {
        for w in get_showcase_fetch_workflows() {
            assert_eq!(
                w.tier_dir, "showcase-fetch",
                "Workflow {} must be in showcase-fetch directory",
                w.filename
            );
        }
    }

    /// Templates without placeholder substitutions must parse as YAML as-is.
    #[test]
    fn test_showcase_fetch_valid_yaml() {
        for w in get_showcase_fetch_workflows() {
            // Templates containing {{PROVIDER}}/{{MODEL}} are only valid after
            // substitution, so they are skipped here.
            let needs_substitution =
                w.content.contains("{{PROVIDER}}") || w.content.contains("{{MODEL}}");
            if needs_substitution {
                continue;
            }
            let parsed: Result<serde_json::Value, _> = serde_saphyr::from_str(w.content);
            assert!(
                parsed.is_ok(),
                "Workflow {} should be valid YAML: {:?}",
                w.filename,
                parsed.err()
            );
        }
    }

    /// Being the fetch showcase, every template must exercise the fetch: verb.
    #[test]
    fn test_showcase_fetch_all_use_fetch_verb() {
        for w in get_showcase_fetch_workflows() {
            assert!(
                w.content.contains("fetch:"),
                "Workflow {} must use the fetch: verb (it's a fetch showcase)",
                w.filename
            );
        }
    }

    /// The set as a whole must demonstrate every extract mode at least once.
    /// (Per-workflow `contains` is equivalent to searching the joined corpus,
    /// since no needle spans a line boundary.)
    #[test]
    fn test_showcase_fetch_extract_modes_coverage() {
        let workflows = get_showcase_fetch_workflows();

        let modes = [
            "extract: markdown",
            "extract: article",
            "extract: text",
            "extract: selector",
            "extract: metadata",
            "extract: links",
            "extract: jsonpath",
            "extract: feed",
            "extract: llm_txt",
        ];

        for mode in &modes {
            let covered = workflows.iter().any(|w| w.content.contains(mode));
            assert!(covered, "Missing extract mode: {}", mode);
        }
    }

    /// Both non-default response modes (full envelope, binary/CAS) must appear.
    #[test]
    fn test_showcase_fetch_response_modes_coverage() {
        let workflows = get_showcase_fetch_workflows();

        assert!(
            workflows.iter().any(|w| w.content.contains("response: full")),
            "Missing response mode: full"
        );
        assert!(
            workflows.iter().any(|w| w.content.contains("response: binary")),
            "Missing response mode: binary"
        );
    }

    /// Every template should write something to disk — either a workflow-level
    /// artifacts dir or at least one task-level artifact.
    #[test]
    fn test_showcase_fetch_all_have_artifacts_dir() {
        for w in get_showcase_fetch_workflows() {
            let produces_output =
                w.content.contains("artifacts:") || w.content.contains("artifact:");
            assert!(
                produces_output,
                "Workflow {} should produce artifacts",
                w.filename
            );
        }
    }
}