Skip to main content

mcp_methods/
cache.rs

1use regex::Regex;
2use serde_json::Value;
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::sync::LazyLock;
6
// Per-thread, single-entry cache: the most recently compiled pattern string
// together with its compiled `Regex`. Starts out empty (`None`) on every
// thread and is read and written by `get_or_compile_regex`.
thread_local! {
    static CACHED_RE: RefCell<Option<(String, Regex)>> = const { RefCell::new(None) };
}
10
11fn get_or_compile_regex(pattern: &str) -> Result<Regex, regex::Error> {
12    CACHED_RE.with(|cell| {
13        let mut cache = cell.borrow_mut();
14        if let Some((ref cached_pat, ref re)) = *cache {
15            if cached_pat == pattern {
16                return Ok(re.clone());
17            }
18        }
19        let re = Regex::new(pattern)?;
20        *cache = Some((pattern.to_string(), re.clone()));
21        Ok(re)
22    })
23}
24
/// Matches a whole `start-end` string made of two runs of decimal digits
/// separated by a single hyphen (e.g. `40-60`); group 1 is the start, group 2
/// the end. Compiled lazily on first use.
static LINE_RANGE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\d+)-(\d+)$").unwrap());
26
27use crate::compact;
28use crate::github;
29
/// Element cache for storing collapsed discussion elements (code blocks,
/// details sections, truncated comments, overflow).
///
/// Lives entirely in Rust memory. Pure Rust — Python bindings exist
/// in the sibling `mcp-methods-py` crate as a thin `PyElementCache`
/// newtype wrapping this struct.
///
/// Entries are grouped per `(repo, number)` pair; within a group each
/// element is stored as a JSON value keyed by its element id.
pub struct ElementCache {
    // (repo, number) -> {element_id -> element_data_json}
    // Keys beginning with '_' are skipped by the store/update methods, so
    // they never appear as element ids here.
    store: HashMap<(String, u64), HashMap<String, Value>>,
}
40
41impl Default for ElementCache {
42    fn default() -> Self {
43        Self::new()
44    }
45}
46
47impl ElementCache {
48    pub fn new() -> Self {
49        Self {
50            store: HashMap::new(),
51        }
52    }
53
54    /// Get a cached element as a JSON string. Returns None if not found.
55    pub fn get(&self, repo: &str, number: u64, element_id: &str) -> Option<String> {
56        self.store
57            .get(&(repo.to_string(), number))
58            .and_then(|m| m.get(element_id))
59            .map(|v| serde_json::to_string(v).unwrap_or_default())
60    }
61
62    /// Store elements for a repo/number, replacing any existing ones.
63    pub fn store_elements(&mut self, repo: &str, number: u64, elements_json: &str) {
64        if let Ok(val) = serde_json::from_str::<Value>(elements_json) {
65            if let Some(obj) = val.as_object() {
66                let mut map = HashMap::new();
67                for (k, v) in obj {
68                    if !k.starts_with('_') {
69                        map.insert(k.clone(), v.clone());
70                    }
71                }
72                self.store.insert((repo.to_string(), number), map);
73            }
74        }
75    }
76
77    /// Add elements to an existing cache entry (merge).
78    pub fn update_elements(&mut self, repo: &str, number: u64, elements_json: &str) {
79        if let Ok(val) = serde_json::from_str::<Value>(elements_json) {
80            if let Some(obj) = val.as_object() {
81                let entry = self.store.entry((repo.to_string(), number)).or_default();
82                for (k, v) in obj {
83                    if !k.starts_with('_') {
84                        entry.insert(k.clone(), v.clone());
85                    }
86                }
87            }
88        }
89    }
90
91    /// List available element IDs for a repo/number.
92    pub fn available(&self, repo: &str, number: u64) -> Vec<String> {
93        match self.store.get(&(repo.to_string(), number)) {
94            Some(m) => {
95                let mut keys: Vec<String> = m.keys().cloned().collect();
96                keys.sort();
97                keys
98            }
99            None => Vec::new(),
100        }
101    }
102
103    /// Retrieve a cached element with optional line slicing or grep.
104    ///
105    /// This is the main drill-down method. Returns a JSON string.
106    pub fn retrieve(
107        &self,
108        repo: &str,
109        number: u64,
110        element_id: &str,
111        lines: Option<&str>,
112        grep: Option<&str>,
113        context: usize,
114    ) -> String {
115        let elem_data = match self
116            .store
117            .get(&(repo.to_string(), number))
118            .and_then(|m| m.get(element_id))
119        {
120            Some(v) => v,
121            None => {
122                let available = self.available(repo, number);
123                let mut msg = format!(
124                    "Element '{}' not found for {}#{}.",
125                    element_id, repo, number
126                );
127                if !available.is_empty() {
128                    msg.push_str(&format!("\nAvailable: {}", available.join(", ")));
129                } else {
130                    msg.push_str("\nNo cached elements. Call fetch_issue first.");
131                }
132                return msg;
133            }
134        };
135
136        // Content can be a string or a structured JSON value (array/object)
137        let content_val = elem_data.get("content");
138        let content_str: String;
139        let content_is_structured;
140
141        match content_val {
142            Some(Value::String(s)) => {
143                content_str = s.clone();
144                content_is_structured = false;
145            }
146            Some(val) => {
147                content_str = serde_json::to_string_pretty(val).unwrap_or_default();
148                content_is_structured = true;
149            }
150            None => {
151                content_str = String::new();
152                content_is_structured = false;
153            }
154        }
155        let content_lines: Vec<&str> = content_str.split('\n').collect();
156
157        // Grep mode
158        if let Some(grep_pattern) = grep {
159            let regex = match get_or_compile_regex(grep_pattern) {
160                Ok(r) => r,
161                Err(e) => return format!("Invalid grep pattern: {}", e),
162            };
163
164            // Structured content (overflow, comment segments): field-aware grep
165            if content_is_structured {
166                if let Some(data) = content_val {
167                    let matches = grep_json_value(data, &regex, context, "");
168                    let elem_type = elem_data
169                        .get("type")
170                        .and_then(|v| v.as_str())
171                        .unwrap_or("unknown");
172                    let result = serde_json::json!({
173                        "element_id": element_id,
174                        "type": elem_type,
175                        "grep": grep_pattern,
176                        "matches": matches,
177                    });
178                    return serde_json::to_string_pretty(&result).unwrap_or_default();
179                }
180            }
181
182            // Standard elements: line-based grep — build result without cloning
183            let matches = grep_lines_internal(&content_lines, &regex, context);
184            let mut result = serde_json::Map::new();
185            if let Some(obj) = elem_data.as_object() {
186                for (k, v) in obj {
187                    if k != "content" {
188                        result.insert(k.clone(), v.clone());
189                    }
190                }
191            }
192            result.insert("grep".to_string(), Value::String(grep_pattern.to_string()));
193            result.insert("matches".to_string(), matches);
194            return serde_json::to_string_pretty(&Value::Object(result)).unwrap_or_default();
195        }
196
197        // Lines mode
198        if let Some(lines_str) = lines {
199            let m = match LINE_RANGE_RE.captures(lines_str) {
200                Some(m) => m,
201                None => {
202                    return format!(
203                        "Invalid lines format: '{}'. Use 'start-end', e.g. '40-60'.",
204                        lines_str
205                    );
206                }
207            };
208            let start: usize = m[1].parse().unwrap_or(1);
209            let end: usize = m[2].parse().unwrap_or(usize::MAX);
210
211            // Array elements (e.g. comments_middle): interpret lines as item index range
212            if content_is_structured {
213                if let Some(Value::Array(arr)) = content_val {
214                    let from = start.saturating_sub(1);
215                    let to = end.min(arr.len());
216                    let selected: Vec<Value> = arr[from..to].to_vec();
217                    let mut result = serde_json::Map::new();
218                    if let Some(obj) = elem_data.as_object() {
219                        for (k, v) in obj {
220                            if k != "content" {
221                                result.insert(k.clone(), v.clone());
222                            }
223                        }
224                    }
225                    result.insert("content".to_string(), Value::Array(selected));
226                    result.insert(
227                        "items_shown".to_string(),
228                        Value::String(format!("{}-{}", start, to)),
229                    );
230                    result.insert("total_items".to_string(), Value::from(arr.len()));
231                    return serde_json::to_string_pretty(&Value::Object(result))
232                        .unwrap_or_default();
233                }
234            }
235
236            // String elements: interpret lines as text line range
237            let selected: Vec<&str> =
238                content_lines[start.saturating_sub(1)..end.min(content_lines.len())].to_vec();
239            let mut result = serde_json::Map::new();
240            if let Some(obj) = elem_data.as_object() {
241                for (k, v) in obj {
242                    if k != "content" {
243                        result.insert(k.clone(), v.clone());
244                    }
245                }
246            }
247            result.insert("content".to_string(), Value::String(selected.join("\n")));
248            result.insert(
249                "lines_shown".to_string(),
250                Value::String(format!("{}-{}", start, end.min(content_lines.len()))),
251            );
252            return serde_json::to_string_pretty(&Value::Object(result)).unwrap_or_default();
253        }
254
255        // Comment segments: return a TOC (index + author + date + snippet) instead
256        // of dumping the full content. Use lines="1-20" to paginate.
257        let elem_type = elem_data.get("type").and_then(|v| v.as_str()).unwrap_or("");
258        if elem_type == "comment_segment" {
259            if let Some(Value::Array(arr)) = content_val {
260                let toc: Vec<Value> = arr
261                    .iter()
262                    .map(|c| {
263                        let body = c.get("body").and_then(|v| v.as_str()).unwrap_or("");
264                        let snippet: String = body
265                            .chars()
266                            .filter(|ch| !ch.is_control())
267                            .take(80)
268                            .collect();
269                        serde_json::json!({
270                            "_index": c.get("_index"),
271                            "author": c.get("author"),
272                            "created_at": c.get("created_at"),
273                            "author_association": c.get("author_association"),
274                            "snippet": snippet,
275                        })
276                    })
277                    .collect();
278                let result = serde_json::json!({
279                    "element_id": element_id,
280                    "type": elem_type,
281                    "total_comments": arr.len(),
282                    "hint": "Use lines='1-20' to paginate, or grep='pattern' to search.",
283                    "comments": toc,
284                });
285                return serde_json::to_string_pretty(&result).unwrap_or_default();
286            }
287        }
288
289        // Full content
290        serde_json::to_string_pretty(elem_data).unwrap_or_default()
291    }
292
293    /// Fetch a GitHub issue/PR, compact it, and store cache entries.
294    ///
295    /// Releases the GIL during all HTTP and computation (when the
296    /// `python` feature is on). This is the primary entry point for
297    /// fetching discussions with caching, callable from both Python
298    /// and pure Rust.
299    ///
300    /// Every code path returns a status string — invalid-repo, fetch-
301    /// failure, cached-summary, overflow-preview, and full-text are
302    /// all returned as `String`; the return is never a real error
303    /// envelope. Pyo3 wraps the return as a Python `str` when the
304    /// `python` feature is enabled.
305    #[allow(clippy::too_many_arguments)]
306    pub fn fetch_issue(
307        &mut self,
308        repo: &str,
309        number: u64,
310        element_id: Option<&str>,
311        lines: Option<&str>,
312        grep: Option<&str>,
313        context: usize,
314        refresh: bool,
315    ) -> String {
316        // Element retrieval — no network, fast
317        if let Some(eid) = element_id {
318            return self.retrieve(repo, number, eid, lines, grep, context);
319        }
320
321        // Validate repo
322        if let Some(err) = crate::git_refs::validate_repo(repo) {
323            return err;
324        }
325
326        // If cached and not refreshing, return summary of available elements
327        let key = (repo.to_string(), number);
328        if !refresh {
329            if let Some(elements) = self.store.get(&key) {
330                if !elements.is_empty() {
331                    let mut ids: Vec<&String> = elements.keys().collect();
332                    ids.sort();
333                    return format!(
334                        "Cached {}#{} — {} elements available: {}\n\
335                         Use element_id='...' to drill down, or refresh=True to re-fetch.",
336                        repo,
337                        number,
338                        ids.len(),
339                        ids.iter()
340                            .map(|s| s.as_str())
341                            .collect::<Vec<_>>()
342                            .join(", ")
343                    );
344                }
345            }
346        }
347
348        // All HTTP + computation runs in Rust; parallel requests use std::thread::scope
349        let (text, cache_json) = match github::fetch_issue_internal(repo, number) {
350            Ok(r) => r,
351            Err(e) => return e,
352        };
353
354        // Store cache entries
355        if let Some(ref cj) = cache_json {
356            self.store_elements(repo, number, cj);
357        }
358
359        // Overflow guard
360        if text.len() > github::OVERFLOW_LIMIT {
361            let total_lines = text.matches('\n').count() + 1;
362            let overflow = serde_json::json!({
363                "overflow": {
364                    "type": "overflow",
365                    "total_chars": text.len(),
366                    "total_lines": total_lines,
367                    "content": text,
368                }
369            });
370            self.update_elements(
371                repo,
372                number,
373                &serde_json::to_string(&overflow).unwrap_or_default(),
374            );
375            let safe_end = compact::safe_byte_index(&text, github::OVERFLOW_PREVIEW);
376            let mut preview = text[..safe_end].to_string();
377            if let Some(last_nl) = preview.rfind('\n') {
378                if last_nl > 0 {
379                    preview.truncate(last_nl);
380                }
381            }
382            preview.push_str(&format!(
383                "\n\n... [{} chars, {} lines — truncated]\n\
384                 Use element_id='overflow' with lines='N-M' or grep='pattern' \
385                 to explore the full result.",
386                text.len(),
387                total_lines
388            ));
389            return preview;
390        }
391
392        text
393    }
394}
395
396fn grep_lines_internal(text_lines: &[&str], regex: &Regex, context: usize) -> Value {
397    let mut raw: Vec<(usize, usize, usize)> = Vec::new();
398    for (idx, line) in text_lines.iter().enumerate() {
399        if regex.is_match(line) {
400            let start = idx.saturating_sub(context);
401            let end = (idx + context + 1).min(text_lines.len());
402            raw.push((idx + 1, start, end));
403        }
404    }
405
406    struct Group {
407        lines: Vec<usize>,
408        start: usize,
409        end: usize,
410    }
411    let mut groups: Vec<Group> = Vec::new();
412    for (hit_line, start, end) in raw {
413        if let Some(last) = groups.last_mut() {
414            if start <= last.end {
415                last.lines.push(hit_line);
416                last.end = last.end.max(end);
417                continue;
418            }
419        }
420        groups.push(Group {
421            lines: vec![hit_line],
422            start,
423            end,
424        });
425    }
426
427    let result: Vec<Value> = groups
428        .into_iter()
429        .map(|g| {
430            let content = text_lines[g.start..g.end].join("\n");
431            serde_json::json!({
432                "lines": g.lines,
433                "context_start": g.start + 1,
434                "context_end": g.end,
435                "content": content,
436            })
437        })
438        .collect();
439
440    Value::Array(result)
441}
442
443fn grep_json_value(data: &Value, regex: &Regex, context: usize, path: &str) -> Vec<Value> {
444    match data {
445        Value::String(s) => {
446            let text = s.replace("\r\n", "\n");
447            let text_lines: Vec<&str> = text.split('\n').collect();
448            let matches = grep_lines_internal(&text_lines, regex, context);
449            if let Value::Array(arr) = matches {
450                arr.into_iter()
451                    .map(|mut m| {
452                        m["field"] = Value::String(path.to_string());
453                        m
454                    })
455                    .collect()
456            } else {
457                Vec::new()
458            }
459        }
460        Value::Object(map) => {
461            let mut matches = Vec::new();
462            for (key, val) in map {
463                let child = if path.is_empty() {
464                    key.clone()
465                } else {
466                    format!("{}.{}", path, key)
467                };
468                matches.extend(grep_json_value(val, regex, context, &child));
469            }
470            matches
471        }
472        Value::Array(arr) => {
473            let mut matches = Vec::new();
474            for (i, item) in arr.iter().enumerate() {
475                let child = format!("{}[{}]", path, i);
476                let mut item_matches = grep_json_value(item, regex, context, &child);
477
478                // Enrich matches from comment-like objects with metadata
479                if let Value::Object(obj) = item {
480                    if obj.contains_key("author") && obj.contains_key("body") {
481                        for m in &mut item_matches {
482                            if let Some(author) = obj.get("author") {
483                                m["author"] = author.clone();
484                            }
485                            if let Some(date) = obj.get("created_at") {
486                                m["created_at"] = date.clone();
487                            }
488                            if let Some(assoc) = obj.get("author_association") {
489                                m["author_association"] = assoc.clone();
490                            }
491                            if let Some(idx) = obj.get("_index") {
492                                m["comment_index"] = idx.clone();
493                                m["element_id"] =
494                                    Value::String(format!("comment_{}", idx.as_u64().unwrap_or(0)));
495                            }
496                        }
497                    }
498                }
499
500                matches.extend(item_matches);
501            }
502            matches
503        }
504        _ => Vec::new(),
505    }
506}