use anyhow::{Context, Result};
use glob::Pattern;
use jtool_core::Config;
use jtool_jupyter::JupyterClient;
use jtool_notebook::cell::Output;
use jtool_notebook::{Cell, Notebook};
use std::path::Path;
use std::str::FromStr;
use tracing::debug;
use crate::matcher::Matcher;
use crate::types::{GrepOptions, GrepResult, Match, MatchType};
/// Decides whether the notebook at `path` passes the include/exclude glob
/// filters carried by `options`.
///
/// A notebook is rejected when an include glob is set, compiles, and does not
/// match `path`, or when an exclude glob is set, compiles, and matches `path`.
/// A pattern that fails to compile is ignored (treated as if it were unset).
fn should_search_notebook(path: &str, options: &GrepOptions) -> bool {
    let included = match &options.glob_pattern {
        Some(include) => match Pattern::new(include) {
            Ok(compiled) => compiled.matches(path),
            // An unparsable include glob does not filter anything out.
            Err(_) => true,
        },
        None => true,
    };
    if !included {
        return false;
    }

    let excluded = match &options.exclude_pattern {
        Some(exclude) => match Pattern::new(exclude) {
            Ok(compiled) => compiled.matches(path),
            // An unparsable exclude glob excludes nothing.
            Err(_) => false,
        },
        None => false,
    };
    !excluded
}
/// Searches a single notebook for the pattern described by `options`.
///
/// If `notebook_path` is rejected by the glob filters, the notebook is not
/// loaded and `GrepResult::new(notebook_path)` is returned instead.
///
/// # Errors
/// Returns an error when the notebook cannot be loaded, when the matcher
/// cannot be built from `options`, or when searching the cells fails.
pub async fn search_notebook(notebook_path: &str, options: &GrepOptions) -> Result<GrepResult> {
    debug!("Searching notebook: {}", notebook_path);
    if !should_search_notebook(notebook_path, options) {
        return Ok(GrepResult::new(notebook_path.to_string()));
    }
    let notebook = load_notebook(notebook_path).await?;
    let matcher = Matcher::new(
        &options.pattern,
        options.case_insensitive,
        options.word_regexp,
        options.fixed_strings,
        options.only_matching,
        options.invert_match,
    )?;
    // The notebook is only borrowed for the search.
    let matches = search_notebook_cells(&notebook, &matcher, options)?;
    Ok(GrepResult {
        notebook: notebook_path.to_string(),
        matches,
    })
}
/// Searches every notebook reachable on every server listed in `config`.
///
/// For each server the contents tree is walked from the root (`""`) to collect
/// notebook paths; each path that passes the glob filters is fetched and
/// searched. Only notebooks with at least one match are included in the
/// returned vector.
///
/// Per-notebook problems (fetch failure, missing content, unparsable notebook)
/// and a failed listing for a whole server are logged at debug level and
/// skipped, so one bad item does not abort the whole search.
///
/// # Errors
/// Returns an error when the matcher cannot be built, when a Jupyter client
/// cannot be created for a server, or when searching a notebook's cells fails.
pub async fn search_all_notebooks(
    config: &Config,
    options: &GrepOptions,
) -> Result<Vec<GrepResult>> {
    let matcher = Matcher::new(
        &options.pattern,
        options.case_insensitive,
        options.word_regexp,
        options.fixed_strings,
        options.only_matching,
        options.invert_match,
    )?;
    let mut all_results = Vec::new();
    for server_config in config.servers.values() {
        let client = JupyterClient::new(&server_config.url, server_config.token.clone())?;
        let mut notebook_paths = Vec::new();
        if let Err(e) = collect_notebook_paths(&client, "", &mut notebook_paths).await {
            debug!("Failed to collect notebooks from server: {}", e);
            continue;
        }
        for path in notebook_paths {
            if !should_search_notebook(&path, options) {
                continue;
            }
            let contents = match client.get_contents(&path).await {
                Ok(contents) => contents,
                Err(e) => {
                    debug!("Failed to get notebook {}: {}", path, e);
                    continue;
                }
            };
            let Some(content) = contents.content else {
                debug!("Notebook {} has no content", path);
                continue;
            };
            let nb = match serde_json::from_value::<Notebook>(content) {
                Ok(nb) => nb,
                Err(e) => {
                    debug!("Failed to parse notebook {}: {}", path, e);
                    continue;
                }
            };
            let matches = search_notebook_cells(&nb, &matcher, options)?;
            if !matches.is_empty() {
                // `path` is not used again, so it can be moved into the result.
                all_results.push(GrepResult {
                    notebook: path,
                    matches,
                });
            }
        }
    }
    Ok(all_results)
}
async fn load_notebook(notebook_path: &str) -> Result<Notebook> {
if Path::new(notebook_path).exists() {
Notebook::from_file(Path::new(notebook_path)).context("Failed to load notebook from file")
} else {
let config = Config::load().context("Failed to load configuration")?;
let (_, server_config) = config
.get_default_server()
.ok_or_else(|| anyhow::anyhow!("No default server configured"))?;
let client = JupyterClient::new(&server_config.url, server_config.token.clone())
.context("Failed to create Jupyter client")?;
let contents = client
.get_contents(notebook_path)
.await
.context("Failed to get notebook from server")?;
if let Some(content) = contents.content {
Notebook::from_str(&content.to_string()).context("Failed to parse notebook from server")
} else {
anyhow::bail!("Notebook has no content");
}
}
}
/// Runs `matcher` over the cells of `notebook`, honoring the filters in
/// `options`, and returns the collected matches.
///
/// Cells are visited in index order. A cell is skipped when it fails a
/// cell-type filter (`code_cells_only`, `markdown_cells_only`,
/// `raw_cells_only`) or an execution filter (`executed_only`,
/// `not_executed_only`). Inputs and outputs are searched according to
/// `search_inputs` / `search_outputs`, and collection stops once
/// `options.max_count` matches have been gathered. If any context option is
/// set, context lines are attached to the matches before returning.
///
/// # Errors
/// Fails when a cell cannot be retrieved or when one of the search/context
/// helpers returns an error.
fn search_notebook_cells(
    notebook: &Notebook,
    matcher: &Matcher,
    options: &GrepOptions,
) -> Result<Vec<Match>> {
    let mut matches = Vec::new();
    for cell_index in 0..notebook.cell_count() {
        if let Some(max) = options.max_count
            && matches.len() >= max
        {
            break;
        }
        // `with_context` builds the message only on failure, avoiding a
        // `format!` allocation for every successfully fetched cell.
        let cell = notebook
            .get_cell(cell_index)
            .with_context(|| format!("Failed to get cell {cell_index}"))?;

        // Cell-type filters.
        if options.code_cells_only && !matches!(cell, Cell::Code(_)) {
            continue;
        }
        if options.markdown_cells_only && !matches!(cell, Cell::Markdown(_)) {
            continue;
        }
        if options.raw_cells_only && !matches!(cell, Cell::Raw(_)) {
            continue;
        }

        // Execution-state filters; non-code cells have no execution count.
        let execution_count = get_execution_count(cell);
        if options.executed_only && execution_count.is_none() {
            continue;
        }
        if options.not_executed_only && execution_count.is_some() {
            continue;
        }

        if options.search_inputs {
            search_cell_input(cell, cell_index, matcher, &mut matches, options.max_count)?;
            // Stop before touching outputs if the inputs already hit the limit.
            if let Some(max) = options.max_count
                && matches.len() >= max
            {
                break;
            }
        }
        if options.search_outputs {
            search_cell_outputs(
                cell,
                cell_index,
                matcher,
                &mut matches,
                options.max_count,
                options,
            )?;
        }
    }
    if options.context_lines.is_some()
        || options.context_before.is_some()
        || options.context_after.is_some()
    {
        add_context_to_matches(&mut matches, notebook, options)?;
    }
    Ok(matches)
}
/// Scans the source of `cell` line by line and appends an input match for
/// every line on which `matcher` finds something.
///
/// Scanning stops as soon as `matches` holds `max_count` entries (when a limit
/// is given). The recorded line index is the zero-based line number within the
/// cell source.
fn search_cell_input(
    cell: &Cell,
    cell_index: usize,
    matcher: &Matcher,
    matches: &mut Vec<Match>,
    max_count: Option<usize>,
) -> Result<()> {
    let execution_count = get_execution_count(cell);
    let source = cell.source().as_str();

    for (line_index, line) in source.lines().enumerate() {
        if max_count.is_some_and(|max| matches.len() >= max) {
            break;
        }
        let Some((matched_text, _, _)) = matcher.find(line) else {
            continue;
        };
        matches.push(Match::new(
            cell_index,
            execution_count,
            MatchType::Input,
            line_index,
            line.to_string(),
            matched_text.to_string(),
        ));
    }
    Ok(())
}
fn search_cell_outputs(
cell: &Cell,
cell_index: usize,
matcher: &Matcher,
matches: &mut Vec<Match>,
max_count: Option<usize>,
options: &GrepOptions,
) -> Result<()> {
if let Cell::Code(code_cell) = cell {
for output in &code_cell.outputs {
if let Some(max) = max_count
&& matches.len() >= max
{
break;
}
let should_search = match output {
Output::Stream { .. } => !options.error_output_only && !options.result_output_only,
Output::Error { .. } => !options.stream_output_only && !options.result_output_only,
Output::ExecuteResult { .. } | Output::DisplayData { .. } => {
!options.stream_output_only && !options.error_output_only
}
};
if !should_search {
continue;
}
let output_text = extract_output_text(output);
for (line_index, line) in output_text.lines().enumerate() {
if let Some(max) = max_count
&& matches.len() >= max
{
break;
}
if let Some((matched_text, _, _)) = matcher.find(line) {
matches.push(Match::new(
cell_index,
code_cell.execution_count,
MatchType::Output,
line_index,
line.to_string(),
matched_text.to_string(),
));
}
}
}
}
Ok(())
}
/// Produces the searchable text for a single cell output.
///
/// * Stream outputs yield their text.
/// * Execute results and display data yield the `"text/plain"` entry of their
///   data bundle when it is a string, otherwise an empty string.
/// * Errors yield the error value followed by each traceback entry, every
///   piece terminated by a newline.
fn extract_output_text(output: &Output) -> String {
    match output {
        Output::Stream { text, .. } => text.as_str(),
        Output::ExecuteResult { data, .. } | Output::DisplayData { data, .. } => {
            let plain = data.get("text/plain").and_then(|value| value.as_str());
            match plain {
                Some(found) => found.to_string(),
                None => String::new(),
            }
        }
        Output::Error {
            evalue, traceback, ..
        } => {
            let mut rendered = String::new();
            rendered.push_str(evalue);
            rendered.push('\n');
            for frame in traceback {
                rendered.push_str(frame);
                rendered.push('\n');
            }
            rendered
        }
    }
}
/// Returns the execution count of a code cell, or `None` for any other kind
/// of cell (and for code cells that carry no execution count).
fn get_execution_count(cell: &Cell) -> Option<i32> {
    if let Cell::Code(code) = cell {
        code.execution_count
    } else {
        None
    }
}
/// Attaches surrounding context lines to every match in `matches`.
///
/// The window size is `options.context_lines` on both sides when that option
/// is set; otherwise `options.context_before` and `options.context_after` are
/// used, each defaulting to 0. For input matches the lines come from the cell
/// source; for output matches they come from the text of all outputs of the
/// cell, concatenated in order.
///
/// # Errors
/// Fails when the cell referenced by a match cannot be retrieved.
fn add_context_to_matches(
    matches: &mut [Match],
    notebook: &Notebook,
    options: &GrepOptions,
) -> Result<()> {
    let (before, after) = match options.context_lines {
        Some(context) => (context, context),
        None => (
            options.context_before.unwrap_or(0),
            options.context_after.unwrap_or(0),
        ),
    };
    for m in matches.iter_mut() {
        let cell = notebook.get_cell(m.cell_index)?;
        let lines: Vec<String> = if m.match_type == MatchType::Input {
            cell.source()
                .as_str()
                .lines()
                .map(|s| s.to_string())
                .collect()
        } else if let Cell::Code(code_cell) = cell {
            // NOTE(review): output matches record `line_index` relative to the
            // single output they were found in, while these lines are
            // concatenated across every output of the cell. Context for a
            // match in any output after the first may therefore be taken from
            // the wrong lines; fixing it needs the output index on `Match`.
            let mut output_lines = Vec::new();
            for output in &code_cell.outputs {
                let text = extract_output_text(output);
                output_lines.extend(text.lines().map(|s| s.to_string()));
            }
            output_lines
        } else {
            Vec::new()
        };

        let line_index = m.line_index;

        // Lines in `[line_index - before, line_index)`, clipped to what exists.
        let start = line_index.saturating_sub(before);
        m.context_before
            .extend(lines.iter().take(line_index).skip(start).cloned());

        // Up to `after` lines following the match. `skip`/`take` clip to the
        // available lines and avoid the overflow that `line_index + after + 1`
        // would hit for a very large context size.
        m.context_after.extend(
            lines
                .iter()
                .skip(line_index.saturating_add(1))
                .take(after)
                .cloned(),
        );
    }
    Ok(())
}
/// Recursively walks the contents tree of a Jupyter server starting at `path`
/// and appends the path of every notebook found to `paths`.
///
/// The future is boxed because the function calls itself for each
/// sub-directory. Entries that are neither notebooks nor directories, and
/// entries that cannot be deserialized as `ContentsInfo`, are ignored. If
/// `path` is not a directory, or its content is not an array, nothing is
/// collected.
///
/// # Errors
/// Fails when fetching the contents of `path` or of any nested directory fails.
fn collect_notebook_paths<'a>(
    client: &'a JupyterClient,
    path: &'a str,
    paths: &'a mut Vec<String>,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + 'a>> {
    Box::pin(async move {
        let listing = client.get_contents(path).await?;
        if listing.r#type != "directory" {
            return Ok(());
        }
        let Some(entries) = listing.content.as_ref().and_then(|body| body.as_array()) else {
            return Ok(());
        };

        for entry in entries {
            let Ok(info) =
                serde_json::from_value::<jtool_jupyter::models::ContentsInfo>(entry.clone())
            else {
                continue;
            };
            match info.r#type.as_str() {
                "notebook" => paths.push(info.path.clone()),
                "directory" => collect_notebook_paths(client, &info.path, paths).await?,
                _ => {}
            }
        }
        Ok(())
    })
}