rem_extract/extract/
extraction.rs

1use std::{
2    fs,
3    io::{
4        self,
5        ErrorKind
6    },
7    path::PathBuf, sync::atomic::AtomicU32,
8    sync::atomic::Ordering,
9};
10
11use proc_macro2::Span;
12use syn::spanned::Spanned;
13use syn::visit::Visit;
14
15use triomphe::Arc;
16
17use ra_ap_hir::{
18    CfgOptions,
19    Semantics,
20};
21
22use ra_ap_ide_db::{
23    EditionedFileId,
24    base_db::{CrateOrigin, CrateWorkspaceData, Env},
25    ChangeWithProcMacros,
26};
27use ra_ap_project_model::{
28    CargoConfig,
29    ProjectWorkspace,
30    ProjectManifest,
31};
32
33use ra_ap_ide::{
34    Edition,
35    Analysis,
36    AnalysisHost,
37    AssistConfig,
38    AssistResolveStrategy,
39    TextSize,
40    SourceRoot,
41};
42
43use ra_ap_base_db::CrateGraph;
44
45use ra_ap_syntax::{
46    algo,
47    ast::HasName,
48    AstNode,
49    SourceFile
50};
51
52use ra_ap_ide_assists::Assist;
53
54use ra_ap_vfs::{
55    AbsPathBuf,
56    VfsPath,
57    file_set::FileSet,
58    FileId as VfsFileId,
59};
60
61use crate::{
62    error::ExtractionError,
63    extract::extraction_utils::{
64        apply_edits, apply_extract_function, check_braces, check_comment, convert_to_abs_path_buf, filter_extract_function_assist, fixup_controlflow, generate_frange, generate_frange_from_fileid, get_assists, get_cargo_config, get_cargo_toml, get_manifest_dir, load_project_manifest, load_project_workspace, load_workspace_data, rename_function, trim_range
65    }, startup::identify::add_sysroot_deps,
66};
67
68use crate::startup;
69
70use rem_interface::metrics as mx;
71
/// Describes a single extract-function request.
///
/// `start_idx` and `end_idx` delimit the selection inside the file at
/// `file_path`; `new_fn_name` is the name given to the extracted function.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ExtractionInput {
    /// Path of the source file that contains the selection.
    pub file_path: String,
    /// Name to give the newly extracted function.
    pub new_fn_name: String,
    /// Offset into the file's text at which the selection starts.
    pub start_idx: u32,
    /// Offset into the file's text at which the selection ends.
    pub end_idx: u32,
}
79
80impl ExtractionInput {
81    pub fn new(
82        file_path: &str,
83        new_fn_name: &str,
84        start_idx: u32,
85        end_idx: u32,
86    ) -> Self { ExtractionInput {
87            file_path: file_path.to_string(),
88            new_fn_name: new_fn_name.to_string(),
89            start_idx,
90            end_idx,
91        }
92    }
93
94    #[allow(dead_code)]
95    pub fn new_absolute(
96        file_path: &str,
97        new_fn_name: &str,
98        start_idx: u32,
99        end_idx: u32,
100    ) -> Self { ExtractionInput {
101            file_path: convert_to_abs_path_buf(file_path).unwrap().as_str().to_string(),
102            new_fn_name: new_fn_name.to_string(),
103            start_idx,
104            end_idx,
105        }
106    }
107}
108
109// ========================================
110// Checks for the validity of the input
111// ========================================
112
113// Check if the file exists and is readable
114fn check_file_exists(file_path: &str) -> Result<(), ExtractionError> {
115    if fs::metadata(file_path).is_err() {
116        return Err(ExtractionError::Io(io::Error::new(
117            ErrorKind::NotFound,
118            format!("File not found: {}", file_path),
119        )));
120    }
121    Ok(())
122}
123
124// Check if the idx pair is valid
125fn check_idx(input: &ExtractionInput) -> Result<(), ExtractionError> {
126    if input.start_idx == input.end_idx {
127        return Err(ExtractionError::SameIdx);
128    } else if input.start_idx > input.end_idx {
129        return Err(ExtractionError::InvalidIdxPair);
130    }
131    if input.start_idx == 0 {
132        return Err(ExtractionError::InvalidStartIdx);
133    }
134    if input.end_idx == 0 {
135        return Err(ExtractionError::InvalidEndIdx);
136    }
137    Ok(())
138}
139
140fn verify_input(input: &ExtractionInput) -> Result<(), ExtractionError> {
141    // Execute each input validation step one by one
142    check_file_exists(&input.file_path)?;
143    check_idx(input)?;
144
145    Ok(())
146}
147
148pub fn extract_method_file(input: ExtractionInput) -> Result<(String, String), ExtractionError> {
149    mx::mark("Extraction Start");
150
151    // Extract the struct information
152    let input_path: &str = &input.file_path;
153    let callee_name: &str = &input.new_fn_name;
154    let start_idx: u32 = input.start_idx;
155    let end_idx: u32 = input.end_idx;
156
157    let text: String = fs::read_to_string(input_path).unwrap();
158
159    // Verify the input data
160    verify_input(&input)?;
161
162    mx::mark("Load the analysis");
163
164    // let (analysis,file_id) = analysis_from_single_file_no_std( text.clone() );
165    let (analysis, file_id) = analysis_from_single_file_std( text.clone() );
166
167    mx::mark("Analysis Loaded");
168
169    let assist_config: AssistConfig = super::extraction_utils::generate_assist_config();
170    let diagnostics_config = super::extraction_utils::generate_diagnostics_config();
171    let resolve: AssistResolveStrategy = super::extraction_utils::generate_resolve_strategy();
172    let range: (u32, u32) = (start_idx, end_idx);
173
174    let frange = generate_frange_from_fileid(file_id, range);
175
176    mx::mark("Get the assists");
177
178    let assists: Vec<Assist> = analysis.assists_with_fixes(
179        &assist_config,
180        &diagnostics_config,
181        resolve,
182        frange
183    ).unwrap();
184
185    mx::mark("Filter for extract function assist");
186
187    let assist: Assist = filter_extract_function_assist( assists )?;
188
189    mx::mark("Apply extract function assist");
190
191    let src_change = assist.source_change
192        .as_ref()
193        .unwrap()
194        .clone();
195
196    let (text_edit, maybe_snippet_edit) =
197        src_change.get_source_and_snippet_edit(
198            file_id,
199        ).unwrap();
200
201    let edited_text: String = apply_edits(
202        text.clone(),
203        text_edit.clone(),
204        maybe_snippet_edit.clone(),
205    );
206
207    let renamed_text: String = rename_function(
208        edited_text,
209        "fun_name",
210        callee_name,
211    );
212
213    // Ensure that the output file imports std::ops::ControlFlow if it uses it
214    let fixed_cf_text: String = fixup_controlflow( renamed_text );
215
216    mx::mark("Extraction End");
217
218    let parent_method: String = parent_method_from_text(
219        text,
220        &range,
221    );
222
223    Ok( (fixed_cf_text, parent_method) )
224}
225
226/// Function to extract the code segment based on cursor positions
227/// If successful, returns the `String` of the output code, followed by a
228/// `String` of the caller method
229pub fn extract_method(input: ExtractionInput) -> Result<(String, String), ExtractionError> {
230
231    mx::mark("Extraction Start");
232
233    // Extract the struct information
234    let input_path: &str = &input.file_path;
235    let callee_name: &str = &input.new_fn_name;
236    let start_idx: u32 = input.start_idx;
237    let end_idx: u32 = input.end_idx;
238
239    // Convert the input path to an `AbsPathBuf`
240    let input_abs_path: AbsPathBuf = convert_to_abs_path_buf(input_path).unwrap();
241
242    // Verify the input data
243    verify_input(&input)?;
244
245    let manifest_dir: PathBuf = get_manifest_dir(
246        &PathBuf::from(input_abs_path.as_str())
247    )?;
248    let cargo_toml: AbsPathBuf = get_cargo_toml( &manifest_dir );
249    // println!("Cargo.toml {:?}", cargo_toml);
250
251    mx::mark("Load the project workspace");
252
253    let project_manifest: ProjectManifest = load_project_manifest( &cargo_toml );
254    // println!("Project Manifest {:?}", project_manifest);
255
256    // MARKER: Load the cargo config
257    mx::mark("Load the cargo config");
258
259    let cargo_config: CargoConfig = get_cargo_config( &project_manifest );
260    // println!("Cargo Config {:?}", cargo_config);
261
262    // MARKER: Load the project workspace
263    mx::mark("Load the project workspace");
264
265    let workspace: ProjectWorkspace = load_project_workspace( &project_manifest, &cargo_config );
266    // println!("Project Workspace {:?}", workspace);
267
268    // MARKER: Load the analysis database and VFS
269    mx::mark("Load the analysis database and VFS");
270
271    let (db, vfs) = load_workspace_data(workspace, &cargo_config);
272
273    // Parse the cursor positions into the range
274    let range_: (u32, u32) = (
275        start_idx,
276        end_idx,
277    );
278
279    // MARKER: Database Loaded
280    mx::mark("Database Loaded");
281
282    // Before we go too far, lets do few more quick checks now that we have the
283    // analysis
284    // 1. Check if the function to extract is not just a comment
285    // 2. Check if the function to extract has matching braces
286    // 3. Convert the range to a trimmed range.
287    let sema: Semantics<'_, ra_ap_ide::RootDatabase> = Semantics::new( &db );
288    let frange_: ra_ap_hir::FileRangeWrapper<ra_ap_vfs::FileId> = generate_frange( &input_abs_path, &vfs, range_.clone() );
289    let edition: EditionedFileId = EditionedFileId::current_edition( frange_.file_id );
290    let source_file: SourceFile = sema.parse( edition );
291    let range: (u32, u32) = trim_range( &source_file, &range_ );
292    check_comment( &source_file, &range )?;
293    check_braces( &source_file, &range )?;
294
295    // MARKER: Run the analysis
296    mx::mark("Run the analysis");
297
298    // let analysis_host: AnalysisHost = AnalysisHost::with_database( db );
299    // let analysis: Analysis = run_analysis( analysis_host );
300
301    // MARKER: Get the assists and filter for extract function assist
302    mx::mark("Get the assists");
303
304    let assists: Vec<Assist> = get_assists( &db, &vfs, &input_abs_path, range );
305
306    // mx::mark("1");
307    // let assists_2: Vec<Assist> = get_assists(&analysis, &vfs, &input_abs_path, range);
308
309    mx::mark("Filter for extract function assist");
310
311    let assist: Assist = filter_extract_function_assist( assists )?;
312
313    mx::mark("Apply extract function assist");
314
315    let modified_code: String = apply_extract_function(
316        &assist,
317        &input_abs_path,
318        &vfs,
319        &callee_name,
320    )?;
321
322    mx::mark("Get parent method");
323
324    let parent_method: String = parent_method(
325        &source_file,
326        range,
327    )?;
328
329    // MARKER: Extraction End
330    mx::mark("Extraction End");
331
332    Ok( (modified_code, parent_method) )
333}
334
335/// Constructs an analysis from the text of a single file
336/// Returns the Analysis object and the FileId of the file (which is just zero),
337/// but needed later down the line
338fn analysis_from_single_file_no_std(
339    src: String
340) -> (Analysis, VfsFileId) {
341    // Create a single "virtual" file and systemm
342    let mut files = FileSet::default();
343    let file_id = ra_ap_vfs::FileId::from_raw(0);
344    let path = VfsPath::new_virtual_path("/main.rs".to_owned());
345    files.insert(file_id, path);
346
347    // Build out the crate graph for that file
348    let mut config = CfgOptions::default();
349    config.insert_atom(ra_ap_hir::sym::test.clone()); // Probably not needed but enables cfg(test)
350
351    let mut graph = CrateGraph::default();
352    graph.add_crate_root(
353        file_id,
354        ra_ap_ide::Edition::CURRENT,
355        None,
356        None,
357        Arc::new(config.clone()),
358        None,
359        Env::default(),
360        false,
361        CrateOrigin::Local { repo: None, name: None},
362    );
363
364    // Prepare the workspace for this "crate"
365    let shared_ws = Arc::new(CrateWorkspaceData {
366        proc_macro_cwd: None,
367        data_layout: Err("There is no data layout for a single file analysis".into()),
368        toolchain: None,
369    });
370
371    let workspace = graph
372        .iter()
373        .map(|crate_id| (crate_id, shared_ws.clone()))
374        .collect();
375
376    // Describe the change to the host
377    let mut change = ChangeWithProcMacros::new();
378    let root = SourceRoot::new_local(files);
379    change.set_roots(vec![root]);
380    change.change_file(file_id, Some(src));
381    change.set_crate_graph(graph, workspace);
382
383    // Create the change that instantiates the analysis
384    let mut analysis = AnalysisHost::default();
385    analysis.apply_change(change);
386    (analysis.analysis(), file_id)
387
388}
389
390/// Constructs the analysis from a single file. Imports the standard library and
391/// core into the crate graph of the analysis.
392pub fn analysis_from_single_file_std(
393    src: String
394) -> (Analysis, VfsFileId) {
395    // 1) Grab the cached sysroot context
396    let ctx = startup::single_file_std_context();
397
398
399    // 2) Clone the base graph that already has core/std/etc.
400    let file_id: VfsFileId = alloc_vfs_file_id();
401    let mut graph: CrateGraph = ctx.base_graph.clone();
402
403    // 3) Add a crate rooted at this file.
404    let mut cfg: CfgOptions = CfgOptions::default();
405    cfg.insert_atom(ra_ap_hir::sym::test.clone());
406
407    let my_crate = graph.add_crate_root(
408        file_id,
409        Edition::CURRENT,
410        None,           // no display name override
411        None,           // no cfg_explicitly_set
412        Arc::new(cfg),  // cfg options
413        None,           // no out-dir
414        Env::default(), // empty env
415        false,          // is_proc_macro
416        CrateOrigin::Local { repo: None, name: None },
417    );
418
419    // 2) Clone the base graph that already has core/std/etc.
420    let mut graph = ctx.base_graph.clone();
421
422    // 3) Add a crate rooted at this file.
423    let mut cfg = CfgOptions::default();
424    cfg.insert_atom(ra_ap_hir::sym::test.clone());
425
426    let my_crate = graph.add_crate_root(
427        file_id,
428        Edition::CURRENT,
429        None,
430        None,
431        Arc::new(cfg),
432        None,
433        Env::default(),
434        false,
435        CrateOrigin::Local { repo: None, name: None },
436    );
437
438    // 4) Wire this crate to core/std.
439    add_sysroot_deps(&mut graph, my_crate);
440
441    // 5) Workspace data for all crates.
442    let ws_data =
443        startup::identify::build_ws_data(&graph);
444
445    // 6) Build source roots:
446    //    - local root for /main.rs
447    //    - library root for all sysroot files
448    let mut local_files = FileSet::default();
449    local_files.insert(file_id, VfsPath::new_virtual_path("/main.rs".to_owned()));
450    let local_root = SourceRoot::new_local(local_files);
451
452    let sysroot_files = ctx.sysroot_files.to_file_set();
453    let sysroot_root = SourceRoot::new_library(sysroot_files);
454
455    // 7) Build change: roots + crate graph + file contents.
456    let mut change = ChangeWithProcMacros::new();
457    change.set_roots(vec![local_root, sysroot_root]);
458    change.set_crate_graph(graph, ws_data);
459
460        // 7a) Set text for the sysroot files.
461    for (abs_path, id) in ctx.sysroot_files.entries() {
462        // Best-effort I/O; if it fails, log and skip.
463        match fs::read_to_string(abs_path.as_path()) {
464            Ok(text) => {
465                change.change_file(*id, Some(text));
466            }
467            Err(err) => {
468                eprintln!("warn: failed to read sysroot file {:?}: {err}", abs_path);
469            }
470        }
471    }
472
473    // 7b) Set text for our single file.
474    change.change_file(file_id, Some(src));
475
476    // 8) Host + analysis.
477    let mut host = AnalysisHost::default();
478    host.apply_change(change);
479    (host.analysis(), file_id)
480
481}
482
483static NEXT_VFS_FILE_ID: AtomicU32 = AtomicU32::new(1_000_000);
484
485fn alloc_vfs_file_id() -> VfsFileId {
486    let raw = NEXT_VFS_FILE_ID.fetch_add(1, Ordering::Relaxed);
487    VfsFileId::from_raw(raw)
488}
489
490/// Gets the caller method, based on the input code and the cursor positions
491/// If successful, returns the `String` of the caller method
492/// If unsuccessful, returns an `ExtractionError`
493pub fn parent_method(
494    source_file: &SourceFile,
495    range: (u32, u32),
496) -> Result<String, ExtractionError> {
497    let start: TextSize = TextSize::new(range.0);
498
499    // We want the last function that occurs before the start of the range
500    let node: Option<ra_ap_syntax::ast::Fn> = algo::find_node_at_offset::<ra_ap_syntax::ast::Fn>(
501        source_file.syntax(),
502        start,
503    );
504
505    let fn_name: String = match node {
506        Some(n) => n.name().map_or("".to_string(), |name| name.text().to_string()),
507        None => "".to_string(),
508    };
509
510    if fn_name.is_empty() {
511        return Err(ExtractionError::ParentMethodNotFound);
512    }
513
514    Ok( fn_name.trim().to_string() )
515}
516
517/// Return the name of the function/method that contains the given [start, end)
518/// byte range in `text`. Returns empty string if none found.
519///
520/// NOTE: Requires `proc-macro2` with the "span-locations" feature enabled.
521pub fn parent_method_from_text(text: String, range: &(u32, u32)) -> String {
522    let Ok(file) = syn::parse_file(&text) else {
523        return String::new();
524    };
525
526    let line_starts = compute_line_starts(&text);
527    let selection = (range.0 as usize, range.1 as usize);
528
529    let mut visitor = FnCollector {
530        text: &text,
531        line_starts: &line_starts,
532        fns: Vec::new(),
533    };
534    visitor.visit_file(&file);
535
536    // Find the *innermost* function that contains the selection.
537    let mut best: Option<(&str, usize, usize)> = None;
538    for (name, start, end) in visitor.fns {
539        if contains((start, end), selection) {
540            match best {
541                None => best = Some((name, start, end)),
542                Some((_, b_start, b_end)) => {
543                    if (end - start) < (b_end - b_start) {
544                        best = Some((name, start, end));
545                    }
546                }
547            }
548        }
549    }
550
551    best.map(|(name, _, _)| name.to_string()).unwrap_or_default()
552}
553
554/// Collect function spans (name, start_byte, end_byte).
555struct FnCollector<'a> {
556    text: &'a str,
557    line_starts: &'a [usize],
558    fns: Vec<(&'a str, usize, usize)>,
559}
560
561impl<'a, 'ast> Visit<'ast> for FnCollector<'a> {
562    fn visit_item_fn(&mut self, node: &'ast syn::ItemFn) {
563        // Free function
564        let name = node.sig.ident.to_string();
565        let (start, end) = span_to_offsets(node.block.span(), self.line_starts, self.text);
566        self.fns.push((self.leak(name), start, end));
567        // Recurse into the function in case there are nested modules, etc.
568        syn::visit::visit_item_fn(self, node);
569    }
570
571    fn visit_item_impl(&mut self, node: &'ast syn::ItemImpl) {
572        for item in &node.items {
573            if let syn::ImplItem::Fn(m) = item {
574                let name = m.sig.ident.to_string();
575                let (start, end) = span_to_offsets(m.block.span(), self.line_starts, self.text);
576                self.fns.push((self.leak(name), start, end));
577            }
578        }
579        syn::visit::visit_item_impl(self, node);
580    }
581
582    fn visit_item_trait(&mut self, node: &'ast syn::ItemTrait) {
583        for item in &node.items {
584            if let syn::TraitItem::Fn(f) = item {
585                if let Some(block) = &f.default {
586                    let name = f.sig.ident.to_string();
587                    let (start, end) = span_to_offsets(block.span(), self.line_starts, self.text);
588                    self.fns.push((self.leak(name), start, end));
589                }
590            }
591        }
592        syn::visit::visit_item_trait(self, node);
593    }
594
595    fn visit_item_mod(&mut self, node: &'ast syn::ItemMod) {
596        // For inline modules (`mod m { ... }`) the content is present; recurse.
597        if let Some((_brace, items)) = &node.content {
598            for it in items {
599                self.visit_item(it);
600            }
601        }
602        // For `mod m;` (file modules) we can't see into another file from this text.
603    }
604}
605
606impl<'a> FnCollector<'a> {
607    /// Leak a `String` into a `'static` str so we can store &str in self.fns without lifetimes hell.
608    /// This is fine for short-lived analysis in a tool; if you prefer, store `String` instead.
609    fn leak(&self, s: String) -> &'static str {
610        Box::leak(s.into_boxed_str())
611    }
612}
613
/// Returns the byte offset at which each line of `text` begins.
///
/// Entry `i` holds the start of line `i + 1` (line numbers are 1-based), so
/// the first entry is always `0`; a new line begins right after every `\n`.
fn compute_line_starts(text: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(text.match_indices('\n').map(|(pos, _)| pos + 1))
        .collect()
}
624
625/// Convert a Span to byte start/end offsets within `text`.
626///
627/// This relies on proc_macro2's "span-locations" to get (line, column).
628fn span_to_offsets(span: Span, line_starts: &[usize], text: &str) -> (usize, usize) {
629    let start = span.start();
630    let end = span.end();
631
632    // Line numbers are 1-based; columns are (effectively) byte offsets within the line.
633    let start_off = lc_to_offset(start.line, start.column, line_starts, text);
634    let end_off = lc_to_offset(end.line, end.column, line_starts, text);
635
636    (start_off.min(text.len()), end_off.min(text.len()))
637}
638
/// Converts a (1-based `line`, 0-based `column`) position into a byte offset
/// within `text`.
///
/// `column` is counted in characters, which is how proc_macro2 reports span
/// columns, so it is resolved by walking the `char`s that follow the line
/// start instead of being added as a byte count. For ASCII-only text the two
/// are the same.
///
/// `line_starts[i]` must be the byte offset at which line `i + 1` begins.
///
/// Returns `text.len()` when `line` is `0` or beyond the last entry of
/// `line_starts`, when the line start is not a valid position in `text`, or
/// when the position lies at or past the end of `text`.
fn lc_to_offset(line: usize, column: usize, line_starts: &[usize], text: &str) -> usize {
    let Some(&base) = line.checked_sub(1).and_then(|idx| line_starts.get(idx)) else {
        return text.len();
    };

    // `get` (rather than slicing) tolerates a `base` that is out of range or
    // does not fall on a character boundary.
    let Some(rest) = text.get(base..) else {
        return text.len();
    };

    rest.char_indices()
        .nth(column)
        .map_or(text.len(), |(byte_col, _)| base + byte_col)
}
646
/// Reports whether `inner` is a well-formed range (`start <= end`) that lies
/// entirely within `outer`.
///
/// Both arguments are `(start, end)` pairs; touching the bounds of `outer`
/// still counts as contained.
fn contains(outer: (usize, usize), inner: (usize, usize)) -> bool {
    let starts_inside = outer.0 <= inner.0;
    let ends_inside = inner.1 <= outer.1;
    let well_formed = inner.0 <= inner.1;

    starts_inside && ends_inside && well_formed
}