owl-ms-language-server 0.12.0

use crate::catalog::CatalogUri;
use crate::consts::{get_fixed_infos, keyword_hover_info};
use crate::error::{Error, Result, ResultExt, ResultIterator};
use crate::pos::Position;
use crate::queries::{
    self, treesitter_highlight_capture_into_semantic_token_type_index, NODE_TYPES,
};
use crate::web::{url_to_filename, HttpClient};
use crate::{
    catalog::Catalog, debugging::timeit, queries::ALL_QUERIES, range::Range,
    rope_provider::RopeProvider, LANGUAGE,
};
use cached::proc_macro::cached;
use horned_owl::model::Component::{self, AnnotationAssertion};
use horned_owl::model::{
    AnnotationProperty, AnnotationSubject, AnnotationValue, ArcStr, Build, DataProperty, Datatype,
    DeclareAnnotationProperty, DeclareClass, DeclareDataProperty, DeclareDatatype,
    DeclareNamedIndividual, DeclareObjectProperty, Literal, NamedIndividual, ObjectProperty,
};
use horned_owl::ontology::set::SetOntology;
use itertools::Itertools;
use log::{debug, error, info, trace, warn};
use pretty::RcDoc;
use ropey::Rope;
use sophia::api::graph::{Graph, MutableGraph};
use sophia::api::ns::Namespace;
use sophia::api::prelude::Any;
use sophia::api::source::TripleSource;
use sophia::api::term::{BnodeId, LanguageTag, SimpleTerm, Term};
use sophia::api::MownStr;
use sophia::inmem::graph::LightGraph;
use sophia::iri::resolve::Oxiri;
use sophia::iri::IriRef;
use std::fmt::Debug;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::iter::once;
use std::path::Path;
use std::sync::{Arc, LazyLock, Mutex, MutexGuard};
use std::time::{Duration, SystemTime};
use std::{
    collections::{HashMap, HashSet},
    fmt::Display,
    fs,
    path::PathBuf,
};
use tokio::task::JoinHandle;
use tower_lsp::lsp_types::{
    self, DiagnosticSeverity, DidChangeTextDocumentParams, InlayHint, InlayHintLabel,
    PositionEncodingKind, SemanticToken, SymbolKind, Url, WorkspaceFolder,
};
use tree_sitter_c2rust::{InputEdit, Node, Parser, Query, QueryCursor, StreamingIterator, Tree};

/// Process-wide tree-sitter parser, created lazily on first access and guarded by a mutex.
///
/// The parser is configured with [`LANGUAGE`]. Its logger forwards tree-sitter's parse and
/// lex messages to `trace!` under the `tree-sitter-parse` and `tree-sitter-lex` targets.
static GLOBAL_PARSER: LazyLock<Mutex<Parser>> = LazyLock::new(|| {
    let mut parser = Parser::new();
    parser
        .set_language(&LANGUAGE)
        .expect("the language to be valid");
    parser.set_logger(Some(Box::new(|type_, str| match type_ {
        tree_sitter_c2rust::LogType::Parse => trace!(target: "tree-sitter-parse", "{str}"),
        tree_sitter_c2rust::LogType::Lex => trace!(target: "tree-sitter-lex", "{str}"),
    })));

    Mutex::new(parser)
});

/// Acquires exclusive access to the shared tree-sitter parser.
///
/// # Panics
///
/// Panics when the mutex is poisoned, i.e. a thread panicked while holding the guard.
pub fn lock_global_parser() -> MutexGuard<'static, Parser> {
    let lock_result = GLOBAL_PARSER.lock();
    lock_result.expect("the parser should not panic")
}

/// Process-wide horned-owl `Build`, created lazily on first access and guarded by a mutex.
static GLOBAL_BUILD_ARC: LazyLock<Mutex<Build<ArcStr>>> =
    LazyLock::new(|| Mutex::new(Build::new_arc()));

/// Acquires exclusive access to the shared horned-owl `Build`.
///
/// # Panics
///
/// Panics when the mutex is poisoned, i.e. a thread panicked while holding the guard.
pub fn lock_global_build_arc() -> MutexGuard<'static, Build<ArcStr>> {
    let lock_result = GLOBAL_BUILD_ARC.lock();
    lock_result.expect("the horned owl builder should not panic")
}

/// Document container for one workspace folder.
///
/// Holds the internal and external documents that were inserted for the folder, together
/// with the catalogs that were loaded when the workspace was created.
#[derive(Debug)]
pub struct Workspace {
    /// Internal documents, keyed by their file path
    internal_documents: HashMap<PathBuf, InternalDocument>,
    /// External documents, keyed by their URL
    external_documents: HashMap<Url, ExternalDocument>,
    /// The workspace folder this workspace was created for
    folder: WorkspaceFolder,
    /// Catalogs loaded from the workspace folder URI
    catalogs: Vec<Catalog>,
    // TODO remove pub
    /// Join handles of spawned tasks; presumably background indexing jobs — TODO confirm
    pub index_handles: Vec<JoinHandle<()>>,
}

impl Display for Workspace {
    /// Writes the folder name followed by the folder URI.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let folder = &self.folder;
        f.write_fmt(format_args!("Workspace {} at {}", folder.name, folder.uri))
    }
}

impl Workspace {
    pub fn new(workspace_folder: WorkspaceFolder) -> Self {
        let catalogs = Catalog::load_catalogs_recursive(&workspace_folder.uri);
        info!(
            "New workspace {} at {} with catalogs {catalogs:?}",
            workspace_folder.name, workspace_folder.uri
        );
        Workspace {
            internal_documents: HashMap::new(),
            external_documents: HashMap::new(),
            folder: workspace_folder,
            catalogs,
            index_handles: Vec::new(),
        }
    }

    /// Inserts an internal document into the workspace and returns a reference to it.
    ///
    /// Documents are keyed by their file path; a document that was stored under the same
    /// path is replaced.
    pub fn insert_internal_document(&mut self, document: InternalDocument) -> &InternalDocument {
        use std::collections::hash_map::Entry;

        debug!("Insert internal document {document}");

        // The entry API stores the document and hands back the stored value with a single
        // hash lookup, so no second `get` (and no unreachable panic path) is needed.
        let stored = match self.internal_documents.entry(document.path.clone()) {
            Entry::Occupied(mut slot) => {
                slot.insert(document);
                slot.into_mut()
            }
            Entry::Vacant(slot) => slot.insert(document),
        };
        &*stored
    }

    pub fn get_internal_document(&self, path: &Path) -> Result<&InternalDocument> {
        self.internal_documents
            .get(path)
            .ok_or(Error::InternalDocumentNotFound(path.to_path_buf()))
    }

    pub fn take_internal_document(&mut self, path: &Path) -> Result<InternalDocument> {
        self.internal_documents
            .remove(path)
            .ok_or(Error::InternalDocumentNotFound(path.to_path_buf()))
    }

    /// Returns `true` when an internal document is stored under `path`.
    pub fn contains_internal_document(&self, path: &Path) -> bool {
        self.internal_documents.contains_key(path)
    }

    /// Iterates over all internal documents of this workspace (in hash map order).
    pub fn internal_documents(
        &self,
    ) -> std::collections::hash_map::Values<'_, PathBuf, InternalDocument> {
        self.internal_documents.values()
    }

    /// Inserts an external document into the workspace and returns a reference to it.
    ///
    /// Documents are keyed by their URL; a document that was stored under the same URL is
    /// replaced.
    pub fn insert_external_document(&mut self, document: ExternalDocument) -> &ExternalDocument {
        use std::collections::hash_map::Entry;

        debug!(
            "Insert external document {} length is {} into workspace at {}",
            document.uri,
            document.text.len(),
            self.folder.uri
        );

        // The entry API stores the document and hands back the stored value with a single
        // hash lookup, so no second `get` (and no unreachable panic path) is needed.
        let stored = match self.external_documents.entry(document.uri.clone()) {
            Entry::Occupied(mut slot) => {
                slot.insert(document);
                slot.into_mut()
            }
            Entry::Vacant(slot) => slot.insert(document),
        };
        &*stored
    }

    /// Iterates over all external documents of this workspace (in hash map order).
    /// Only compiled for tests.
    #[cfg(test)]
    pub fn external_documents(
        &self,
    ) -> std::collections::hash_map::Values<'_, Url, ExternalDocument> {
        self.external_documents.values()
    }

    pub fn catalog_contains_url(&self, url: &Url) -> bool {
        self.catalogs.iter().any(|catalog| {
            match &url
                .to_file_path()
                .inspect_err(|()| error!("Url is not a filepath {url}"))
            {
                Ok(path) => catalog.contains(path),
                Err(()) => false,
            }
        })
    }

    /// Returns `true` when `url` can be expressed relative to the workspace folder URI
    /// (`Url::make_relative` returns `Some`).
    pub fn workspace_folder_is_base_of_url(&self, url: &Url) -> bool {
        self.folder.uri.make_relative(url).is_some()
    }

    pub fn all_frame_infos(&self) -> impl Iterator<Item = &FrameInfo> {
        self.internal_documents()
            .flat_map(InternalDocument::all_frame_infos)
    }

    /// Returns the path of the cache folder.
    ///
    /// This is `owl-ms-language-server` inside the user cache directory when one is
    /// available, otherwise `.owl` inside the workspace folder.
    ///
    /// # Panics
    ///
    /// Panics when no user cache directory is available and the workspace folder URI is not
    /// a file path.
    pub fn shared_cache_folder_path(&self) -> PathBuf {
        match dirs::cache_dir() {
            // Well all projects can even share a cache dir
            Some(user_cache) => user_cache.join("owl-ms-language-server"),
            // If the cache folder can not be accessed then lets just use a local folder
            None => {
                let workspace_root = self
                    .folder
                    .uri
                    .to_file_path()
                    .expect("Workspace folder url should be file path");
                workspace_root.join(".owl")
            }
        }
    }

    // TODO #28 maybe return a reference?
    /// This searches in the frames of internal documents (case-insensitive)
    pub fn search_frame(&self, partial_text: &str) -> Vec<(String, Iri, FrameInfo)> {
        let partial_lower = partial_text.to_lowercase();
        self.internal_documents
            .values()
            .flat_map(|doc| {
                doc.all_frame_infos()
                    .filter_map(|item| {
                        if item.iri.to_lowercase().contains(&partial_lower) {
                            Some((item.iri.clone(), item.iri.clone(), item.clone()))
                        } else {
                            item.annotations
                                .values()
                                .find_map(|values| {
                                    values.iter().find(|value| {
                                        value.to_lowercase().starts_with(&partial_lower)
                                    })
                                })
                                .map(|full| (full.clone(), item.iri.clone(), item.clone()))
                        }
                    })
                    .collect_vec()
            })
            .collect_vec()

        // TODO search in external frames
    }

    /// Looks up the frame info for `iri` in all internal and external documents.
    ///
    /// The infos of the internal documents, the external documents and the fixed infos are
    /// merged (in that order) into one frame info. Returns `None` when nothing was found.
    ///
    /// - `iri` should be a full iri
    pub fn get_frame_info(&self, iri: &Iri) -> Option<FrameInfo> {
        debug!(
            "getting workspace frame info for {iri} on {}",
            self.folder.uri
        );

        let from_internal = self
            .internal_documents
            .values()
            .filter_map(|document| document.frame_info_by_iri(iri));
        let from_external = self
            .external_documents
            .values()
            .filter_map(|document| document.get_frame_info(iri));
        let fixed = get_fixed_infos(iri);

        from_internal
            .chain(from_external)
            .chain(fixed)
            .tree_reduce(FrameInfo::merge)
    }

    /// Looks up the frame info for `iri` in the given reachable documents only.
    ///
    /// This takes the slice of reachable document URLs directly instead of a document,
    /// because computing the reachable documents on every call would happen way too often.
    /// Generate the slice using `reachable_docs_recursive`. URLs without a loaded document
    /// are skipped. The found infos and the fixed infos are merged into one frame info.
    pub fn get_frame_info_recursive(
        workspace: &Workspace,
        iri: &Iri,
        reachable_docs: &[Url],
    ) -> Option<FrameInfo> {
        let from_documents = reachable_docs.iter().filter_map(|url| {
            match workspace.document_by_url(url)? {
                DocumentReference::Internal(document) => document.frame_info_by_iri(iri),
                DocumentReference::External(document) => document.get_frame_info(iri),
            }
        });

        from_documents
            .chain(get_fixed_infos(iri))
            .tree_reduce(FrameInfo::merge)
    }

    /// Produces the info text for a syntax node of `doc`.
    ///
    /// Frame and `class_iri` nodes delegate to their first named child. IRI nodes are
    /// resolved to a full IRI and rendered through the matching frame info; when there is no
    /// frame info the IRI itself is returned. Every other node kind falls back to the
    /// keyword hover info.
    pub fn node_info(&self, node: &Node, doc: &InternalDocument) -> String {
        // Renders the frame info of a full IRI, falling back to the IRI itself
        let describe = |full_iri: String| -> String {
            match self.get_frame_info(&full_iri) {
                Some(frame_info) => frame_info.info_display(self),
                None => full_iri,
            }
        };

        match node.kind() {
            "class_frame" | "annotation_property_frame" | "class_iri" => {
                // Goto first named child and repeat
                match node.named_child(0) {
                    Some(iri_node) => self.node_info(&iri_node, doc),
                    None => "Class Frame\nNo iri was found".to_string(),
                }
            }
            "full_iri" => describe(trim_full_iri(node_text(node, doc.rope()))),
            "simple_iri" | "abbreviated_iri" => {
                let written_iri = node_text(node, doc.rope());
                debug!("Getting node info for {written_iri} at doc {}", doc.uri);
                let full_iri = doc
                    .abbreviated_iri_to_full_iri(&written_iri)
                    .unwrap_or_else(|| written_iri.to_string());
                describe(full_iri)
            }
            kind => keyword_hover_info(kind),
        }
    }

    /// Finds the first catalog entry whose name equals `url`, together with its catalog.
    pub fn find_catalog_uri(&self, url: &Url) -> Option<(&Catalog, &CatalogUri)> {
        let wanted_name = url.to_string();

        self.catalogs.iter().find_map(|catalog| {
            catalog
                .all_catalog_uris()
                .into_iter()
                .find(|entry| entry.name == wanted_name)
                .map(|entry| (catalog, entry))
        })
    }
    /// Returns the already loaded document for `url`. Does no loading.
    ///
    /// The lookup order is: internal document at the catalog-mapped path, internal document
    /// with that exact URL, external document with that URL.
    pub fn document_by_url(&'_ self, url: &Url) -> Option<DocumentReference<'_>> {
        let internal = self
            .url_to_path_with_catalog(url)
            .and_then(|path| self.internal_documents.get(&path))
            // TODO maybe change this
            // Lets try to find the doc in internal docs
            .or_else(|| {
                self.internal_documents
                    .values()
                    .find(|document| &document.uri == url)
            });

        match internal {
            Some(document) => Some(DocumentReference::Internal(document)),
            None => self
                .external_documents
                .get(url)
                .map(DocumentReference::External),
        }
    }

    // TODO can this be done without two calls?
    /// Resolves/Loads a URL (file or http/https protocol) to a document.
    ///
    /// Returns `Ok(None)` when the document is already part of the workspace. Otherwise the
    /// document is taken from the web cache, or loaded through the catalog (file path or
    /// remote URL), or downloaded from `url` directly. The loaded document is returned but
    /// NOT inserted into the workspace by this function.
    ///
    /// Downloaded documents are written to the web cache.
    ///
    /// # Errors
    ///
    /// Propagates download, file loading and document construction errors.
    ///
    /// # Panics
    ///
    /// Panics when the blocking download task can not be joined.
    pub async fn resolve_url_to_document(
        workspace: &Workspace,
        url: &Url,
        http_client: Arc<dyn HttpClient>,
    ) -> Result<Option<Document>> {
        if workspace.document_by_url(url).is_some() {
            return Ok(None);
        }

        if let Ok(Some(doc)) = read_cached_doc(workspace, url).inspect_log() {
            debug!("Document found in web cache {url}");
            return Ok(Some(doc));
        }

        warn!("Document NOT found in web cache {url}");

        // TODO maybe use workspace.url_to_path_with_catalog(url)

        let Some((catalog, catalog_uri)) = workspace.find_catalog_uri(url) else {
            warn!("Url {url} could not be found in any catalog");

            let url_copy = url.clone();
            let document_text =
                tokio::task::spawn_blocking(move || http_client.get(url_copy.as_str()))
                    .await
                    .expect("join should work")?;

            let document = timeit("external doc new", || {
                ExternalDocument::new(document_text, url.clone())
            })?;

            timeit("cache_doc", || cache_doc(workspace, &document));

            return Ok(Some(Document::External(document)));
        };

        if let Ok(real_url) = Url::parse(&catalog_uri.uri) {
            if workspace.document_by_url(&real_url).is_some() {
                return Ok(None);
            }

            if let Ok(path) = real_url.to_file_path() {
                // This is an absolute file path url
                Ok(Some(Workspace::resolve_path_to_document(
                    &path,
                    url.clone(),
                )?))
            } else {
                // This is an external url.
                // The HTTP client is blocking, so run it on the blocking thread pool (like
                // the download without a catalog above) instead of stalling the executor.
                let fetch_url = real_url.clone();
                let document_text =
                    tokio::task::spawn_blocking(move || http_client.get(fetch_url.as_str()))
                        .await
                        .expect("join should work")?;
                // TODO maybe use url or just the requested url
                // let document = ExternalDocument::new(document_text, url)?;
                let document = ExternalDocument::new(document_text, url.clone())?;

                cache_doc(workspace, &document);

                Ok(Some(Document::External(document)))
            }
        } else {
            // The catalog uri is most likely a relative file path, so lets try that
            let path = catalog.parent_folder().join(&catalog_uri.uri);
            let path_url =
                Url::from_file_path(&path).map_err(|()| Error::InvalidFilePath(path.clone()))?;
            if workspace.document_by_url(&path_url).is_some() {
                return Ok(None);
            }

            Ok(Some(Workspace::resolve_path_to_document(
                &path,
                url.clone(),
            )?))
        }
    }

    /// Converts a URL that is listed in a catalog into the file path it is mapped to.
    ///
    /// Returns `None` when the URL is not in any catalog, or when the catalog entry is an
    /// absolute URL that is not a file path.
    pub fn url_to_path_with_catalog(&self, url: &Url) -> Option<PathBuf> {
        let (catalog, catalog_uri) = self.find_catalog_uri(url)?;

        match Url::parse(&catalog_uri.uri) {
            Ok(absolute_url) => absolute_url.to_file_path().ok(),
            // The catalog uri is most likely a relative file path, so lets try that
            Err(_) => Some(catalog.parent_folder().join(&catalog_uri.uri)),
        }
    }

    fn resolve_path_to_document(path: &Path, original_url: Url) -> Result<Document> {
        // I think I don't care about the URL that is the path to the file.
        // Let's ignore it and use the original URL instead.
        let (document_text, path_url) = load_file_from_disk(path.to_path_buf())?;

        match path
            .extension()
            .and_then(|s| s.to_str())
            .unwrap_or_default()
        {
            "omn" => {
                let document = InternalDocument::new_with_path(
                    original_url,
                    -1,
                    document_text,
                    path.to_path_buf(),
                );
                Ok(Document::Internal(document))
            }
            "owl" | "owx" => {
                let document = ExternalDocument::new(document_text, path_url)?;
                Ok(Document::External(document))
            }
            ext => Err(Error::DocumentNotSupported(ext.to_string())),
        }
    }

    // pub fn diagnostics(&self) {
    //     for (path, doc) in &self.internal_documents {
    //         doc.stage2.all_frame_infos;
    //     }
    // }
}

fn load_file_from_disk(path: PathBuf) -> Result<(String, Url)> {
    info!("Loading file from disk {}", path.display());

    Ok((
        fs::read_to_string(&path)?,
        Url::from_file_path(&path).map_err(|()| Error::InvalidFilePath(path))?,
    ))
}

fn read_cached_doc(workspace: &Workspace, url: &Url) -> Result<Option<Document>> {
    if cfg!(test) {
        // Do not cache in tests
        return Ok(None);
    }

    let owl_dir = workspace.shared_cache_folder_path();
    let web_cache = owl_dir.join("web_cache");
    let file_name = url_to_filename(url.as_ref());

    debug!("try read cached doc {}", file_name.display());

    let mut cache_valid = true;
    if let Ok(file) = fs::File::open(web_cache.join(&file_name)) {
        let modified_time = file.metadata()?.modified()?;
        if modified_time + Duration::from_secs(60 * 60 * 24 * 30) < SystemTime::now() {
            // invalidate cache
            warn!("Cached document is stale (older then 30 days) {url}");
            cache_valid = false;
        }
    }
    if !cache_valid {
        fs::remove_file(&file_name)?;
        return Ok(None);
    }

    if let Ok(some) = fs::read(web_cache.join(file_name)) {
        let text = String::from_utf8(some).expect("Cached file should be valid UTF-8");
        let doc = ExternalDocument::new(text, url.clone())?;
        Ok(Some(Document::External(doc)))
    } else {
        Ok(None)
    }
}

fn cache_doc(workspace: &Workspace, doc: &ExternalDocument) {
    if cfg!(test) {
        // Do not cache in tests
        return;
    }

    let file_name = url_to_filename(doc.uri.as_ref());
    let owl_dir = workspace.shared_cache_folder_path();

    if let Err(err) = fs::create_dir_all(&owl_dir) {
        error!("Dir create Error: {err}");
    }
    if let Err(err) = fs::write(owl_dir.join(".gitignore"), "web_cache") {
        error!("File write Error: {err}");
    }

    let web_cache = owl_dir.join("web_cache");
    if let Err(err) = fs::create_dir_all(web_cache.clone()) {
        error!("Dir create Error: {err}");
    }
    if let Err(err) = fs::write(web_cache.join(file_name), &doc.text) {
        error!("Web cache Error: {err}");
    } else {
        debug!("Added {} to web cache", doc.uri);
    }
}

/// A borrowed document of a workspace, either internal or external.
#[derive(Debug, PartialEq, Eq)]
pub enum DocumentReference<'a> {
    // Not boxing this is fine because the size ratio is just about 1.6
    /// Reference to an internal document
    Internal(&'a InternalDocument),
    /// Reference to an external document
    External(&'a ExternalDocument),
}

/// An owned document, either internal or external.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)] // Not boxing this is fine because the size ratio is just about 1.6
pub enum Document {
    /// An internal document
    Internal(InternalDocument),
    /// An external document
    External(ExternalDocument),
}

/// Internal documents are OMN files on disk.
/// Text -> Parsed -> Queried -> Analyzed -> ``InternalDocument``
#[derive(Debug)]
pub struct InternalDocument {
    /// File location
    pub path: PathBuf,
    /// URL and location where this document was loaded from
    pub uri: Url,
    /// Document version. `edit` only applies changes with a higher version.
    pub version: i32,
    /// The queried stage of the document. It contains the parsed document (rope and syntax
    /// tree) and the prefixes.
    queried_document: QueriedDocument,
    /// The analysis results that were computed from `queried_document`
    stage2: Stage2Document,
}

/// An internal document that has analysis results.
#[derive(Debug)]
struct Stage2Document {
    // TODO maybe move everything into Internal document or here
    // /// File location
    // path: PathBuf,
    // /// URL and location where this document was loaded from
    // uri: Url,
    // version: i32,
    /// Presumably the IRIs that are defined in this document — TODO confirm
    definitions: HashSet<Iri>,
    /// Presumably the IRIs that are referenced in this document — TODO confirm
    references: HashSet<Iri>,

    /// Frame infos of this document, keyed by IRI
    all_frame_infos: HashMap<Iri, FrameInfo>,
    /// Diagnostics that were computed from this document alone. They are combined with the
    /// workspace dependent diagnostics in `InternalDocument::diagnostics`.
    local_diagnostics: Vec<Diagnostic>,
    /// NOTE(review): the meaning of the two lists is not visible here; presumably
    /// (import URLs, prefix URLs) — confirm against the analysis code
    directly_reachable_urls: (Vec<Url>, Vec<Url>),
    /// Presumably the source ranges where each IRI occurs — TODO confirm
    iri_locations: HashMap<Iri, Vec<Range>>,
}

/// An IRI together with a location and the kind of frame.
/// Presumably the place where the IRI is defined — TODO confirm
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct IriDefinition {
    /// The IRI
    pub iri: Iri,
    /// The location that belongs to the IRI
    pub location: Location,
    /// The frame type that belongs to the IRI
    pub kind: FrameType,
}

/// A diagnostic message for a range of a document.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Diagnostic {
    /// The range the diagnostic applies to
    pub range: Range,
    /// The message of the diagnostic
    pub label: String,
}

impl Display for InternalDocument {
    /// Writes path, URL, version and the byte length of the text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let path = self.path.display();
        let text_bytes = self.rope().len_bytes();

        f.write_fmt(format_args!(
            "InternalDocument {{ path = \"{}\", url = \"{}\" version = {}, rope.len_bytes = {}}}",
            path, self.uri, self.version, text_bytes
        ))
    }
}

/// Hashes the document by its path and version only. The text is not hashed.
///
/// NOTE(review): `PartialEq` below compares the rope text instead. Two documents can
/// therefore compare equal while hashing differently (same text with another path or
/// version), which is at odds with the `Hash`/`Eq` contract (`a == b` implies
/// `hash(a) == hash(b)`). Confirm that no hash based collection relies on both impls.
impl core::hash::Hash for InternalDocument {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.path.hash(state);
        Hash::hash(&self.version, state);
    }
}
impl Eq for InternalDocument {}
/// Two internal documents are equal when their text (rope) is equal.
/// Path, URL and version are not compared.
impl PartialEq for InternalDocument {
    fn eq(&self, other: &Self) -> bool {
        self.rope() == other.rope()
    }
}

impl InternalDocument {
    pub fn new(uri: Url, version: i32, text: String) -> InternalDocument {
        let path = uri.to_file_path().expect("URL should be a file path");
        Self::new_with_path(uri, version, text, path)
    }

    pub fn new_with_path(uri: Url, version: i32, text: String, path: PathBuf) -> InternalDocument {
        let tree = timeit("create_document / parse", || {
            lock_global_parser()
                .parse(&text, None)
                .expect("language to be set, no timeout to be used, no cancellation flag")
        });

        let rope = Rope::from(text);
        let parsed_document = ParsedDocument {
            path: path.clone(),
            uri: uri.clone(),
            version,
            tree,
            rope,
        };

        let queried_document: QueriedDocument = parsed_document.into();

        let stage2: Stage2Document = queried_document.analyze();

        debug!("Stage2Document -> InternalDocument");

        InternalDocument {
            path,
            uri,
            version,
            queried_document,
            stage2,
        }
    }

    /// The text of this document.
    pub fn rope(&self) -> &Rope {
        let parsed = &self.queried_document.parsed_document;
        &parsed.rope
    }

    /// The syntax tree of this document.
    pub fn tree(&self) -> &Tree {
        let parsed = &self.queried_document.parsed_document;
        &parsed.tree
    }

    /// The prefix map of this document, as stored in the queried document.
    pub fn prefixes(&self) -> &HashMap<String, String> {
        let queried = &self.queried_document;
        &queried.prefixes
    }

    /// Returns the local diagnostics of this document followed by the semantic errors that
    /// are computed against `workspace`.
    pub fn diagnostics(&self, workspace: &Workspace) -> Vec<Diagnostic> {
        let workspace_diagnostics = timeit("semantic errors", || semantic_errors(self, workspace));

        let mut all_diagnostics = self.stage2.local_diagnostics.clone();
        all_diagnostics.extend(workspace_diagnostics);
        all_diagnostics
    }

    /// Pretty prints the whole document with `options.ruler_width` as the line width.
    pub fn formatted(&self, options: &FormattingSettings) -> String {
        let root_node = self.tree().root_node();
        let layout = to_doc(&root_node, self.rope(), options);
        debug!("doc:\n{layout:#?}");

        let line_width = options.ruler_width as usize;
        layout.pretty(line_width).to_string()
    }

    /// Looks up a node of this document's syntax tree by its id.
    pub fn node_by_id(&self, id: usize) -> Option<Node<'_>> {
        let parsed = &self.queried_document.parsed_document;
        node_by_id(parsed, id)
    }

    /// Returns all document URL's that can be reached (imports, prefixes, ...) from this internal document.
    /// Does not load anything.
    ///
    /// Delegates to `reachable_docs_recursive_cached`.
    pub fn reachable_docs_recursive(
        &self,
        workspace: &Workspace,
        include_prefix: bool,
    ) -> Vec<Url> {
        reachable_docs_recursive_cached(self, workspace, include_prefix)
    }

    /// Adds the URL of this document and of every document that is reachable from it to
    /// `result`.
    ///
    /// A document whose URL is already in `result` is not visited again, which also ends
    /// the recursion on import cycles. Reachable URLs without a loaded document are skipped.
    fn reachable_docs_recursive_helper(
        &self,
        result: &mut HashSet<Url>,
        workspace: &Workspace,
        include_prefix: bool,
    ) -> Result<()> {
        if result.contains(&self.uri) {
            // Do nothing
            return Ok(());
        }
        result.insert(self.uri.clone());

        let urls = self.reachable_urls(include_prefix);
        for url in urls.iter() {
            // TODO maybe reactivate but for now lets not log here
            // .ok_or(Error::DocumentNotLoaded(url.clone())) //                    Workspace::resolve_url_to_document(&self.try_get_workspace()?, &url)
            // .inspect_log()
            // .ok()
            let Some(reachable) = workspace.document_by_url(url) else {
                continue;
            };

            match reachable {
                DocumentReference::Internal(internal_document) => internal_document
                    .reachable_docs_recursive_helper(result, workspace, include_prefix)?,
                DocumentReference::External(external_document) => external_document
                    .reachable_docs_recursive_helper(workspace, result, 0, include_prefix)?,
            }
        }
        Ok(())
    }

    /// Applies the incremental content changes of `params` and returns the new document.
    ///
    /// The changes are applied one after another to a copy of the rope and to a copy of the
    /// syntax tree. Afterwards the text is parsed again with the edited tree as the old
    /// tree, and the query and analysis stages are run again.
    ///
    /// The document is returned unchanged when its version is not lower than the version
    /// in `params`.
    ///
    /// # Errors
    ///
    /// Returns `Error::LspFeatureNotSupported` when a change has no range (whole file
    /// change). Range conversion and rope errors are propagated.
    ///
    /// # Panics
    ///
    /// Panics when the parser does not return a tree.
    pub fn edit(
        self,
        params: &DidChangeTextDocumentParams,
        encoding: &PositionEncodingKind,
    ) -> Result<InternalDocument> {
        if self.version >= params.text_document.version {
            return Ok(self); // no change needed
        }

        if params
            .content_changes
            .iter()
            .any(|change| change.range.is_none())
        {
            // Change the whole file
            return Err(Error::LspFeatureNotSupported(
                "Whole file (null range) change event",
            ));
        }

        debug!("content changes {:#?}", params.content_changes);

        let mut new_tree = self.tree().clone();
        let mut new_rope = self.rope().clone();
        let uri = self.uri;
        let path = self.path;

        // This range is relative to the *old* document not the new one
        // (each range is converted against `new_rope`, which already contains the changes
        // that were applied before it)
        for change in &params.content_changes {
            // Can not fail: changes without a range were rejected above
            let range = change.range.expect("range to be defined");
            // LSP ranges are in bytes when encoding is utf-8!!!
            let old_range: Range = Range::from_lsp(&range, &new_rope, encoding)?;
            let start_byte = old_range.start.byte_index(&new_rope);
            let old_end_byte = old_range.end.byte_index(&new_rope);

            // must come before the rope is changed!
            let start_char = new_rope.try_byte_to_char(start_byte)?;
            let old_end_char = new_rope.try_byte_to_char(old_end_byte)?;

            // NOTE(review): the message says "chars" but the two values are byte indices
            debug!(
                "change range in chars {start_byte}..{old_end_byte} og range {range:?} and text {}",
                change.text
            );

            // rope replace
            new_rope.try_remove(start_char..old_end_char)?;
            new_rope.try_insert(start_char, &change.text)?;

            // this must come after the rope was changed!
            let new_end_byte = start_byte + change.text.len();
            let new_end_position = Position::new_from_byte_index(&new_rope, new_end_byte);

            // Tell the tree what changed so that the parse below can reuse it
            let edit = InputEdit {
                start_byte,
                old_end_byte,
                new_end_byte,
                start_position: old_range.start.into(),
                old_end_position: old_range.end.into(),
                new_end_position: new_end_position.into(),
            };
            timeit("tree edit", || new_tree.edit(&edit));
        }
        let new_version = params.text_document.version;

        let rope_provider = RopeProvider::new(&new_rope);

        // Parse the changed text. The edited tree is passed as the old tree.
        let new_tree = {
            let mut parser_guard = lock_global_parser();
            timeit("parsing", || {
                parser_guard
                    .parse_with_options(
                        &mut |byte_idx, _| rope_provider.chunk_callback(byte_idx),
                        Some(&new_tree),
                        None,
                    )
                    .expect("language to be set, no timeout to be used, no cancellation flag")
            })
        };

        // TODO #30 prune diagnostics with
        // Remove all old diagnostics with an overlapping range. They will need to be recreated
        // Move all other diagnostics

        let parsed_document = ParsedDocument {
            uri: uri.clone(),
            path: path.clone(),
            version: new_version,
            tree: new_tree,
            rope: new_rope,
        };
        let queried_document: QueriedDocument = parsed_document.into();

        let stage2 = timeit("document.edit / stage1.analyze", || {
            queried_document.analyze()
        });

        let doc = InternalDocument {
            path,
            uri,
            version: new_version,
            queried_document,
            stage2,
        };

        Ok(doc)
    }

    /// Converts a full IRI into an abbreviated one by splitting it.
    /// Works a bit like `make_relative`
    ///
    /// With `Prefix: o: http://foo.bar/o#` and `doc.full_iri_to_abbreviated_iri("http://foo.bar/o#a")` -> `o:a`
    ///
    /// For the empty prefix name only the remainder of the IRI is returned (no colon).
    /// When several prefixes match, the shortest abbreviated IRI wins; ties are resolved
    /// lexicographically so that the result does not depend on the hash map order.
    /// Returns `None` when `full_iri` does not start with any prefix IRI.
    pub fn full_iri_to_abbreviated_iri(&self, full_iri: &str) -> Option<String> {
        self.prefixes()
            .iter()
            .filter_map(|(prefix, url)| {
                // Only a match at the very start of the IRI counts
                let local_name = full_iri.strip_prefix(url.as_str())?;
                Some(if prefix.is_empty() {
                    local_name.to_string()
                } else {
                    format!("{prefix}:{local_name}")
                })
            })
            // short IRI's are preferred
            .min_by(|left, right| left.len().cmp(&right.len()).then_with(|| left.cmp(right)))
    }

    /// Creates the inlay hints for all IRIs in `range`.
    ///
    /// A hint shows the label of the frame info that belongs to the IRI and is placed at
    /// the end of the IRI. No hint is created when there is no label, or when the lowercased
    /// IRI already contains the lowercased label reduced to its alphanumeric characters.
    /// Hints that fail to build are logged and dropped.
    pub fn inlay_hint(
        &self,
        range: Range,
        encoding: &PositionEncodingKind,
        workspace: &Workspace,
    ) -> Vec<tower_lsp::lsp_types::InlayHint> {
        let reachable_docs = self.reachable_docs_recursive(workspace, true);
        // TODO cache this in stage2
        let iri_matches = self
            .queried_document
            .parsed_document
            .query_range(&ALL_QUERIES.iri_query, range);

        iri_matches
            .into_iter()
            .flat_map(|match_| match_.captures)
            .map(|capture| {
                let written_iri = trim_full_iri(capture.node.text);
                let full_iri = self
                    .abbreviated_iri_to_full_iri(&written_iri)
                    .unwrap_or(written_iri);

                let label =
                    Workspace::get_frame_info_recursive(workspace, &full_iri, &reachable_docs)
                        .and_then(|frame_info| frame_info.label())
                        .unwrap_or_default();

                let mut label_normalized = label.to_lowercase();
                label_normalized.retain(char::is_alphanumeric);

                let iri_shows_label = full_iri.to_lowercase().contains(&label_normalized);

                if !label.is_empty() && !iri_shows_label {
                    let position = capture.node.range.end.into_lsp(self.rope(), encoding)?;
                    Ok(Some(InlayHint {
                        position,
                        label: InlayHintLabel::String(label),
                        kind: None,
                        text_edits: None,
                        tooltip: None,
                        padding_left: Some(true),
                        padding_right: None,
                        data: None,
                    }))
                } else {
                    Ok(None)
                }
            })
            .filter_and_log()
            .flatten()
            .collect()
    }

    /// Returns a copy of the frame info stored for `iri` in this document, if there is one.
    pub fn frame_info_by_iri(&self, iri: &Iri) -> Option<FrameInfo> {
        let frame_infos = &self.stage2.all_frame_infos;
        frame_infos.get(iri).cloned()
    }

    /// Iterates over every frame info stored for this document.
    pub fn all_frame_infos(&self) -> impl Iterator<Item = &FrameInfo> {
        let frame_infos = &self.stage2.all_frame_infos;
        frame_infos.values()
    }

    /// Suggests the keywords that would parse at `cursor`.
    ///
    /// The word directly before the cursor is taken as a partial keyword. For every entry of
    /// `queries::KEYWORDS` that starts with it, the missing rest of the keyword followed by
    /// `" a"` is inserted into a copy of the text, the copy is re-parsed, and the keyword is
    /// kept when the named node one position left of the cursor is a `keyword_*` node whose
    /// parent is not an error node. Only copies of the rope and tree are edited.
    ///
    /// Candidates for which no node is found produce `Error::PositionOutOfBounds`, which is
    /// handed to `filter_and_log` instead of being returned.
    ///
    /// # Panics
    ///
    /// Panics when re-parsing yields no tree or when a keyword node has no parent.
    pub fn try_keywords_at_position(&self, cursor: Position) -> Vec<String> {
        // The global parser stays locked for all candidate parses below
        let mut parser = lock_global_parser();
        let rope = self.rope().clone();
        let tree = self.tree().clone();

        // A missing line is treated like an empty line
        let line = rope
            .get_line(cursor.line() as usize)
            .map(|s| s.to_string())
            .unwrap_or_default();
        let partial = word_before_character(cursor.character_byte() as usize, &line);

        debug!("Cursor node text is {partial:?}");

        let keywords = &*queries::KEYWORDS;

        // Only keywords that continue the already typed word are candidates
        let kws = keywords
            .iter()
            .filter(|k| k.starts_with(&partial))
            .collect_vec();

        debug!("Checking {} keywords", kws.len());

        kws.iter()
            .map(|kw| {
                let mut rope_version = rope.clone();
                // Only the part of the keyword that is not typed yet gets inserted. Slicing is
                // fine because `kw` starts with `partial`.
                // NOTE(review): the trailing " a" presumably stands in for whatever follows the
                // keyword so that the parser can complete the construct — confirm.
                let change = kw[partial.len()..].to_string() + " a";

                let mut tree = tree.clone(); // This is fast

                // Must come before the rope is changed!
                let cursor_byte_index = cursor.byte_index(&rope_version);

                rope_version.insert(cursor.char_index(&rope_version), &change);

                // Must come after rope changed!
                let new_end_byte = cursor_byte_index + change.len();
                let new_end_position = Position::new_from_byte_index(&rope_version, new_end_byte);

                // Describe the insertion to tree-sitter so that the old tree can be reused
                let edit = InputEdit {
                    // Old range is just a zero size range
                    start_byte: cursor_byte_index,
                    start_position: cursor.into(),
                    old_end_byte: cursor_byte_index,
                    old_end_position: cursor.into(),

                    new_end_byte,
                    new_end_position: new_end_position.into(),
                };
                tree.edit(&edit);

                // Parse the changed text, passing the edited old tree along
                let rope_provider = RopeProvider::new(&rope_version);
                let new_tree = parser
                    .parse_with_options(
                        &mut |byte_idx, _| rope_provider.chunk_callback(byte_idx),
                        Some(&tree),
                        None,
                    )
                    .expect("language to be set, no timeout to be used, no cancellation flag");

                // The position is moved with the unchanged rope, then looked up in the new tree
                let cursor_one_left = cursor.moved_left(1, &rope);
                let cursor_node_version = new_tree
                    .root_node()
                    .named_descendant_for_point_range(
                        cursor_one_left.into(),
                        cursor_one_left.into(),
                    )
                    .ok_or(Error::PositionOutOfBounds(cursor_one_left))?;

                debug!("{cursor_node_version:#?} is {}", cursor_node_version.kind());

                // Accept only a keyword node that is not sitting directly inside an error node
                if cursor_node_version.kind().starts_with("keyword_")
                    && !cursor_node_version
                        .parent()
                        .expect("keyword to have parent")
                        .is_error()
                {
                    debug!("Found possible keyword {kw}!");
                    Ok(Some((*kw).to_string()))
                } else {
                    debug!("{kw} is not possible");
                    Ok(None)
                }
            })
            // Drop the candidates that were rejected with `Ok(None)`
            .filter_map_ok(|x| x)
            .filter_and_log()
            .collect_vec()
    }

    /// Produces the LSP semantic tokens of this document from the tree-sitter highlight query.
    ///
    /// With `range` only the captures inside that point range are considered. The tokens are
    /// ordered by start byte and encoded relative to each other: `delta_line` is the line
    /// distance to the previous token, `delta_start` is relative to the previous token start
    /// when both are on the same line and absolute otherwise.
    ///
    /// # Errors
    ///
    /// Fails when a node range cannot be converted into an LSP range for `encoding`.
    ///
    /// # Panics
    ///
    /// Panics when the bundled highlight query is not a valid query for the language.
    pub fn sematic_tokens(
        &self,
        range: Option<Range>,
        encoding: &PositionEncodingKind,
    ) -> Result<Vec<SemanticToken>> {
        let rope = self.rope();
        let highlight_query =
            Query::new(&LANGUAGE, tree_sitter_owl_ms::HIGHLIGHTS_QUERY).expect("valid query expect");

        let mut cursor = QueryCursor::new();
        if let Some(point_range) = range {
            cursor.set_point_range(point_range.into());
        }

        let mut captured = cursor
            .matches(
                &highlight_query,
                self.tree().root_node(),
                RopeProvider::new(rope),
            )
            .map_deref(|m| m.captures)
            .flatten()
            .map(|c| {
                let capture_name = highlight_query.capture_names()[c.index as usize];
                (
                    c.node,
                    treesitter_highlight_capture_into_semantic_token_type_index(capture_name),
                )
            })
            .collect_vec();

        // node start points need to be strictly in order, because the delta might otherwise negatively overflow
        // TODO is this needed? are query matches in order?
        captured.sort_unstable_by_key(|(n, _)| n.start_byte());

        let mut tokens = Vec::new();
        // Start of the previously emitted token; the character indexing is encoding dependent
        let (mut previous_line, mut previous_character) = (0, 0);
        for (node, token_type) in captured {
            let node_range: Range = node.range().into();
            // This will never happen tokens are never longer than u32
            #[allow(clippy::cast_possible_truncation)]
            let length = node_range.len_lsp(rope, encoding) as u32;
            let start = node_range.into_lsp(rope, encoding)?.start;

            let delta_line = start.line - previous_line;
            let delta_start = match delta_line {
                // same line
                0 => start.character - previous_character,
                // some other line
                _ => start.character,
            };

            tokens.push(SemanticToken {
                delta_line,
                delta_start,
                length,
                token_type,
                token_modifiers_bitset: 0,
            });

            previous_line = start.line;
            previous_character = start.character;
        }
        Ok(tokens)
    }

    /// What other urls are directly (depth = 1) reachable from this document.
    /// Contains all import URL's unprocessed.
    ///
    /// The import URLs always come first. With `include_prefix` the URLs of the prefix
    /// declarations are appended after them.
    pub fn reachable_urls(&self, include_prefix: bool) -> Vec<Url> {
        let (imports, prefixes) = &self.stage2.directly_reachable_urls;

        // Only the URLs that are handed out get cloned. The prefix URLs stay untouched when
        // they are not requested.
        let mut urls = imports.clone();
        if include_prefix {
            urls.extend(prefixes.iter().cloned());
        }
        urls
    }

    /// Expands `iri` with the prefixes of this document by delegating to the queried document.
    ///
    /// Returns `None` when the queried document can not resolve the prefix.
    pub fn abbreviated_iri_to_full_iri(&self, iri: &str) -> Option<String> {
        let queried_document = &self.queried_document;
        queried_document.abbreviated_iri_to_full_iri(iri)
    }

    /// Collects the text edits that rename an IRI inside this document.
    ///
    /// An IRI occurrence is edited when its resolved full IRI equals `full_iri` and the kind
    /// of its parent syntax node equals `iri_kind`. Each edit is a `(range, replacement)`
    /// pair. With `new_iri` the replacement is the abbreviated form found by
    /// `full_iri_to_abbreviated_iri`, or `<new_iri>` when there is none. Without `new_iri`
    /// the replacement is `original`.
    ///
    /// # Panics
    ///
    /// Panics when a captured node can not be found in the tree again or has no parent.
    pub fn rename_edits(
        &self,
        full_iri: &String,
        new_iri: Option<&String>,
        iri_kind: &String,
        original: &str,
    ) -> Vec<(Range, String)> {
        let iri_matches = self
            .queried_document
            .parsed_document
            .query(&ALL_QUERIES.iri_query);

        iri_matches
            .into_iter()
            .map(|m| {
                let [iri_capture] = &m.captures[..] else {
                    unreachable!()
                };
                let captured = &iri_capture.node;

                let iri = match captured.kind.as_str() {
                    "full_iri" => trim_full_iri(captured.text.clone()),
                    "simple_iri" | "abbreviated_iri" => self
                        .abbreviated_iri_to_full_iri(&captured.text)
                        .unwrap_or_else(|| captured.text.clone()),

                    _ => unreachable!(),
                };
                let parent_kind = self
                    .node_by_id(captured.id)
                    .expect("the node id to be valid")
                    .parent()
                    .expect("the iri node to have a parent of a specific iri kind")
                    .kind();

                // Some other IRI, or the same IRI used as a different kind of entity
                if &iri != full_iri || iri_kind != parent_kind {
                    return Ok(None);
                }

                let replacement = match new_iri {
                    Some(new_iri) => self
                        .full_iri_to_abbreviated_iri(new_iri)
                        .unwrap_or_else(|| format!("<{new_iri}>")),
                    None => original.to_string(),
                };
                Ok(Some((captured.range, replacement)))
            })
            .filter_and_log()
            .flatten()
            .collect_vec()
    }

    /// Finds the ranges of all occurrences of `full_iri` in this document.
    ///
    /// Without `include_declaration` an occurrence is left out when the kind of the parent of
    /// its parent syntax node ends with `frame`, which marks it as a definition.
    ///
    /// # Panics
    ///
    /// Panics when declarations are filtered and an IRI node lacks a parent or grandparent.
    pub fn references(&self, full_iri: &Iri, include_declaration: bool) -> Vec<Range> {
        // TODO change this into using queried_document directly
        let iri_matches = self
            .queried_document
            .parsed_document
            .query(&ALL_QUERIES.iri_query);

        iri_matches
            .into_iter()
            .map(|m| {
                let [iri_capture] = &m.captures[..] else {
                    unreachable!()
                };
                let captured = &iri_capture.node;

                let iri = match captured.kind.as_str() {
                    "full_iri" => trim_full_iri(captured.text.clone()),
                    "simple_iri" | "abbreviated_iri" => self
                        .abbreviated_iri_to_full_iri(&captured.text)
                        .unwrap_or_else(|| captured.text.clone()),

                    _ => unreachable!(),
                };
                if &iri != full_iri {
                    return Ok(None);
                }

                // The tree is only consulted when declarations have to be left out
                let is_filtered_definition = !include_declaration
                    && self.node_by_id(captured.id).is_some_and(|node| {
                        node.parent()
                            .expect("IRIs should have parent nodes")
                            .parent()
                            .expect("IRI supertype should have a parent")
                            .kind()
                            .ends_with("frame")
                    });

                if is_filtered_definition {
                    // This is a definition we want to filter out
                    Ok(None)
                } else {
                    Ok(Some(captured.range))
                }
            })
            .filter_and_log()
            .flatten()
            .collect_vec()
    }

    /// Take this document, generate the diagnostics in workspace context and send the results via the client.
    ///
    /// Every diagnostic is published with error severity together with the version of this
    /// document. Diagnostics whose range can not be converted for `encoding` are handed to
    /// `filter_and_log` and are not published.
    pub async fn publish_lsp_diagnostics(
        &self,
        workspace: &Workspace,
        encoding: &PositionEncodingKind,
        client: &tower_lsp::Client,
    ) {
        let lsp_diagnostics = self
            .diagnostics(workspace)
            .iter()
            .map(|diagnostic| {
                let range = diagnostic.range.into_lsp(self.rope(), encoding)?;
                Ok(lsp_types::Diagnostic {
                    range,
                    severity: Some(DiagnosticSeverity::ERROR),
                    source: Some("owl language server".to_string()),
                    message: diagnostic.label.clone(),
                    // code, code description, related information, tags and data stay unset
                    ..lsp_types::Diagnostic::default()
                })
            })
            .filter_and_log()
            .collect_vec();

        // TODO create diagnostics for files that depend on this file
        debug!(
            "Publish diagnostics for {} {:#?}",
            self.path.display(),
            lsp_diagnostics
        );

        client
            .publish_diagnostics(self.uri.clone(), lsp_diagnostics, Some(self.version))
            .await;
    }
}

/// Options for formatting a document.
///
/// NOTE(review): the field descriptions are derived from the field names only — confirm
/// them against the formatter that reads these settings.
#[derive(Debug)]
pub struct FormattingSettings {
    /// Presumably the number of columns per indentation level.
    pub tab_size: u32,
    /// Presumably the line width the formatter tries to stay within.
    pub ruler_width: u32,
    /// Presumably whether frames are reordered while formatting.
    pub order_frames: bool,
}

/// An internal document that has no semantic analysis. Just text and syntax tree.
///
/// The `Hash` implementation of this type only covers `path`, `uri` and `version`.
#[derive(Debug)]
struct ParsedDocument {
    /// File location
    path: PathBuf,
    /// URL and location where this document was loaded from
    uri: Url,
    /// Version number of the document; part of the hash.
    version: i32,
    /// Syntax tree that the queries run against.
    tree: Tree,
    /// Text of the document; the text of query results is read from it.
    rope: Rope,
}

/// Hashes the identity of a document (path, URI and version), not its content.
impl Hash for ParsedDocument {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // The tree and the rope are intentionally left out of the pattern
        let Self {
            path, uri, version, ..
        } = self;
        Hash::hash(path, state);
        Hash::hash(uri, state);
        Hash::hash(version, state);
    }
}

impl From<ParsedDocument> for QueriedDocument {
    fn from(val: ParsedDocument) -> Self {
        debug!("ParsedDocument -> QueriedDocument");
        let ontology_id = val.ontology_id();
        let prefixes = val.prefixes();
        let imports = val.imports();

        QueriedDocument {
            path: val.path.clone(),
            uri: val.uri.clone(),
            _version: val.version,
            parsed_document: val,
            ontology_id,
            prefixes,
            imports,
        }
    }
}

/// This acts the same way as ``node_by_id`` but for all nodes
///
/// Walks the whole tree once and returns every node keyed by its id.
pub fn node_by_id_map(tree: &Tree) -> HashMap<usize, Node<'_>> {
    let mut nodes = HashMap::new();
    let mut cursor = tree.walk();
    loop {
        let node = cursor.node();
        nodes.insert(node.id(), node);

        // Pre-order traversal: children first ...
        if cursor.goto_first_child() {
            continue;
        }
        // ... then the next sibling of the closest ancestor that has one
        loop {
            if cursor.goto_next_sibling() {
                break;
            }
            if !cursor.goto_parent() {
                // Back at the root, every node was visited
                return nodes;
            }
        }
    }
}

/// Searches the syntax tree of `parsed_document` for the node with the given `id`.
///
/// The tree is walked from the root until the id is found. Returns `None` when no node of
/// the tree has that id.
fn node_by_id(parsed_document: &ParsedDocument, id: usize) -> Option<Node<'_>> {
    let mut cursor = parsed_document.tree.walk();
    loop {
        let node = cursor.node();
        if node.id() == id {
            return Some(node);
        }

        // Pre-order traversal: children first ...
        if cursor.goto_first_child() {
            continue;
        }
        // ... then the next sibling of the closest ancestor that has one
        loop {
            if cursor.goto_next_sibling() {
                break;
            }
            if !cursor.goto_parent() {
                // Back at the root without a hit
                return None;
            }
        }
    }
}

impl ParsedDocument {
    pub fn query(&self, query: &Query) -> Vec<UnwrappedQueryMatch> {
        query_helper(self, query, None)
    }

    pub fn query_range(&self, query: &Query, range: Range) -> Vec<UnwrappedQueryMatch> {
        query_helper(self, query, Some(range))
    }

    /// Returns the ontology IRI if possible and the version IRI if possible.
    pub fn ontology_id(&self) -> Option<(String, Option<String>)> {
        match &self.query(&ALL_QUERIES.ontology)[..] {
            [] => None,
            [ontology] => match &ontology.captures[..] {
                [] => None,
                // This should be a full IRI so lets trim it
                [iri] => Some((trim_full_iri(iri.node.text.clone()), None)),
                [iri, version_iri] => Some((
                    trim_full_iri(iri.node.text.clone()),
                    Some(trim_full_iri(version_iri.node.text.clone())),
                )),
                _ => unreachable!("The query has only one capture"),
            },
            _ => unreachable!("the grammar only parses ontology zero or one time"),
        }
    }

    /// Returns the prefixes of a document (without colon `:`) in a prefix name to iri map.
    ///
    /// Some prefixes should always be defined
    ///
    /// ```owl-ms
    /// Prefix: rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    /// Prefix: rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    /// Prefix: xsd: <http://www.w3.org/2001/XMLSchema#>
    /// Prefix: owl: <http://www.w3.org/2002/07/owl#>
    /// ```
    pub fn prefixes(&self) -> HashMap<String, String> {
        self.query(&ALL_QUERIES.prefix)
            .into_iter()
            .map(|m| match &m.captures[..] {
                [name, iri] => (
                    name.node.text.trim_end_matches(':').to_string(),
                    trim_full_iri(iri.node.text.clone()),
                ),
                _ => unreachable!(),
            })
            // Horned owl has no default here. Let's keep it out for now.
            // .chain(
            //     STANDARD_PREFIX_NAMES
            //         .iter()
            //         .map(|(a, b)| (a.to_string(), b.to_string())),
            // )
            .unique()
            .collect()
    }

    pub fn imports(&self) -> Vec<Iri> {
        self.query(&ALL_QUERIES.import_query)
            .iter()
            .filter_map(|m| match &m.captures[..] {
                [iri] => Oxiri::parse(trim_full_iri(iri.node.text.clone())).ok(),
                _ => unimplemented!(),
            })
            .map(|iri| iri.as_str().to_string())
            .collect_vec()
    }
}

/// Runs `query` against the tree of `stage1`, restricted to `range` when one is given, and
/// copies every match into owned data.
///
/// The captures of each match are sorted by their capture index.
///
/// Results are cached under a hash of the document hash, the range, the capture names and
/// the byte bounds of the first pattern of the query.
#[cached(
    time = 5,
    key = "u64",
    convert = r#"{
        let mut hasher = DefaultHasher::new();
        stage1.hash(&mut hasher);
        range.hash(&mut hasher);
        query.capture_names().hash(&mut hasher);
        Hash::hash(&query.start_byte_for_pattern(0), &mut hasher);
        Hash::hash(&query.end_byte_for_pattern(0), &mut hasher);
        hasher.finish()
     } "#
)]
fn query_helper(
    stage1: &ParsedDocument,
    query: &Query,
    range: Option<Range>,
) -> Vec<UnwrappedQueryMatch> {
    let rope = &stage1.rope;

    let mut cursor = QueryCursor::new();
    if let Some(point_range) = range {
        cursor.set_point_range(point_range.into());
    }

    cursor
        .matches(query, stage1.tree.root_node(), RopeProvider::new(rope))
        .map_deref(|m| {
            let captures = m
                .captures
                .iter()
                .sorted_by_key(|capture| capture.index)
                .map(|capture| {
                    let node = &capture.node;
                    UnwrappedQueryCapture {
                        node: UnwrappedNode {
                            id: node.id(),
                            text: node_text(node, rope).to_string(),
                            range: node.range().into(),
                            kind: node.kind().into(),
                        },
                        index: capture.index,
                    }
                })
                .collect_vec();

            UnwrappedQueryMatch {
                _pattern_index: m.pattern_index,
                _id: m.id(),
                captures,
            }
        })
        .collect_vec()
}

/// A parsed document together with the results of the ontology, prefix and import queries.
#[derive(Debug)]
struct QueriedDocument {
    /// File location
    path: PathBuf,
    /// URL and location where this document was loaded from
    uri: Url,
    /// Version copied from the parsed document.
    _version: i32,
    /// The text and syntax tree that the query results below were taken from.
    parsed_document: ParsedDocument,

    /// Ontology IRI and, when present, the version IRI.
    ontology_id: Option<(Iri, Option<Iri>)>,
    /// Prefix name (without the colon) to IRI.
    prefixes: HashMap<String, Iri>,
    /// IRIs of the imports of this document.
    imports: Vec<Iri>,
}

impl QueriedDocument {
    /// Finds flat references to other document URL's in this document
    ///
    /// Returns the import URLs first and the prefix URLs second. IRIs that are not valid URLs
    /// are left out of both.
    pub fn reachable_urls(&self) -> (Vec<Url>, Vec<Url>) {
        let imports = self
            .imports
            .iter()
            .filter_map(|iri| Url::parse(iri).ok())
            .collect_vec();

        let prefixes = self
            .prefixes
            .iter()
            // Filter out the empty prefix ":"
            .filter(|(prefix, _)| !prefix.is_empty())
            .filter_map(|(_, iri)| Url::parse(iri).ok())
            // Filter out the current document as a prefix (most likely the empty prefix ":")
            .filter(|url| *url != self.uri)
            .map(|mut url| {
                // An existing fragment is replaced by an empty one
                if url.fragment().is_some() {
                    url.set_fragment(Some(""));
                }
                url
            })
            .collect_vec();

        (imports, prefixes)
    }

    /// Expands an abbreviated or simple IRI with the prefixes of this document.
    ///
    /// The text before the first `:` is the prefix name. Returns `None` when that prefix is
    /// not declared.
    pub fn abbreviated_iri_to_full_iri(&self, abbreviated_iri: &str) -> Option<String> {
        // Simple IRIs get a free colon prepended, so they resolve against the empty prefix
        // ref: https://www.w3.org/TR/owl2-manchester-syntax/#IRIs.2C_Integers.2C_Literals.2C_and_Entities
        let (prefix, local_name) = abbreviated_iri
            .split_once(':')
            .unwrap_or(("", abbreviated_iri));

        let resolved_prefix = self.prefixes.get(prefix)?;
        Some([resolved_prefix.as_str(), local_name].concat())
    }

    /// Builds one frame info per IRI from the annotations of this document and `definitions`.
    ///
    /// Pieces of information that belong to the same IRI are merged with `FrameInfo::extend`.
    fn document_all_frame_infos<'a>(
        &self,
        definitions: impl Iterator<Item = &'a IriDefinition>,
    ) -> HashMap<Iri, FrameInfo> {
        // First we collect the annotations
        let from_annotations = self.document_annotations().into_iter().map(
            |(frame_iri, annotation_iri, literal)| FrameInfo {
                iri: frame_iri,
                annotations: HashMap::from([(annotation_iri, vec![literal])]),
                frame_type: FrameType::Unknown,
                definitions: Vec::new(),
            },
        );

        // Second we collect the location and frame type (definitions)
        let from_definitions = definitions.map(|definition| FrameInfo {
            iri: definition.iri.clone(),
            annotations: HashMap::new(),
            frame_type: definition.kind,
            definitions: vec![definition.location.clone()],
        });

        let mut frame_infos: HashMap<String, FrameInfo> = HashMap::new();
        for frame_info in from_annotations.chain(from_definitions) {
            match frame_infos.get_mut(&frame_info.iri) {
                // Merge the frame info for the same IRI
                Some(known) => {
                    known.extend(frame_info);
                }
                None => {
                    frame_infos.insert(frame_info.iri.clone(), frame_info);
                }
            }
        }

        frame_infos
    }

    /// Returns `(frame IRI, annotation IRI, literal)` for every annotation match. Both IRIs
    /// are expanded when their prefix is known.
    fn document_annotations(&self) -> Vec<(String, String, String)> {
        let resolve = |text: &String| {
            let iri = trim_full_iri(text.clone());
            self.abbreviated_iri_to_full_iri(&iri).unwrap_or(iri)
        };

        self.parsed_document
            .query(&ALL_QUERIES.annotation_query)
            .iter()
            .map(|m| {
                let [frame_iri, annotation_iri, literal] = &m.captures[..] else {
                    unreachable!()
                };
                (
                    resolve(&frame_iri.node.text),
                    resolve(&annotation_iri.node.text),
                    trim_string_value(&literal.node.text),
                )
            })
            .collect_vec()
    }

    /// Returns the definition (IRI, location and frame type) of every frame match.
    ///
    /// # Panics
    ///
    /// Panics when a frame IRI node is missing from the tree or has no parent, and when the
    /// document path can not be turned into a file URL.
    fn document_definitions(&self) -> Vec<IriDefinition> {
        let nodes = node_by_id_map(&self.parsed_document.tree);

        let mut definitions = Vec::new();
        for m in &self.parsed_document.query(&ALL_QUERIES.frame_query) {
            let [frame_iri, frame] = &m.captures[..] else {
                unreachable!()
            };

            let written_iri = trim_full_iri(frame_iri.node.text.clone());
            // The kind of the parent node tells what kind of frame is defined
            let iri_parent_kind = nodes
                .get(&frame_iri.node.id)
                .expect("Node id must be valid after query")
                .parent()
                .expect("All frame IRIs should have paretns")
                .kind();
            let iri = self
                .abbreviated_iri_to_full_iri(&written_iri)
                .unwrap_or(written_iri);
            let uri = Url::from_file_path(&self.path).expect("Path should be valid file URL");

            definitions.push(IriDefinition {
                iri,
                location: Location {
                    uri,
                    range: frame.node.range,
                },
                kind: FrameType::parse(iri_parent_kind),
            });
        }
        definitions
    }

    /// Returns every IRI occurrence of the document as `(full IRI, range)`.
    fn document_references(&self) -> Vec<(String, Range)> {
        let iri_matches = self.parsed_document.query(&ALL_QUERIES.iri_query);

        iri_matches
            .iter()
            .map(|m| {
                let [iri_capture] = &m.captures[..] else {
                    unreachable!()
                };
                let written_iri = trim_full_iri(iri_capture.node.text.clone());
                let full_iri = self
                    .abbreviated_iri_to_full_iri(&written_iri)
                    .unwrap_or(written_iri);

                (full_iri, iri_capture.node.range)
            })
            .collect()
    }

    /// Derives the stage 2 data (references, definitions, frame infos, syntax errors and
    /// reachable URLs) from this document. Each step is wrapped in `timeit`.
    fn analyze(&self) -> Stage2Document {
        debug!("QueriedDocument -> Stage2Document");

        let references = timeit("references", || self.document_references());
        let definitions = timeit("definitions", || self.document_definitions());

        // Find iri locations
        let mut iri_locations: HashMap<String, Vec<Range>> = HashMap::new();
        for (iri, range) in &references {
            iri_locations.entry(iri.clone()).or_default().push(*range);
        }

        let all_frame_infos = timeit("all frame infos", || {
            self.document_all_frame_infos(definitions.iter())
        });
        let local_diagnostics = timeit("syntax errors", || syntax_errors(&self.parsed_document));
        let directly_reachable_urls = timeit("reachable urls", || self.reachable_urls());

        Stage2Document {
            all_frame_infos,
            local_diagnostics,
            directly_reachable_urls,
            iri_locations,
            references: references.into_iter().map(|(iri, _)| iri).collect(),
            definitions: definitions
                .into_iter()
                .map(|definition| definition.iri)
                .collect(),
        }
    }
}

/// Returns the word that ends directly before `byte_index` in `line`.
///
/// The word is the longest run of alphabetic characters that ends at `byte_index`. It is
/// empty when the character before that position is not alphabetic.
///
/// An empty string is also returned when `byte_index` is past the end of `line` or does not
/// lie on a character boundary.
pub fn word_before_character(byte_index: usize, line: &str) -> String {
    let Some(before_cursor) = line.get(..byte_index) else {
        return String::new();
    };

    // Whatever remains after trimming the alphabetic tail is the part in front of the word
    let word_start = before_cursor.trim_end_matches(char::is_alphabetic).len();
    before_cursor[word_start..].to_string()
}

/// External documents are ontologies that are not expected to change in any way.
///
/// Equality and hashing of external documents only look at `uri`.
#[derive(Debug)]
pub struct ExternalDocument {
    /// URL this document was created with.
    pub uri: Url,
    /// The text the graph was parsed from.
    pub text: String,
    /// Graph with the information taken from the parsed ontology.
    pub graph: InfoGraph,
    /// URLs generated from the document URL, the graph and the imports when the document
    /// was created.
    reachable_urls: Vec<Url>,
    /// Import URLs generated from the graph.
    imports: Vec<Url>,
    /// Definitions generated from the graph.
    definitions: HashSet<Iri>,
}

/// Two external documents are equal when they have the same URI. The text and the graph
/// are not compared.
impl PartialEq for ExternalDocument {
    fn eq(&self, other: &Self) -> bool {
        self.uri.eq(&other.uri)
    }
}

// Marker impl: the equality of external documents is the equality of their URIs.
impl Eq for ExternalDocument {}

/// An in-memory graph of ontology information together with the name of that graph.
#[derive(Debug)]
pub struct InfoGraph(LightGraph, GraphName);

/// Name of an [`InfoGraph`].
type GraphName = String;

/// Builds an [`InfoGraph`] from the components of a horned-owl ontology.
///
/// Only three kinds of components end up in the graph:
///
/// - annotation assertions with an IRI subject become `subject, annotation property, value`
/// - imports become `ontology IRI, owl:imports, imported IRI` (only with a known ontology IRI)
/// - entity declarations become `entity IRI, rdf:type, owl:Class`
///
/// The graph is named after the ontology IRI, or `???` when there is none.
///
/// # Panics
///
/// Panics when an IRI, language tag or anonymous individual of the ontology is rejected by
/// the term constructors, and when inserting an import or declaration triple fails.
impl From<SetOntology<ArcStr>> for InfoGraph {
    fn from(value: SetOntology<ArcStr>) -> Self {
        let mut graph = LightGraph::new();

        // The first ontology ID component that carries an IRI names the ontology
        let ontology_iri = &value.iter().find_map(|ac| match &ac.component {
            Component::OntologyID(horned_owl::model::OntologyID {
                iri: Some(id),
                viri: _,
            }) => Some(SimpleTerm::Iri(IriRef::new(MownStr::from_ref(id)).unwrap())),
            _ => None,
        });

        for ac in &value {
            match &ac.component {
                AnnotationAssertion(aa) => match &aa.subject {
                    AnnotationSubject::IRI(iri) => {
                        let subject = SimpleTerm::Iri(IriRef::new(MownStr::from_ref(iri)).unwrap());
                        let predicate =
                            SimpleTerm::Iri(IriRef::new(MownStr::from_ref(&aa.ann.ap)).unwrap());
                        let object = match &aa.ann.av {
                            AnnotationValue::Literal(literal) => match literal {
                                // Literals without language and datatype are typed as xsd:string
                                Literal::Simple { literal } => SimpleTerm::LiteralDatatype(
                                    literal.clone().into(),
                                    IriRef::new_unchecked(MownStr::from_ref(
                                        "http://www.w3.org/2001/XMLSchema#string",
                                    )),
                                ),
                                Literal::Language { literal, lang } => SimpleTerm::LiteralLanguage(
                                    literal.clone().into(),
                                    LanguageTag::new(lang.clone().into()).unwrap(),
                                ),
                                Literal::Datatype {
                                    literal,
                                    datatype_iri,
                                } => SimpleTerm::LiteralDatatype(
                                    literal.clone().into(),
                                    IriRef::new(MownStr::from_ref(datatype_iri)).unwrap(),
                                ),
                            },
                            AnnotationValue::IRI(iri) => {
                                SimpleTerm::Iri(IriRef::new(MownStr::from_ref(iri)).unwrap())
                            }
                            // Anonymous individuals as values become blank nodes
                            AnnotationValue::AnonymousIndividual(anonymous_individual) => {
                                SimpleTerm::BlankNode(
                                    BnodeId::new(MownStr::from_ref(anonymous_individual)).unwrap(),
                                )
                            }
                        };

                        // Unlike the inserts further down, a failure is only logged here
                        if graph.insert(subject, predicate, object).is_err() {
                            // This should not happen :>
                            error!("The term index is full");
                        }
                    }
                    AnnotationSubject::AnonymousIndividual(_) => {
                        // TODO support anonymous individual
                    }
                },
                Component::Import(horned_owl::model::Import(iri)) => {
                    // Imports of an ontology without an IRI are not recorded
                    if let Some(subject) = ontology_iri {
                        let predicate = SimpleTerm::Iri(IriRef::new_unchecked(MownStr::from_ref(
                            "http://www.w3.org/2002/07/owl#imports",
                        )));
                        let object = SimpleTerm::Iri(
                            IriRef::new(MownStr::from_ref(iri)).expect("valid IRI"),
                        );
                        graph
                            .insert(subject, predicate, object)
                            .expect("graph should not be full");
                    }
                }
                Component::DeclareClass(DeclareClass(horned_owl::model::Class(iri)))
                | Component::DeclareDatatype(DeclareDatatype(Datatype(iri)))
                | Component::DeclareObjectProperty(DeclareObjectProperty(ObjectProperty(iri)))
                | Component::DeclareAnnotationProperty(DeclareAnnotationProperty(
                    AnnotationProperty(iri),
                ))
                | Component::DeclareDataProperty(DeclareDataProperty(DataProperty(iri)))
                | Component::DeclareNamedIndividual(DeclareNamedIndividual(NamedIndividual(iri))) =>
                {
                    let subject = SimpleTerm::Iri(
                        IriRef::new(MownStr::from_ref(iri)).expect("Class IRI should be valid IRI"),
                    );
                    let predicate =
                        SimpleTerm::Iri(IriRef::new_unchecked(MownStr::from_ref(IRI_RDF_TYPE)));

                    // NOTE(review): every kind of declaration is recorded as an owl:Class,
                    // properties, datatypes and individuals included — confirm this is intended.
                    let object = SimpleTerm::Iri(IriRef::new_unchecked(MownStr::from_ref(
                        "http://www.w3.org/2002/07/owl#Class",
                    )));
                    graph
                        .insert(subject, predicate, object)
                        .expect("graph should not be full");
                }
                // All other components are not represented in the graph
                _ => (),
            }
        }

        // The graph is named after the ontology IRI when there is one
        let graph_name = ontology_iri
            .as_ref()
            .and_then(|s| s.iri().clone())
            .map_or("???".into(), |i| i.to_string()); //TODO what default graph name?

        Self(graph, graph_name)
    }
}

/// IRI of the `rdf:type` predicate.
const IRI_RDF_TYPE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

/// Hashing an [`ExternalDocument`] only takes its URI into account.
impl Hash for ExternalDocument {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // The URI is the only field that is fed into the hasher
        Hash::hash(&self.uri, state);
    }
}

impl ExternalDocument {
    /// Creates an external document for `url` by parsing `text`.
    ///
    /// The ontology syntax is currently determined by trial and error: the text is parsed as
    /// RDF/XML first and, when that fails, as OWL/XML (owx).
    ///
    /// # Errors
    /// Returns the error of the OWL/XML attempt when neither syntax can be parsed.
    pub fn new(text: String, url: Url) -> Result<ExternalDocument> {
        debug!("try creating external document... {url}");

        // Try parsing different styles
        // This could try more styles in the future

        debug!("try rdf...");
        let builder = lock_global_build_arc();
        let doc = ExternalDocument::try_parse_rdf(&text)
            .or_else(|e| {
                warn!("rdf failed with {e}");
                // This branch is the OWL/XML fallback, so the log has to say so
                debug!("try owx...");
                ExternalDocument::try_parse_owx(&text, &builder)
            })
            .map(|graph| {
                let imports = ExternalDocument::gen_imports(&graph).collect_vec();

                let reachable_urls =
                    ExternalDocument::gen_reachable_urls(&url, &graph, &imports).collect_vec();

                ExternalDocument {
                    reachable_urls,
                    definitions: ExternalDocument::gen_definitions(&graph),
                    imports,
                    graph,
                    text,
                    uri: url,
                }
            });

        if let Ok(doc) = &doc {
            debug!("parsing worked! {}", doc.uri);
        }
        doc
    }

    fn try_parse_rdf(text: &str) -> Result<InfoGraph> {
        sophia::xml::parser::parse_str(text)
            .collect_triples::<LightGraph>()
            .map_err(|e| Error::Sophia(format!("{e}")))
            .map(|g| {
                // Find Match for: x <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Ontology>.
                let owl = Namespace::new_unchecked("http://www.w3.org/2002/07/owl#");
                let owl_ontology = owl.get("Ontology").unwrap();
                let ontology_id = &g
                    .triples_matching(Any, [sophia::api::ns::rdf::type_], [owl_ontology])
                    .flatten()
                    .next()
                    .and_then(|[s, _, _]| s.iri())
                    .map_or("???".into(), |i| i.to_string()); //TODO default case

                InfoGraph(g, ontology_id.clone())
            })
    }

    /// Parses `text` with the horned-owl `owx` reader and converts the resulting ontology into
    /// an [`InfoGraph`].
    ///
    /// # Errors
    /// Propagates the error of the horned-owl reader.
    fn try_parse_owx(text: &str, builder: &Build<ArcStr>) -> Result<InfoGraph> {
        let mut reader = text.as_bytes();
        // The second element of the reader result is not needed here
        let (ontology, _) = horned_owl::io::owx::reader::read_with_build::<
            ArcStr,
            SetOntology<ArcStr>,
            _,
        >(&mut reader, builder)?;

        Ok(ontology.into())
    }

    /// Yields the unique objects of all `owl:imports` triples that can be parsed as a URL.
    fn gen_imports(graph: &InfoGraph) -> Box<dyn Iterator<Item = Url> + '_> {
        const OWL_IMPORTS: &str = "http://www.w3.org/2002/07/owl#imports";

        // `graph.triples_matching` will exceed the stack size (Stack Overflow) on large graphs
        Box::new(
            graph
                .0
                .triples()
                .flatten()
                .filter(|[_, predicate, _]| {
                    predicate
                        .iri()
                        .is_some_and(|iri| iri.as_str() == OWL_IMPORTS)
                })
                .filter_map(|[_, _, object]| object.iri())
                // Objects that are no valid URL are skipped
                .filter_map(|iri| Url::parse(&iri).ok())
                .unique(),
        )
    }

    /// Returns the import URLs that were stored when this document was created.
    pub fn imports(&self) -> &Vec<Url> {
        &self.imports
    }

    /// Returns the IRIs that were collected as definitions of this document.
    ///
    /// Note that every call logs the complete set at debug level.
    pub fn definitions(&self) -> &HashSet<Iri> {
        debug!("Definitions of {} are {:#?}", self.uri, self.definitions);
        &self.definitions
    }

    /// Collects the IRIs that are defined in `graph`.
    ///
    /// An IRI counts as defined when it is the subject of at least one `rdf:type` triple.
    /// Every triple with an IRI predicate is logged at debug level.
    pub fn gen_definitions(graph: &InfoGraph) -> HashSet<Iri> {
        let mut definitions = HashSet::new();

        // A plain loop avoids building a throwaway vector just to drive the iterator
        for [s, p, o] in graph.0.triples().flatten() {
            let Some(predicate) = p.iri() else {
                continue;
            };
            debug!("{s:?} {predicate:?} {o:?}");

            if predicate.as_str() != IRI_RDF_TYPE {
                continue;
            }
            if let Some(subject_iri) = s.iri() {
                definitions.insert(subject_iri.as_str().to_string());
            }
        }

        definitions
    }

    /// Adds the URL of this document and of every loaded document that is reachable from it
    /// to `result`.
    ///
    /// Because external documents most likely relate to other external ones and because there
    /// are many of them in a graph the depth should be limited.
    ///
    /// NOTE(review): the recursion below always passes `0` as the depth of external
    /// documents, so the `depth >= 1` limit only triggers when a caller passes a non-zero
    /// depth. Confirm whether `depth + 1` was intended.
    ///
    /// # Errors
    /// Propagates the error of a recursive call.
    fn reachable_docs_recursive_helper(
        &self,
        workspace: &Workspace,
        result: &mut HashSet<Url>,
        depth: u32,
        include_prefix: bool,
    ) -> Result<()> {
        // Do nothing when the max depth is reached or the document was visited already
        if depth >= 1 || result.contains(&self.uri) {
            return Ok(());
        }

        result.insert(self.uri.clone());

        // TODO shitty child urls :<
        for url in self.reachable_urls() {
            // Documents that are not loaded are skipped silently.
            // TODO maybe report them, but for now lets not log here
            let Some(doc) = workspace.document_by_url(url) else {
                continue;
            };

            match doc {
                DocumentReference::Internal(internal_document) => {
                    internal_document.reachable_docs_recursive_helper(
                        result,
                        workspace,
                        include_prefix,
                    )?;
                }
                DocumentReference::External(external_document) => {
                    external_document.reachable_docs_recursive_helper(
                        workspace,
                        result,
                        0,
                        include_prefix,
                    )?;
                }
            }
        }

        Ok(())
    }

    /// Yields the URLs of documents that could be reached from the document at `uri`.
    ///
    /// These are the `imports` followed by the URLs derived from the IRIs of `graph`. IRIs that
    /// start with `uri` (ignoring trailing `#`) or contain the graph name are treated as part
    /// of this ontology and skipped. The remaining ones are mapped with
    /// [`iri_to_onology_url`]. The result contains no duplicates.
    fn gen_reachable_urls<'a>(
        uri: &'a Url,
        graph: &'a InfoGraph,
        imports: &'a [Url],
    ) -> Box<dyn Iterator<Item = Url> + 'a> {
        // Both needles are the same for every IRI, so they are computed once.
        // `Url::as_str` exposes the serialized version without allocating a new string.
        let own_prefix: &'a str = uri.as_str().trim_end_matches('#');
        let graph_name: &'a str = graph.1.as_str();

        // TODO this is not that stable yet
        let child_urls = graph
            .0
            .iris()
            .filter_map(std::result::Result::ok)
            .filter_map(|term| term.iri())
            .unique()
            .filter_map(|iri| Url::parse(&iri).ok())
            // Filter out IRI's that point to this ontology
            .filter(move |url| !url.as_str().starts_with(own_prefix))
            .filter(move |url| !url.as_str().contains(graph_name))
            .map(iri_to_onology_url)
            .unique();

        Box::new(imports.iter().cloned().chain(child_urls).unique())
    }

    /// Returns the reachable URLs that were stored when this document was created.
    pub fn reachable_urls(&self) -> &Vec<Url> {
        &self.reachable_urls
    }

    /// Returns the frame information this document holds for `iri`, if any.
    ///
    /// The lookup is delegated to the memoized `get_frame_info_helper_ex`.
    pub fn get_frame_info(&self, iri: &Iri) -> Option<FrameInfo> {
        get_frame_info_helper_ex(self, iri)
    }
}

/// Convert some IRI (here in URL type) into a URL where the IRI can be fetched from.
///
/// A URL with a fragment gets its fragment replaced by an empty one. Otherwise the last path
/// segment is removed, provided the URL has path segments.
pub fn iri_to_onology_url(mut url: Url) -> Url {
    let has_fragment = url.fragment().is_some();

    if !has_fragment {
        // TODO check for obo ontology
        // See https://obofoundry.org/principles/fp-003-uris.html
        if let Ok(mut segments) = url.path_segments_mut() {
            segments.pop();
        }
        return url;
    }

    url.set_fragment(Some(""));
    url
}

#[cached(
    size = 2000,
    key = "u64",
    convert = r#"{
            let mut hasher = DefaultHasher::new();
            doc.hash(&mut hasher);
            iri.hash(&mut hasher);
            hasher.finish()
     } "#
)]
fn get_frame_info_helper_ex(doc: &ExternalDocument, iri: &Iri) -> Option<FrameInfo> {
    let graph = &doc.graph.0;

    graph
        .triples_matching(
            |s: SimpleTerm| s.iri().is_some_and(|subject| subject.as_str() == iri),
            Any,
            Any,
        )
        .flatten()
        .map(|[_, p, o]| FrameInfo {
            iri: iri.clone(),
            annotations: once((simple_term_to_string(p), vec![simple_term_to_string(o)])).collect(),
            frame_type: FrameType::Unknown,
            definitions: vec![Location {
                uri: doc.uri.clone(),
                range: Range::ZERO,
            }],
        })
        .tree_reduce(FrameInfo::merge)
}

/// Renders a term as text.
///
/// Literals of type `xsd:string` are returned without quotes, other typed literals as
/// `"value"^^datatype` and language tagged literals as `"value"@tag`.
fn simple_term_to_string(simple_term: &SimpleTerm) -> String {
    const XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string";

    match simple_term {
        SimpleTerm::Iri(iri) => iri.to_string(),
        SimpleTerm::BlankNode(id) => id.to_string(),
        SimpleTerm::Variable(name) => name.to_string(),
        SimpleTerm::Triple(_) => "TODO triple".into(),
        SimpleTerm::LiteralLanguage(lexical, tag) => {
            format!("\"{lexical}\"@{}", tag.borrowed())
        }
        // Plain strings are shown as they are
        SimpleTerm::LiteralDatatype(lexical, datatype) if datatype.as_str() == XSD_STRING => {
            lexical.to_string()
        }
        SimpleTerm::LiteralDatatype(lexical, datatype) => {
            format!("\"{lexical}\"^^{datatype}")
        }
    }
}

/// This is a version of a query match that has no reference to the tree or cursor.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct UnwrappedQueryMatch {
    _pattern_index: usize,
    /// The captures of this match
    pub captures: Vec<UnwrappedQueryCapture>,
    _id: u32,
}

/// This is a version of a query capture that has no reference to the tree or cursor.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct UnwrappedQueryCapture {
    /// The captured node
    pub node: UnwrappedNode,
    /// NOTE(review): presumably the index of the capture inside its query - confirm
    pub index: u32,
}

/// This is a version of a node that has no reference to the tree.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct UnwrappedNode {
    /// IDs of a changed tree stay the same. So you can search for up-to-date information that way.
    pub id: usize,
    /// This information can be outdated
    pub text: String,
    /// This information can be outdated
    pub range: Range,
    /// The kind of the node
    pub kind: String,
}

/// This represents information about a frame.
/// For example the following frame has information.
/// ```owl-ms
/// Class: PizzaThing
///     Annotations: rdfs:label "Pizza"
/// ```
/// Then the [`FrameInfo`] contains the label "Pizza" and the frame type "Class".
#[derive(Clone, Debug)]
pub struct FrameInfo {
    /// The IRI this information belongs to
    pub iri: Iri,
    /// The annotation values, keyed by the IRI of the annotation
    pub annotations: HashMap<Iri, Vec<String>>,
    /// The type of the frame, see [`FrameType`]
    pub frame_type: FrameType,
    /// The locations that contributed to this information
    pub definitions: Vec<Location>,
}

/// A range inside the document that is identified by a URL.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Location {
    /// The URL of the document
    pub uri: Url,
    /// The range inside that document
    pub range: Range,
}

impl Location {
    pub fn into_lsp(
        self,
        rope: &Rope,
        encoding: &PositionEncodingKind,
    ) -> Result<tower_lsp::lsp_types::Location> {
        Ok(tower_lsp::lsp_types::Location {
            uri: self.uri.clone(),
            range: self.range.into_lsp(rope, encoding)?,
        })
    }
}

impl FrameInfo {
    /// Combines two frame infos into one by extending `a` with `b`.
    fn merge(mut a: FrameInfo, b: FrameInfo) -> FrameInfo {
        a.extend(b);
        a
    }

    /// Merges the information of `b` into `self`.
    ///
    /// - Annotation values of `b` are appended to the values with the same annotation IRI.
    /// - Definitions of `b` are appended and every duplicate location is removed while the
    ///   order of first occurrence is kept.
    /// - The frame type is kept when both agree, an unknown type is overridden by the other
    ///   one, and two different known types result in [`FrameType::Invalid`].
    fn extend(&mut self, b: FrameInfo) {
        for (annotation_iri, values) in b.annotations {
            self.annotations
                .entry(annotation_iri)
                .or_default()
                .extend(values);
        }

        // `Vec::dedup` would only remove *consecutive* duplicates, which is not enough for an
        // unsorted list. So filter duplicates regardless of their position.
        self.definitions = std::mem::take(&mut self.definitions)
            .into_iter()
            .chain(b.definitions)
            .unique()
            .collect();

        self.frame_type = match (self.frame_type, b.frame_type) {
            (a, b) if a == b => a,
            (FrameType::Unknown, b) => b,
            (a, FrameType::Unknown) => a,
            _ => FrameType::Invalid, // a != b and not one of them is unknown => conflict
        };
    }

    /// IRI of the `rdfs:label` annotation property.
    const LABEL_IRI: &'static str = "http://www.w3.org/2000/01/rdf-schema#label";

    /// Returns the display text of the `rdfs:label` annotation, if this frame has one.
    pub fn label(&self) -> Option<String> {
        self.annotation_display(FrameInfo::LABEL_IRI)
    }

    /// Returns the display text for the values of the annotation `iri`.
    ///
    /// The values are trimmed with `trim_string_value`, de-duplicated and joined with `", "`.
    /// Returns `None` when the frame has no annotation with that IRI.
    pub fn annotation_display(&self, iri: &str) -> Option<String> {
        let values = self.annotations.get(iri)?;

        // TODO #20 make this more usable by providing multiple lines with indentation
        let display = values
            .iter()
            .map(|value| trim_string_value(value))
            .unique()
            .join(", ");

        Some(display)
    }

    /// Renders this frame info as markdown text.
    ///
    /// The text consists of the frame type, the label (or the last part of the IRI when there
    /// is no label), one list entry per annotation and the IRI itself. The annotations are
    /// listed in the order of their IRIs, so the output is the same on every call.
    ///
    /// `workspace` is used to look up the labels of the annotation IRIs.
    pub fn info_display(&self, workspace: &Workspace) -> String {
        let entity = self.frame_type;
        let label = self
            .label()
            .unwrap_or_else(|| trim_url_before_last(&self.iri).to_string());

        debug!("info display / frame annotations {:#?}", self.annotations);

        let annotations = self
            .annotations
            .keys()
            // A hash map has no stable iteration order, so sort to get a reproducible listing
            .sorted()
            .map(|iri| {
                let iri_label = workspace
                    .get_frame_info(iri)
                    .map(|fi| {
                        fi.label()
                            .unwrap_or_else(|| trim_url_before_last(&fi.iri).to_string())
                    })
                    .unwrap_or_else(|| iri.clone());
                // TODO #28 use values directly
                let mut annotation_display = self
                    .annotation_display(iri)
                    .unwrap_or_else(|| iri.clone());

                // If this is a multiline string then give it some space to work with
                if annotation_display.contains('\n') {
                    annotation_display = format!("\n{annotation_display}\n\n");
                }

                format!("- `{iri_label}`: {annotation_display}")
            })
            .join("\n");

        format!(
            "{entity} **{label}**\n\n---\n{annotations}\n\nIRI: {}",
            self.iri
        )
    }

    /// This is a quick and dirty matcher that returns a match score from `0` to `usize::MAX`.
    ///
    /// An IRI that contains `query` scores 5000. Every annotation value that contains `query`
    /// adds to the running score: label values are rewarded for starting or ending with the
    /// query or having it between spaces, and penalized for every byte outside of the match.
    /// Any other annotation adds 1.
    pub fn matches(&self, query: &str) -> usize {
        let mut score = if self.iri.contains(query) {
            5000usize
        } else {
            0
        };

        for (annotation_iri, values) in &self.annotations {
            let is_label = annotation_iri == FrameInfo::LABEL_IRI;

            for value in values.iter().filter(|value| value.contains(query)) {
                if !is_label {
                    score += 1;
                    continue;
                }

                score += 1000;

                let Some((before, after)) = value.split_once(query) else {
                    continue;
                };

                // Starts with query
                if before.is_empty() {
                    score += 100;
                }
                // Ends with query
                if after.is_empty() {
                    score += 10;
                }
                // Query found at exact word boundary
                if after.starts_with(' ') && before.ends_with(' ') {
                    score += 10;
                }

                // Chars not matching query
                score = score
                    .saturating_sub(before.len() * 10)
                    .saturating_sub(after.len() * 10);
                score += 1;
            }
        }

        score
    }
}

/// Returns the part of `iri` after the last `/` or `#`, or the whole `iri` when it contains
/// neither of them.
fn trim_url_before_last(iri: &str) -> &str {
    // Splitting from the right yields the trailing part first
    iri.rsplit(['/', '#']).next().unwrap_or(iri)
}

/// Turns the textual form of a literal into its display text.
///
/// Surrounding quotes, the language tag of a quoted literal (like `@en` or `@en-US`) and the
/// `^^xsd:string` datatype suffix are removed. Escaped quotes are unescaped and surrounding
/// whitespace is trimmed.
fn trim_string_value(value: &str) -> String {
    /// Removes a trailing `@tag` that directly follows a closing quote. The quote is kept.
    fn strip_language_tag(quoted: &str) -> &str {
        match quoted.rsplit_once("\"@") {
            Some((body, tag))
                if !tag.is_empty()
                    && tag.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') =>
            {
                // `body.len()` is the index of the closing quote, which is one byte long
                &quoted[..=body.len()]
            }
            _ => quoted,
        }
    }

    let unquoted = value.trim_start_matches('"');
    // Only literals that start with a quote can carry a language tag
    let had_opening_quote = unquoted.len() < value.len();
    let without_tag = if had_opening_quote {
        strip_language_tag(unquoted)
    } else {
        unquoted
    };

    without_tag
        // These three are kept so that values without quotes are trimmed like before
        .trim_end_matches("@en")
        .trim_end_matches("@de")
        .trim_end_matches("@pt")
        .trim_end_matches("^^xsd:string") // typed literal with type string
        .trim_end_matches('"')
        .replace("\\\"", "\"")
        .trim()
        .to_string()
}

// TODO maybe use Arc<String>
pub type Iri = String;

/// Returns the text of `node` inside `rope`, or an empty string when the byte range of the
/// node can not be sliced out of the rope.
pub fn node_text(node: &Node, rope: &Rope) -> String {
    let byte_range = node.start_byte()..node.end_byte();
    rope.get_byte_slice(byte_range)
        .map(|slice| slice.to_string())
        .unwrap_or_default()
}

/// Generate the diagnostics for every syntax error inside the tree of `stage1`.
///
/// The tree is walked with a cursor. Only nodes that contain an error are entered and error
/// nodes themselves are not searched any further. An error node only produces a diagnostic when
/// the kind of its parent is known in `NODE_TYPES`.
fn syntax_errors(stage1: &ParsedDocument) -> Vec<Diagnostic> {
    let mut cursor = stage1.tree.root_node().walk();
    let mut diagnostics = Vec::<Diagnostic>::new();

    loop {
        let node = cursor.node();
        let is_error = node.is_error();

        if is_error {
            // root has no parents so use itself
            let parent_kind = node.parent().unwrap_or(node).kind();

            if let Some(static_node) = NODE_TYPES.get(parent_kind) {
                let valid_children = static_node
                    .children
                    .types
                    .iter()
                    .map(|sn| node_type_to_string(&sn.type_))
                    .join(", ");
                let parent = node_type_to_string(parent_kind);

                diagnostics.push(Diagnostic {
                    range: node.range().into(),
                    label: format!("Syntax Error. expected {valid_children} inside {parent}"),
                });
            }
        }

        // move in, this should always work for a node that contains an error
        if !is_error && node.has_error() && cursor.goto_first_child() {
            continue;
        }

        // move along
        while !cursor.goto_next_sibling() {
            // move out
            if !cursor.goto_parent() {
                // this node has no parent, it's the root
                return diagnostics;
            }
        }
    }
}

/// Generates a diagnostic for every location of an IRI that is used in `doc` but not defined.
///
/// An IRI counts as defined when it is part of the definitions of `doc` or of a loaded
/// document that is reachable from it. Prefixes, imports, the ontology IRI and the version IRI
/// are never reported.
fn semantic_errors(doc: &InternalDocument, workspace: &Workspace) -> Vec<Diagnostic> {
    let used: &HashSet<Iri> = &doc.stage2.references;

    // Start with the own definitions and add those of every reachable document
    let mut defined: HashSet<String> = doc.stage2.definitions.clone();

    let reachable = timeit("semantic errors  reachable", || {
        // This takes the longest :<
        doc.reachable_docs_recursive(workspace, false)
    });
    debug!("Imports recursive {} {:#?}", doc.uri, reachable);

    for url in reachable {
        match workspace.document_by_url(&url) {
            Some(DocumentReference::Internal(internal_document)) => {
                defined.extend(internal_document.stage2.definitions.iter().cloned());
            }
            Some(DocumentReference::External(external_document)) => {
                defined.extend(external_document.definitions().iter().cloned());
            }
            // Documents that are not loaded can not contribute definitions
            None => {}
        }
    }

    // TODO this is a quick fix for now. The correct way will be to not include prefixes in the used IRIs
    let prefixes: HashSet<&Iri> = doc.queried_document.prefixes.values().collect();
    let imports = &doc.queried_document.imports;

    let ontology_id = &doc.queried_document.ontology_id;

    let undefined = used
        .difference(&defined)
        .filter(|iri| !prefixes.contains(*iri))
        .filter(|iri| !imports.contains(*iri));

    let mut diagnostics = Vec::new();
    for diff in undefined {
        // Skip ontology and version IRIs
        if let Some((iri, version)) = &ontology_id {
            if diff == iri {
                continue;
            }
            if let Some(version) = version {
                if diff == version {
                    continue;
                }
            }
        }

        let Some(locations) = doc.stage2.iri_locations.get(diff) else {
            continue;
        };
        for location in locations {
            diagnostics.push(Diagnostic {
                range: *location,
                label: format!("Iri {diff} used but not defined"),
            });
        }
    }

    diagnostics
}

fn node_type_to_string(node_type: &str) -> String {
    Itertools::intersperse(
        node_type.split_terminator('_').map(capitalize_string),
        " ".to_string(),
    )
    .collect()
}

/// Returns `s` with its first character converted to uppercase.
fn capitalize_string(s: &str) -> String {
    let mut rest = s.chars();
    let Some(first) = rest.next() else {
        return String::new();
    };

    // The uppercase form of a character can consist of multiple characters
    let mut capitalized = String::with_capacity(s.len());
    capitalized.extend(first.to_uppercase());
    capitalized.push_str(rest.as_str());
    capitalized
}

/// The type of a frame.
///
/// Taken from <https://www.w3.org/TR/owl2-syntax/#Entity_Declarations_and_Typing>
#[derive(Clone, Copy, Eq, PartialEq, Debug, Hash)]
pub enum FrameType {
    Class,
    DataType,
    ObjectProperty,
    DataProperty,
    AnnotationProperty,
    Individual,
    Ontology,

    /// The frame type of an IRI that has no valid frame (this can be because of conflicts)
    Invalid,
    /// The frame type of an IRI that has no frame at all (can be overridden)
    Unknown,
}

impl FrameType {
    /// Maps the kind of a frame node or IRI node (like `class_frame` or `class_iri`) to its
    /// frame type.
    ///
    /// Any other kind is logged as an error and results in [`FrameType::Invalid`].
    pub fn parse(kind: &str) -> FrameType {
        // Both node flavours share the entity name in front of the suffix
        let entity = kind
            .strip_suffix("_iri")
            .or_else(|| kind.strip_suffix("_frame"));

        match entity {
            Some("class") => FrameType::Class,
            Some("datatype") => FrameType::DataType,
            Some("object_property") => FrameType::ObjectProperty,
            Some("data_property") => FrameType::DataProperty,
            Some("annotation_property") => FrameType::AnnotationProperty,
            Some("individual") => FrameType::Individual,
            Some("ontology") => FrameType::Ontology,
            _ => {
                error!("Implement {kind}");
                FrameType::Invalid
            }
        }
    }
}

/// Maps every frame type to the LSP symbol kind that is used to present it.
impl From<FrameType> for tower_lsp::lsp_types::SymbolKind {
    fn from(val: FrameType) -> Self {
        match val {
            FrameType::Ontology => SymbolKind::MODULE,
            FrameType::Class => SymbolKind::CLASS,
            FrameType::DataType => SymbolKind::STRUCT,
            FrameType::Individual => SymbolKind::OBJECT,
            // All property flavours share one symbol kind
            FrameType::ObjectProperty => SymbolKind::PROPERTY,
            FrameType::DataProperty => SymbolKind::PROPERTY,
            FrameType::AnnotationProperty => SymbolKind::PROPERTY,
            // There is nothing sensible to show for these
            FrameType::Invalid => SymbolKind::NULL,
            FrameType::Unknown => SymbolKind::NULL,
        }
    }
}

/// Writes the human readable name of the frame type.
impl Display for FrameType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            FrameType::Class => "Class",
            FrameType::DataType => "Data Type",
            FrameType::ObjectProperty => "Object Property",
            FrameType::DataProperty => "Data Property",
            FrameType::AnnotationProperty => "Annotation Property",
            FrameType::Individual => "Named Individual",
            FrameType::Ontology => "Ontology",
            FrameType::Invalid => "Invalid Frame Type",
            FrameType::Unknown => "Unknown Frame Type",
        })
    }
}

/// Takes an IRI in any form and removes the `<` symbols at its start and the `>` symbols at
/// its end.
pub fn trim_full_iri(untrimmed_iri: String) -> Iri {
    let without_opening = untrimmed_iri.trim_start_matches('<');
    without_opening.trim_end_matches('>').to_owned()
}

// Horned owl has no default here. Let's keep it out for now.
// static STANDARD_PREFIX_NAMES: [(&str, &str); 4] = [
//     ("rdf:", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
//     ("rdfs:", "http://www.w3.org/2000/01/rdf-schema#"),
//     ("owl:", "http://www.w3.org/2002/07/owl#"),
//     ("xsd:", "http://www.w3.org/2001/XMLSchema#"),
// ];

/// Translates `node` into a pretty printable document.
///
/// The translation depends on the kind of the node. Nodes of a kind without a special
/// translation, and nodes that have an error node as direct child, are emitted with their
/// original text from `rope`.
///
/// `options` supplies the tab size that is used as nesting depth and is passed on to the
/// translation of the child nodes.
fn to_doc(node: &Node, rope: &Rope, options: &FormattingSettings) -> RcDoc<'static, ()> {
    // I do not target 32-bit systems, so the cast is not expected to wrap
    #[allow(clippy::cast_possible_wrap)]
    let nest_depth = options.tab_size as isize;
    let text = node_text(node, rope);
    debug!(
        "to_doc for {text} that is {} at {:?}",
        node.kind(),
        node.range()
    );
    let mut cursor = node.walk();

    // If this node has an error child then the translation into RcDoc could exclude that error node.
    // Therefore, do not translate it at all and keep the original text.
    if node.children(&mut cursor).any(|child| child.is_error()) {
        return RcDoc::text(text);
    }

    match node.kind() {
        "source_file" => {
            source_file_to_doc(node, rope, options)
        },
        "ontology" =>
            ontology_to_doc(node, rope,options, nest_depth)
        ,
        // All children on one line if they fit, otherwise one child per line
        "prefix_declaration" | "import" | "annotation" => RcDoc::intersperse(
            node.children(&mut cursor)
                .map(|n| to_doc(&n, rope,options)),
            RcDoc::line(),
        )
        .nest(nest_depth)
        .group(),
        "annotations"
        // class
        | "sub_class_of" | "class_equivalent_to" | "class_disjoint_with" | "disjoint_union_of" | "has_key"
        // datatype
        | "datatype_equavalent_to" // TODO weird typo that is all over the app
        // individual
        | "individual_facts" | "individual_same_as" | "individual_different_from" | "individual_types"
        // annotation property
        | "annotation_property_domin" // TODO also typo
        | "annotation_property_range" | "annotation_property_sub_property_of"
        // data property
        | "data_property_domain" | "data_property_range" | "data_property_characteristics" | "data_property_sub_property_of" | "data_property_equivalent_to" | "data_property_disjoint_with"
        // object property
        |"domain" |"range" |"sub_property_of" |"object_property_equivalent_to" |"object_property_disjoint_with" |"inverse_of" |"characteristics" |"sub_property_chain"
        // misc
        |"equivalent_classes" |"disjoint_classes" |"equivalent_object_properties" |"disjoint_object_properties" |"equivalent_data_properties" |"disjoint_data_properties" |"same_individual" |"different_individuals"
         => {
            nesting_property_with_keyword_to_frame(node, rope, options, nest_depth)
        },
        "description"
         => {
             // The children are grouped into runs of `or` nodes and runs of other nodes
             let subs=node.children(&mut cursor).chunk_by(|n| n.kind()=="or").into_iter().map(|(is_or, chunks)|{
                 if is_or {
                     RcDoc::line().append(RcDoc::text("or").append(RcDoc::space()))
                 } else {
                     // A run of other nodes is expected to hold exactly one node, anything else panics here
                     let conjunction_node = chunks.exactly_one().unwrap_or_else(|_| unreachable!("chunk should contain exactly one separator node"));
                     to_doc(&conjunction_node, rope, options)
                 }
             }).collect_vec();
            RcDoc::concat(subs)
        },
        "conjunction"
         => {
             // NOTE: `is_or` is true for the runs of `and` nodes here
             let subs=node.children(&mut cursor).chunk_by(|n| n.kind()=="and").into_iter().map(|(is_or, chunks)|{
                 if is_or {
                     RcDoc::line().append(RcDoc::text("and").append(RcDoc::space()))
                 } else {
                     RcDoc::intersperse(chunks.map(|n| to_doc(&n, rope, options)), RcDoc::space())
                 }
             }).collect_vec();
            RcDoc::concat(subs)
        },
        "primary"=>{
            RcDoc::intersperse(node.children(&mut cursor).map(|n|to_doc(&n, rope, options)), RcDoc::space())
        },
        "nested_description"
         => {
            // Only the first named child is translated, the parentheses are written by hand
            RcDoc::text("(").append(RcDoc::line()).append(
                to_doc(&node.named_child(0).expect("open parentheses to have sibling"), rope, options)
            ).nest(nest_depth).append(RcDoc::line()).append(")")
        },
        "class_frame"
        | "datatype_frame"
        | "data_property_frame"
        | "object_property_frame"
        | "annotation_property_frame"
        | "individual_frame"
         => frame_to_doc(node, rope, options, nest_depth),
        _ => RcDoc::text(text), // this applies also to "ERROR" nodes!
    }
}

/// Translates a node that consists of a keyword followed by a separated list.
///
/// The first child (which should be the keyword) and every `,` or `o` separator are followed
/// by a line break opportunity. The result is nested by `nest_depth` and grouped.
fn nesting_property_with_keyword_to_frame(
    node: &Node,
    rope: &Rope,
    options: &FormattingSettings,
    nest_depth: isize,
) -> RcDoc<'static> {
    let mut cursor = node.walk();

    // This should be the keyword
    let mut docs: Vec<RcDoc<'static>> = node
        .child(0)
        .map(|keyword| to_doc(&keyword, rope, options).append(RcDoc::line()))
        .into_iter()
        .collect();

    // Group the remaining children into runs of separators and runs of other nodes
    let runs = node
        .children(&mut cursor)
        .skip(1)
        .chunk_by(|child| matches!(child.kind(), "," | "o"));

    for (is_separator, run) in &runs {
        let doc = if is_separator {
            let separator = run.exactly_one().unwrap_or_else(|_| {
                unreachable!("chunk should contain exactly one separator node")
            });

            let separator_text = if separator.kind() == "o" { " o" } else { "," };
            RcDoc::text(separator_text).append(RcDoc::line())
        } else {
            RcDoc::intersperse(run.map(|n| to_doc(&n, rope, options)), RcDoc::line())
        };
        docs.push(doc);
    }

    RcDoc::concat(docs).nest(nest_depth).group()
}

/// Translates the root node of a file.
///
/// The prefix declarations are written one per line and separated from the ontology by an
/// empty line. Without prefix declarations only the ontology is written.
fn source_file_to_doc(
    node: &Node,
    rope: &Rope,
    options: &FormattingSettings,
) -> RcDoc<'static, ()> {
    let mut cursor = node.walk();
    let prefix_docs: Vec<_> = node
        .children_by_field_name("prefix", &mut cursor)
        .map(|n| to_doc(&n, rope, options))
        .collect();
    let ontology_doc = match node.child_by_field_name("ontology") {
        Some(ontology) => to_doc(&ontology, rope, options),
        None => RcDoc::nil(),
    };

    if prefix_docs.is_empty() {
        return ontology_doc;
    }

    let prefixes = RcDoc::intersperse(prefix_docs, RcDoc::hardline());
    let empty_line = RcDoc::hardline().append(RcDoc::hardline());
    RcDoc::intersperse([prefixes, ontology_doc], empty_line)
}

/// Translates an ontology node.
///
/// The result has four parts that are separated by hard line breaks: the `Ontology:` header
/// with the IRI and version IRI, the imports, the annotations and the frames. The frames are
/// sorted by `frame_order` when `options.order_frames` is set.
fn ontology_to_doc(
    node: &Node,
    rope: &Rope,
    options: &FormattingSettings,
    nest_depth: isize,
) -> RcDoc<'static> {
    let mut cursor = node.walk();
    RcDoc::intersperse(
        [
            // header, the IRI and the version IRI are both optional
            RcDoc::text("Ontology:")
                .append(RcDoc::line())
                .append(RcDoc::intersperse(
                    node.child_by_field_name("iri")
                        .into_iter()
                        .map(|n| to_doc(&n, rope, options))
                        .chain(
                            node.child_by_field_name("version_iri")
                                .into_iter()
                                .map(|n| to_doc(&n, rope, options)),
                        ),
                    RcDoc::line(),
                ))
                .nest(nest_depth)
                .group(),
            // imports
            RcDoc::intersperse(
                // Each field iteration gets its own clone of the cursor
                node.children_by_field_name("import", &mut cursor.clone())
                    .map(|n| to_doc(&n, rope, options).append(RcDoc::hardline())),
                RcDoc::nil(),
            ),
            // annotations
            RcDoc::intersperse(
                node.children_by_field_name("annotations", &mut cursor.clone())
                    .map(|n| to_doc(&n, rope, options).append(RcDoc::hardline())),
                RcDoc::nil(),
            ),
            // frames
            RcDoc::intersperse(
                {
                    let frame_nodes = node.children_by_field_name("frame", &mut cursor);

                    // Boxed, because the sorted and the unsorted iterator have different types
                    let maybe_sorted: Box<dyn Iterator<Item = Node<'_>>> = if options.order_frames {
                        Box::new(frame_nodes.sorted_by_key(|n| frame_order(n.kind())))
                    } else {
                        Box::new(frame_nodes)
                    };

                    maybe_sorted.map(|n| to_doc(&n, rope, options).append(RcDoc::hardline()))
                },
                RcDoc::hardline(),
            ),
        ],
        RcDoc::hardline(),
    )
}

/// Returns the sort rank of a frame node kind; lower ranks are placed first.
///
/// The six known frame kinds map to the ranks 1 through 6. Every other kind
/// maps to `u32::MAX`.
fn frame_order(frame_kind: &str) -> u32 {
    // Kind/rank pairs, listed in ascending rank order.
    const RANKS: [(&str, u32); 6] = [
        ("annotation_property_frame", 1),
        ("datatype_frame", 2),
        ("object_property_frame", 3),
        ("data_property_frame", 4),
        ("class_frame", 5),
        ("individual_frame", 6),
    ];

    RANKS
        .iter()
        .find(|(kind, _)| *kind == frame_kind)
        .map_or(u32::MAX, |&(_, rank)| rank)
}

/// Builds the pretty-printer document for a single frame node.
///
/// The first two children of `node` form the header: they are joined by a
/// soft line break, nested by `nest_depth` and grouped. All remaining
/// children follow after a hard line break, one per line. The combined
/// document is nested by `nest_depth` and grouped once more.
///
/// A missing first or second child is rendered as an empty document.
fn frame_to_doc(
    node: &Node,
    rope: &Rope,
    options: &FormattingSettings,
    nest_depth: isize,
) -> RcDoc<'static> {
    let mut cursor = node.walk();

    let first = node
        .child(0)
        .map_or(RcDoc::nil(), |n| to_doc(&n, rope, options));
    let second = node
        .child(1)
        .map_or(RcDoc::nil(), |n| to_doc(&n, rope, options));
    let header: RcDoc<'static> = first
        .append(RcDoc::line())
        .append(second)
        .nest(nest_depth)
        .group();

    // Everything after the two header children goes on its own line.
    let body = RcDoc::intersperse(
        node.children(&mut cursor)
            .skip(2)
            .map(|n| to_doc(&n, rope, options)),
        RcDoc::hardline(),
    );

    header
        .append(RcDoc::hardline())
        .append(body)
        .nest(nest_depth)
        .group()
}

/// Gathers the URLs that `doc.reachable_docs_recursive_helper` adds to a fresh
/// set and returns them as a vector.
///
/// Despite the name, the result is not cached: some dependencies may not be
/// loaded yet, so the result can change independently of the document.
///
/// An error returned by the helper is only logged; whatever was put into the
/// set up to that point is still returned. The URLs come out of a `HashSet`,
/// so their order is unspecified. `include_prefix` is forwarded to the helper
/// unchanged.
fn reachable_docs_recursive_cached(
    doc: &InternalDocument,
    workspace: &Workspace,
    include_prefix: bool,
) -> Vec<Url> {
    let mut reachable = HashSet::<Url>::new();
    doc.reachable_docs_recursive_helper(&mut reachable, workspace, include_prefix)
        .log_if_error();
    reachable.into_iter().collect()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pos::Position;
    use indoc::indoc;
    use pretty_assertions::assert_eq;
    use tempdir::TempDir;
    use test_log::test;

    /// A file URL that points into a freshly created temporary directory.
    ///
    /// Hand-written `file:///` URLs are not valid on every operating system, so the tests derive
    /// the URL from a real temporary path instead.
    struct TmpUrl {
        url: Url,
        // Stored next to the URL and never read; presumably this keeps the directory alive for
        // as long as the URL is in use.
        _tmp_dir: TempDir,
    }

    impl TmpUrl {
        /// Creates a temporary directory (passing `owl-ms-test` to `TempDir::new`) and a URL for
        /// the path `file.omn` inside it.
        ///
        /// Panics when the directory cannot be created or the path cannot be turned into a URL.
        fn new() -> Self {
            let directory = TempDir::new("owl-ms-test").unwrap();
            let file_path = directory.path().join("file.omn");
            Self {
                url: Url::from_file_path(file_path).unwrap(),
                _tmp_dir: directory,
            }
        }

        /// Returns an owned copy of the URL.
        fn url(&self) -> Url {
            self.url.clone()
        }
    }

    /// Formats a document that is written entirely on one line with irregular spacing and
    /// compares the complete output: prefixes, ontology header, imports, annotations and two
    /// class frames.
    #[test(tokio::test)]
    async fn internal_document_formatted_should_format_correctly() {
        let settings = FormattingSettings {
            tab_size: 4,
            ruler_width: 35,
            order_frames: true,
        };
        let location = TmpUrl::new();
        let document = InternalDocument::new(
            location.url(),
            -1,
            indoc! {"
                Prefix:  a:  <http://a/a>  Prefix:  a:  <http://a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>    Ontology:   a   v   Import:    <http://a/a>    Import:    <http://a/a> Annotations:    rdfs:label     \"a\"    Class:   a   SubClassOf:   b,e,f SubClassOf:   cccccccccccccccccccccccc,ddddddddddddddddddddd,eeeeeeeeeee   Class:   a     SubClassOf: a    Annotations:   rdfs:label    \"Y\"    EquivalentTo:    a   ,   a DisjointWith:    a  ,  a   DisjointUnionOf:  Annotations: y 12, a 2    a,a    HasKey:    a
            "}
            .into(),
        );

        // The parse tree is logged so that a failing comparison is easier to diagnose.
        info!("sexp:\n{}", document.tree().root_node().to_sexp());

        let formatted = document.formatted(&settings);

        assert_eq!(
            formatted,
            indoc! {"
                Prefix: a: <http://a/a>
                Prefix:
                    a:
                    <http://a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>

                Ontology: a v
                Import: <http://a/a>
                Import: <http://a/a>

                Annotations: rdfs:label \"a\"

                Class: a
                    SubClassOf: b, e, f
                    SubClassOf:
                        cccccccccccccccccccccccc,
                        ddddddddddddddddddddd,
                        eeeeeeeeeee

                Class: a
                    SubClassOf: a
                    Annotations: rdfs:label \"Y\"
                    EquivalentTo: a, a
                    DisjointWith: a, a
                    DisjointUnionOf:
                        Annotations: y 12, a 2
                        a,
                        a
                    HasKey: a
            "}
        );
    }

    /// Formats a class whose `SubClassOf` description is too long for the ruler and checks how
    /// the nested `and`/`or`/`some` expression is broken over several lines.
    #[test(tokio::test)]
    async fn internal_document_formatted_with_description_should_format_correctly() {
        let settings = FormattingSettings {
            tab_size: 4,
            ruler_width: 35,
            order_frames: true,
        };
        let location = TmpUrl::new();
        let document = InternalDocument::new(
            location.url(),
            -1,
            indoc! {r"
                Ontology:a
                Class: a
                SubClassOf:   (aaaaaaaa and bbbbbb)    or   (bbbb and hasRel some (ccccccc or ddddddd or eeeeeeeee))
            "}
            .into(),
        );

        // The parse tree is logged so that a failing comparison is easier to diagnose.
        info!("sexp:\n{}", document.tree().root_node().to_sexp());

        let formatted = document.formatted(&settings);

        assert_eq!(
            formatted,
            indoc! {r"
                Ontology: a


                Class: a
                    SubClassOf:
                        (
                            aaaaaaaa
                            and bbbbbb
                        )
                        or (
                            bbbb
                            and hasRel some (
                                ccccccc
                                or ddddddd
                                or eeeeeeeee
                            )
                        )
            "}
        );
    }

    /// Formats with `order_frames` disabled and checks that the `Class` frame stays in front of
    /// the `AnnotationProperty` frame, as written in the input.
    #[test(tokio::test)]
    async fn internal_document_formatted_without_frame_order_should_format_correctly() {
        let settings = FormattingSettings {
            tab_size: 4,
            ruler_width: 35,
            order_frames: false,
        };
        let location = TmpUrl::new();
        let document = InternalDocument::new(
            location.url(),
            -1,
            indoc! {r"
                Ontology:a
                Class: a
                AnnotationProperty: a
            "}
            .into(),
        );

        let formatted = document.formatted(&settings);

        assert_eq!(
            formatted,
            indoc! {r"
                Ontology: a

                
                Class: a

                
                AnnotationProperty: a

            "}
        );
    }

    /// Expands abbreviated IRIs using the two named prefixes declared in the document.
    #[test(tokio::test)]
    async fn internal_document_abbreviated_iri_to_full_iri_should_convert_abbreviated_iri() {
        let location = TmpUrl::new();
        let document = InternalDocument::new(
            location.url(),
            -1,
            "
                Prefix: owl: <http://www.w3.org/2002/07/owl#>
                Prefix: ja: <http://www.semanticweb.org/janek/ontologies/2025/5/untitled-ontology-3/>
            "
            .into(),
        );

        // (abbreviated IRI, expected full IRI)
        let cases = [
            ("owl:Nothing", "http://www.w3.org/2002/07/owl#Nothing"),
            (
                "ja:Janek",
                "http://www.semanticweb.org/janek/ontologies/2025/5/untitled-ontology-3/Janek",
            ),
        ];

        for (abbreviated, expected) in cases {
            assert_eq!(
                document.abbreviated_iri_to_full_iri(abbreviated),
                Some(expected.to_string())
            );
        }
    }

    /// With only the empty prefix declared, both `:Nothing` and the bare `Nothing` expand to the
    /// same full IRI.
    #[test]
    fn internal_document_abbreviated_iri_to_full_iri_should_convert_simple_iri() {
        let location = TmpUrl::new();
        let document = InternalDocument::new(
            location.url(),
            -1,
            "
                Prefix: : <http://www.w3.org/2002/07/owl#>
            "
            .into(),
        );

        for abbreviated in [":Nothing", "Nothing"] {
            assert_eq!(
                document.abbreviated_iri_to_full_iri(abbreviated),
                Some("http://www.w3.org/2002/07/owl#Nothing".to_string())
            );
        }
    }

    /// Declares six prefixes (one of them the empty prefix) and checks that `prefixes()` reports
    /// exactly these name/IRI pairs.
    #[test]
    fn internal_document_prefix_should_return_all_prefixes() {
        let location = TmpUrl::new();
        let document = InternalDocument::new(
            location.url(),
            -1,
            "
                Prefix: : <http://www.semanticweb.org/janek/ontologies/2025/5/untitled-ontology-3/>
                Prefix: owl: <http://www.w3.org/2002/07/owl#>
                Prefix: rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                Prefix: rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                Prefix: xml: <http://www.w3.org/XML/1998/namespace>
                Prefix: xsd: <http://www.w3.org/2001/XMLSchema#>

                Ontology: <http://www.semanticweb.org/janek/ontologies/2025/5/untitled-ontology-3>
            "
            .into(),
        );

        // Sorted so that the comparison does not depend on the order `prefixes()` yields.
        let mut prefixes: Vec<_> = document
            .prefixes()
            .iter()
            .map(|(name, iri)| (name.clone(), iri.clone()))
            .collect();
        prefixes.sort();

        // Already in sorted order: the empty prefix name sorts first.
        let expected: Vec<_> = [
            (
                "",
                "http://www.semanticweb.org/janek/ontologies/2025/5/untitled-ontology-3/",
            ),
            ("owl", "http://www.w3.org/2002/07/owl#"),
            ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
            ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
            ("xml", "http://www.w3.org/XML/1998/namespace"),
            ("xsd", "http://www.w3.org/2001/XMLSchema#"),
        ]
        .into_iter()
        .map(|(name, iri)| (name.to_string(), iri.into()))
        .collect();

        assert_eq!(prefixes, expected);
    }

    /// Looks up the frame info for the IRI `A` and checks that it has exactly one definition,
    /// located in this document at the expected range.
    #[test]
    fn internal_document_get_frame_info_should_show_definitions() {
        // Arrange
        let location = TmpUrl::new();
        let doc = InternalDocument::new(
            location.url(),
            -1,
            r#"
                Ontology:
                    Class: A
                        Annotations: rdfs:label "This class is in the first file"

                        SubClassOf: class-in-other-file
             "#
            .into(),
        );
        let expected_definitions = vec![Location {
            uri: location.url(),
            range: Range {
                start: Position::new(2, 20),
                end: Position::new(5, 55),
            },
        }];

        // Act
        let lookup = doc.frame_info_by_iri(&"A".to_string());

        // Assert
        // The document is logged before unwrapping so that it shows up when the lookup failed.
        info!("{doc:#?}");
        let frame_info = lookup.unwrap();

        assert_eq!(frame_info.iri, "A".to_string());
        assert_eq!(frame_info.definitions, expected_definitions);
    }

    /// Creates an `ExternalDocument` from OWL/XML text and checks that construction succeeds and
    /// that the document keeps the text unchanged.
    #[test]
    fn external_document_new_given_owl_text_does_parse_ontology() {
        // Arrange
        let source_url = Url::parse("https://example.com/onto.owx").unwrap();
        let ontology_text = r#"
        <?xml version="1.0"?>
        <Ontology xmlns="http://www.w3.org/2002/07/owl#" xml:base="http://www.example.com/iri" ontologyIRI="http://www.example.com/iri">
            <Declaration>
                <Class IRI="https://www.example.com/o1"/>
            </Declaration>
        </Ontology>
        "#
        .to_string();

        // Act
        let creation = ExternalDocument::new(ontology_text.clone(), source_url);

        // Assert
        let document = creation.unwrap();
        assert_eq!(document.text, ontology_text);
    }

    /// Checks that `imports()` of an OWL/XML document contains both imported URLs: the temporary
    /// file URL and the fixed `http` one.
    #[test]
    fn external_document_reachable_documents_given_imports_does_return_imports() {
        // Arrange
        let local_import = TmpUrl::new();
        let remote_import = Url::parse("http://www.example.com/other-property").unwrap();
        let owl_ontology_text = format!(
            r#"
            <?xml version="1.0"?>
            <Ontology xmlns="http://www.w3.org/2002/07/owl#" xml:base="http://www.example.com/iri" ontologyIRI="http://www.example.com/iri">
                <Import>{}</Import>
                <Import>http://www.example.com/other-property</Import>
                <Declaration>
                    <Class IRI="https://www.example.com/o9"/>
                </Declaration>
            </Ontology>
        "#,
            local_import.url()
        );
        let document = ExternalDocument::new(
            owl_ontology_text,
            Url::parse("https://example.com/onto.owx").unwrap(),
        )
        .unwrap();

        // Act
        let imported = document.imports();

        // Assert
        assert!(imported.contains(&remote_import));
        assert!(imported.contains(&local_import.url()));
    }

    /// Index 25 of the sample line yields the word `multi`.
    #[test]
    fn word_before_character_should_find_word() {
        let line = "This is a line with multi words";
        assert_eq!(word_before_character(25, line), "multi");
    }

    /// A full IRI that starts with the IRI of the declared `owl` prefix is shortened to
    /// `owl:Thing`.
    #[test]
    fn full_iri_to_abbreviated_iri_should_work_for_simple_iris() {
        let location = TmpUrl::new();
        let document = InternalDocument::new(
            location.url(),
            -1,
            "
                Prefix: owl: <http://www.w3.org/2002/07/owl#>
            "
            .into(),
        );

        assert_eq!(
            document.full_iri_to_abbreviated_iri("http://www.w3.org/2002/07/owl#Thing"),
            Some("owl:Thing".to_string())
        );
    }

    /// When the matching prefix is the empty prefix, the abbreviation is the bare name `Thing`
    /// without a leading colon.
    #[test]
    fn full_iri_to_abbreviated_iri_should_work_for_simple_iris_with_empty_prefix() {
        let location = TmpUrl::new();
        let document = InternalDocument::new(
            location.url(),
            -1,
            "
                Prefix: : <http://www.w3.org/2002/07/owl#>
            "
            .into(),
        );

        assert_eq!(
            document.full_iri_to_abbreviated_iri("http://www.w3.org/2002/07/owl#Thing"),
            Some("Thing".to_string())
        );
    }
}