use std::sync::Arc;
use mir_types::{Atomic, Union};
/// Stateless parser that extracts type information and flags from a
/// PHPDoc-style docblock (`/** ... */`).
pub struct DocblockParser;

impl DocblockParser {
    /// Parses `text` (a docblock including its `/**` / `*/` delimiters) into a
    /// [`ParsedDocblock`].
    ///
    /// The docblock is processed one line at a time; lines that do not start
    /// with `@` after the comment decoration has been stripped are ignored.
    /// Unknown tags are skipped silently, so this function never fails.
    ///
    /// Tag names are matched on a word boundary (via `strip_tag`), so
    /// look-alike tags such as `@param-out` are not mistaken for `@param`.
    /// A vendor-specific return tag (`@psalm-return` / `@phpstan-return`) takes
    /// precedence over a plain `@return` regardless of the order in which the
    /// two appear.
    pub fn parse(text: &str) -> ParsedDocblock {
        // Every spelling that declares a template parameter. The variance
        // suffix is accepted but not recorded.
        const TEMPLATE_TAGS: &[&str] = &[
            "@template",
            "@template-covariant",
            "@template-contravariant",
            "@psalm-template",
            "@psalm-template-covariant",
            "@psalm-template-contravariant",
            "@phpstan-template",
            "@phpstan-template-covariant",
            "@phpstan-template-contravariant",
        ];

        let mut result = ParsedDocblock::default();
        // Set once a vendor return tag was seen so that a later plain
        // `@return` does not overwrite the more specific type.
        let mut has_vendor_return = false;

        for line in extract_lines(text) {
            let line = line.trim();
            if line.is_empty() || !line.starts_with('@') {
                continue;
            }

            if let Some(rest) = strip_tag(line, "@param") {
                if let Some((ty_str, name)) = parse_param_line(rest.trim()) {
                    let ty = parse_type_string(&ty_str);
                    result.params.push((name, ty));
                }
            } else if let Some(rest) = strip_tag(line, "@return") {
                if !has_vendor_return {
                    let ty = parse_type_string(extract_type_token(rest.trim()));
                    result.return_type = Some(ty);
                }
            } else if let Some(rest) = strip_tag(line, "@var")
                .or_else(|| strip_tag(line, "@psalm-var"))
                .or_else(|| strip_tag(line, "@phpstan-var"))
            {
                let rest = rest.trim();
                let type_str = extract_type_token(rest);
                let ty = parse_type_string(type_str);
                result.var_type = Some(ty);
                // `type_str` is a prefix of `rest`, so this slice is in bounds.
                let after_type = rest[type_str.len()..].trim();
                if after_type.starts_with('$') {
                    result.var_name = Some(
                        after_type
                            .trim_start_matches('$')
                            .split_whitespace()
                            .next()
                            .unwrap_or("")
                            .to_string(),
                    );
                }
            } else if let Some(rest) = strip_tag(line, "@psalm-return")
                .or_else(|| strip_tag(line, "@phpstan-return"))
            {
                let ty = parse_type_string(extract_type_token(rest.trim()));
                result.return_type = Some(ty);
                has_vendor_return = true;
            } else if let Some(rest) = strip_tag(line, "@psalm-param")
                .or_else(|| strip_tag(line, "@phpstan-param"))
            {
                if let Some((ty_str, name)) = parse_param_line(rest.trim()) {
                    let ty = parse_type_string(&ty_str);
                    // A vendor tag refines an already declared parameter in
                    // place; otherwise it declares the parameter itself.
                    if let Some(entry) = result.params.iter_mut().find(|(n, _)| *n == name) {
                        entry.1 = ty;
                    } else {
                        result.params.push((name, ty));
                    }
                }
            } else if let Some(rest) = TEMPLATE_TAGS.iter().find_map(|tag| strip_tag(line, tag)) {
                let (name, bound) = parse_template_line(rest.trim());
                result.templates.push((name, bound));
            } else if let Some(rest) = line
                .strip_prefix("@extends")
                .or_else(|| line.strip_prefix("@psalm-extends"))
            {
                result.extends = Some(rest.trim().to_string());
            } else if let Some(rest) = line
                .strip_prefix("@implements")
                .or_else(|| line.strip_prefix("@psalm-implements"))
            {
                result.implements.push(rest.trim().to_string());
            } else if let Some(rest) = line.strip_prefix("@throws") {
                let class = rest.split_whitespace().next().unwrap_or("").to_string();
                if !class.is_empty() {
                    result.throws.push(class);
                }
            } else if let Some(rest) = line
                .strip_prefix("@psalm-assert-if-true")
                .or_else(|| line.strip_prefix("@phpstan-assert-if-true"))
            {
                // The `-if-true` / `-if-false` forms must be tested before the
                // bare `-assert` prefix, which they also start with.
                if let Some((ty_str, name)) = parse_param_line(rest.trim()) {
                    result
                        .assertions_if_true
                        .push((name, parse_type_string(&ty_str)));
                }
            } else if let Some(rest) = line
                .strip_prefix("@psalm-assert-if-false")
                .or_else(|| line.strip_prefix("@phpstan-assert-if-false"))
            {
                if let Some((ty_str, name)) = parse_param_line(rest.trim()) {
                    result
                        .assertions_if_false
                        .push((name, parse_type_string(&ty_str)));
                }
            } else if let Some(rest) = line
                .strip_prefix("@psalm-assert")
                .or_else(|| line.strip_prefix("@phpstan-assert"))
            {
                if let Some((ty_str, name)) = parse_param_line(rest.trim()) {
                    result.assertions.push((name, parse_type_string(&ty_str)));
                }
            } else if line.contains("@psalm-suppress") || line.contains("@phpstan-ignore") {
                // The suppressed issue is the second whitespace-separated
                // token on the line (the first one is the tag itself).
                let suppressed = line.split_whitespace().nth(1).unwrap_or("").to_string();
                if !suppressed.is_empty() {
                    result.suppressed_issues.push(suppressed);
                }
            } else if line.starts_with("@deprecated") {
                result.is_deprecated = true;
            } else if line.starts_with("@internal") {
                result.is_internal = true;
            } else if line.starts_with("@psalm-pure") || line.starts_with("@pure") {
                result.is_pure = true;
            } else if line.starts_with("@psalm-immutable") || line.starts_with("@immutable") {
                result.is_immutable = true;
            } else if line.starts_with("@readonly") {
                result.is_readonly = true;
            } else if line.starts_with("@api") || line.starts_with("@psalm-api") {
                result.is_api = true;
            }
        }
        result
    }
}
/// Everything `DocblockParser::parse` extracts from a single docblock.
///
/// All fields start out empty / `None` / `false` (via `Default`) and are only
/// filled in when the corresponding tag is present.
#[derive(Debug, Default, Clone)]
pub struct ParsedDocblock {
/// `(name, type)` pairs from `@param`, `@psalm-param` and `@phpstan-param`.
/// Names are stored without the leading `$`.
pub params: Vec<(String, Union)>,
/// Type from `@return`, `@psalm-return` or `@phpstan-return`.
pub return_type: Option<Union>,
/// Type from `@var`, `@psalm-var` or `@phpstan-var`.
pub var_type: Option<Union>,
/// Variable name following the `@var` type, without the leading `$`;
/// `None` when the tag names no variable.
pub var_name: Option<String>,
/// `(name, bound)` pairs from `@template`; the bound is the type after `of`,
/// if any.
pub templates: Vec<(String, Option<Union>)>,
/// Trimmed text following `@extends` / `@psalm-extends` (kept as a raw string).
pub extends: Option<String>,
/// Trimmed text of each `@implements` / `@psalm-implements` tag (raw strings).
pub implements: Vec<String>,
/// First whitespace-delimited token of each `@throws` tag.
pub throws: Vec<String>,
/// `(name, type)` pairs from `@psalm-assert` / `@phpstan-assert`.
pub assertions: Vec<(String, Union)>,
/// `(name, type)` pairs from `@psalm-assert-if-true` / `@phpstan-assert-if-true`.
pub assertions_if_true: Vec<(String, Union)>,
/// `(name, type)` pairs from `@psalm-assert-if-false` / `@phpstan-assert-if-false`.
pub assertions_if_false: Vec<(String, Union)>,
/// Second whitespace-delimited token of each line containing
/// `@psalm-suppress` or `@phpstan-ignore`.
pub suppressed_issues: Vec<String>,
/// Set by `@deprecated`.
pub is_deprecated: bool,
/// Set by `@internal`.
pub is_internal: bool,
/// Set by `@pure` / `@psalm-pure`.
pub is_pure: bool,
/// Set by `@immutable` / `@psalm-immutable`.
pub is_immutable: bool,
/// Set by `@readonly`.
pub is_readonly: bool,
/// Set by `@api` / `@psalm-api`.
pub is_api: bool,
}
impl ParsedDocblock {
    /// Looks up the documented type of the parameter called `name`.
    ///
    /// Leading `$` characters are ignored on both the requested name and the
    /// stored names, so `"$foo"` and `"foo"` address the same parameter. The
    /// first matching entry wins; `None` is returned when nothing matches.
    pub fn get_param_type(&self, name: &str) -> Option<&Union> {
        let wanted = name.trim_start_matches('$');
        for (candidate, ty) in &self.params {
            if candidate.trim_start_matches('$') == wanted {
                return Some(ty);
            }
        }
        None
    }
}
/// Parses a docblock type expression (e.g. `?string`, `int|null`,
/// `array<string, Foo>`, `Foo[]`) into a [`Union`].
///
/// Handled forms, in order of precedence:
/// 1. `?T` — `T` with `null` added.
/// 2. `A|B|...` — union, split on `|` outside of any brackets.
/// 3. `A&B` — intersection; only the first operand is kept.
/// 4. `T[]` — array with `int` keys and `T` values.
/// 5. `Name<...>` — delegated to `parse_generic`.
/// 6. Built-in keywords (case-insensitive), then class names.
///
/// Anything unrecognised (including the empty string) yields `mixed`.
/// `resource` is mapped to `mixed`, and `iterable` is represented as an array
/// with `mixed` keys and values.
pub fn parse_type_string(s: &str) -> Union {
    let s = s.trim();

    if let Some(inner) = s.strip_prefix('?') {
        let mut nullable = parse_type_string(inner);
        nullable.add_type(Atomic::TNull);
        return nullable;
    }

    // Unions and intersections are only split when the brackets are balanced.
    let balanced = !is_inside_generics(s);

    if balanced && s.contains('|') {
        let parts = split_union(s);
        if parts.len() > 1 {
            let mut u = Union::empty();
            for part in parts {
                for atomic in parse_type_string(&part).types {
                    u.add_type(atomic);
                }
            }
            return u;
        }
    }

    if balanced && s.contains('&') {
        // Only an `&` outside of every bracket pair is an intersection of the
        // whole type. Splitting on a nested one (`array<Foo&Bar>`) would hand
        // the truncated text `array<Foo` to the fallback below.
        let mut depth = 0i32;
        let top_level_amp = s.char_indices().find_map(|(idx, ch)| {
            match ch {
                '<' | '(' | '{' => depth += 1,
                '>' | ')' | '}' => depth -= 1,
                '&' if depth == 0 => return Some(idx),
                _ => {}
            }
            None
        });
        if let Some(idx) = top_level_amp {
            return parse_type_string(&s[..idx]);
        }
    }

    if let Some(value_str) = s.strip_suffix("[]") {
        let value = parse_type_string(value_str);
        return Union::single(Atomic::TArray {
            key: Box::new(Union::single(Atomic::TInt)),
            value: Box::new(value),
        });
    }

    if let Some(open) = s.find('<') {
        if s.ends_with('>') {
            let name = &s[..open];
            let inner = &s[open + 1..s.len() - 1];
            return parse_generic(name, inner);
        }
    }

    match s.to_lowercase().as_str() {
        "string" => Union::single(Atomic::TString),
        "non-empty-string" => Union::single(Atomic::TNonEmptyString),
        "numeric-string" => Union::single(Atomic::TNumericString),
        "class-string" => Union::single(Atomic::TClassString(None)),
        "int" | "integer" => Union::single(Atomic::TInt),
        "positive-int" => Union::single(Atomic::TPositiveInt),
        "negative-int" => Union::single(Atomic::TNegativeInt),
        "non-negative-int" => Union::single(Atomic::TNonNegativeInt),
        "float" | "double" => Union::single(Atomic::TFloat),
        "bool" | "boolean" => Union::single(Atomic::TBool),
        "true" => Union::single(Atomic::TTrue),
        "false" => Union::single(Atomic::TFalse),
        "null" => Union::single(Atomic::TNull),
        "void" => Union::single(Atomic::TVoid),
        "never" | "never-return" | "no-return" | "never-returns" => {
            Union::single(Atomic::TNever)
        }
        "mixed" => Union::single(Atomic::TMixed),
        "object" => Union::single(Atomic::TObject),
        "array" => Union::single(Atomic::TArray {
            key: Box::new(Union::single(Atomic::TMixed)),
            value: Box::new(Union::mixed()),
        }),
        "list" => Union::single(Atomic::TList {
            value: Box::new(Union::mixed()),
        }),
        "callable" => Union::single(Atomic::TCallable {
            params: None,
            return_type: None,
        }),
        "iterable" => Union::single(Atomic::TArray {
            key: Box::new(Union::single(Atomic::TMixed)),
            value: Box::new(Union::mixed()),
        }),
        "scalar" => Union::single(Atomic::TScalar),
        "numeric" => Union::single(Atomic::TNumeric),
        "resource" => Union::mixed(),
        // The class-relative keywords carry an empty FQCN here.
        // NOTE(review): presumably resolved by a later pass — confirm.
        "static" => Union::single(Atomic::TStaticObject { fqcn: Arc::from("") }),
        "self" | "$this" => Union::single(Atomic::TSelf { fqcn: Arc::from("") }),
        "parent" => Union::single(Atomic::TParent { fqcn: Arc::from("") }),
        // Anything that looks like an identifier is treated as a class name.
        _ if s.starts_with(|c: char| c.is_alphanumeric() || c == '\\' || c == '_') => {
            Union::single(Atomic::TNamedObject {
                fqcn: normalize_fqcn(s).into(),
                type_params: vec![],
            })
        }
        _ => Union::mixed(),
    }
}
fn parse_generic(name: &str, inner: &str) -> Union {
match name.to_lowercase().as_str() {
"array" => {
let params = split_generics(inner);
let (key, value) = if params.len() >= 2 {
(parse_type_string(params[0].trim()), parse_type_string(params[1].trim()))
} else {
(Union::single(Atomic::TInt), parse_type_string(params[0].trim()))
};
Union::single(Atomic::TArray {
key: Box::new(key),
value: Box::new(value),
})
}
"list" | "non-empty-list" => {
let value = parse_type_string(inner.trim());
if name.to_lowercase().starts_with("non-empty") {
Union::single(Atomic::TNonEmptyList {
value: Box::new(value),
})
} else {
Union::single(Atomic::TList {
value: Box::new(value),
})
}
}
"non-empty-array" => {
let params = split_generics(inner);
let (key, value) = if params.len() >= 2 {
(parse_type_string(params[0].trim()), parse_type_string(params[1].trim()))
} else {
(Union::single(Atomic::TInt), parse_type_string(params[0].trim()))
};
Union::single(Atomic::TNonEmptyArray {
key: Box::new(key),
value: Box::new(value),
})
}
"iterable" => {
let params = split_generics(inner);
let value = if params.len() >= 2 {
parse_type_string(params[1].trim())
} else {
parse_type_string(params[0].trim())
};
Union::single(Atomic::TArray {
key: Box::new(Union::single(Atomic::TMixed)),
value: Box::new(value),
})
}
"class-string" => {
Union::single(Atomic::TClassString(Some(normalize_fqcn(inner.trim()).into())))
}
"int" => {
Union::single(Atomic::TIntRange {
min: None,
max: None,
})
}
_ => {
let params: Vec<Union> = split_generics(inner)
.iter()
.map(|p| parse_type_string(p.trim()))
.collect();
Union::single(Atomic::TNamedObject {
fqcn: normalize_fqcn(name).into(),
type_params: params,
})
}
}
}
/// Strips `tag` from the start of `line` if it is followed by a word boundary.
///
/// A boundary is the end of the line, a space, or a tab. A single trailing `s`
/// after the tag is also accepted (so `@returns` matches `@return`). Returns
/// the remainder of the line after the tag (and the optional `s`), or `None`
/// when `line` does not start with the tag as a whole word.
fn strip_tag<'a>(line: &'a str, tag: &str) -> Option<&'a str> {
    /// True when `tail` is empty or begins with a space or tab.
    fn at_boundary(tail: &str) -> bool {
        matches!(tail.chars().next(), None | Some(' ') | Some('\t'))
    }

    let tail = line.strip_prefix(tag)?;
    if at_boundary(tail) {
        return Some(tail);
    }
    tail.strip_prefix('s').filter(|after_s| at_boundary(after_s))
}
/// Returns the leading type expression of `s`: everything up to the first
/// space or tab that is not nested inside `<>`, `()` or `{}`.
///
/// When no such separator exists the whole input is returned.
fn extract_type_token(s: &str) -> &str {
    let mut nesting = 0i32;
    for (idx, c) in s.char_indices() {
        match c {
            '<' | '(' | '{' => nesting += 1,
            '>' | ')' | '}' => nesting -= 1,
            ' ' | '\t' if nesting == 0 => return &s[..idx],
            _ => {}
        }
    }
    s
}
/// Splits a docblock into its lines with the comment decoration removed.
///
/// For every line the surrounding whitespace, a leading `/**`, a trailing or
/// leading `*/`, and the leading `*` gutter are stripped. Lines that end up
/// empty are kept, so the result has one entry per input line.
fn extract_lines(text: &str) -> Vec<String> {
    let mut cleaned = Vec::new();
    for raw in text.lines() {
        let body = raw.trim().trim_start_matches("/**").trim();
        let body = body.trim_end_matches("*/").trim();
        let body = body.trim_start_matches("*/").trim();
        // Prefer removing the conventional "* " gutter; otherwise drop any
        // run of leading asterisks.
        let body = match body.strip_prefix("* ") {
            Some(after_gutter) => after_gutter,
            None => body.trim_start_matches('*'),
        };
        cleaned.push(body.trim().to_owned());
    }
    cleaned
}
/// Splits the text following a `@param`-like tag into `(type, name)`.
///
/// The type is everything up to the first whitespace that is not nested
/// inside `<>`, `()` or `{}`, so generic types containing spaces
/// (`array<string, int>`) stay intact. Any amount of whitespace may separate
/// the type from the name, which allows column-aligned docblocks. The name is
/// returned without its by-reference (`&`), variadic (`...`) and `$` sigils;
/// any trailing description is ignored.
///
/// Returns `None` when either the type or the name is missing.
fn parse_param_line(s: &str) -> Option<(String, String)> {
    let s = s.trim();

    // Find where the type expression ends, ignoring whitespace inside brackets.
    let mut depth = 0i32;
    let mut type_end = s.len();
    for (idx, ch) in s.char_indices() {
        match ch {
            '<' | '(' | '{' => depth += 1,
            '>' | ')' | '}' => depth -= 1,
            c if c.is_whitespace() && depth == 0 => {
                type_end = idx;
                break;
            }
            _ => {}
        }
    }

    let (ty, after_type) = s.split_at(type_end);
    let name = after_type
        .split_whitespace()
        .next()?
        .trim_start_matches('&')
        .trim_start_matches("...")
        .trim_start_matches('$');

    if ty.is_empty() || name.is_empty() {
        return None;
    }
    Some((ty.to_string(), name.to_string()))
}
/// Parses the text following a `@template` tag into `(name, bound)`.
///
/// The expected shape is `Name` or `Name of Bound`; the `of` keyword is
/// matched case-insensitively and everything after it is parsed as the bound
/// type. Any amount of whitespace may separate the three parts. When the
/// keyword is missing, or nothing follows it, the bound is `None`.
fn parse_template_line(s: &str) -> (String, Option<Union>) {
    // Splits off the first whitespace-delimited word and returns it together
    // with the remainder, stripped of its leading whitespace.
    fn next_word(text: &str) -> (&str, &str) {
        match text.split_once(char::is_whitespace) {
            Some((word, rest)) => (word, rest.trim_start()),
            None => (text, ""),
        }
    }

    let (name, rest) = next_word(s.trim());
    let (keyword, bound_src) = next_word(rest);

    let bound = if keyword.to_lowercase() == "of" && !bound_src.is_empty() {
        Some(parse_type_string(bound_src))
    } else {
        None
    };
    (name.to_string(), bound)
}
/// Splits a union type on every `|` that is not nested inside `<>`, `()` or
/// `{}`.
///
/// Each member is trimmed. Empty members between separators are kept, but an
/// empty (or whitespace-only) final member is dropped.
fn split_union(s: &str) -> Vec<String> {
    let mut members = Vec::new();
    let mut nesting = 0;
    // Byte offset at which the member currently being scanned starts.
    let mut start = 0usize;

    for (idx, c) in s.char_indices() {
        match c {
            '<' | '(' | '{' => nesting += 1,
            '>' | ')' | '}' => nesting -= 1,
            '|' if nesting == 0 => {
                members.push(s[start..idx].trim().to_string());
                start = idx + 1;
            }
            _ => {}
        }
    }

    let tail = s[start..].trim();
    if !tail.is_empty() {
        members.push(tail.to_string());
    }
    members
}
/// Splits a generic parameter list on every `,` that is not nested inside
/// `<>`, `()` or `{}`.
///
/// Each parameter is trimmed. Empty parameters between commas are kept, but an
/// empty (or whitespace-only) final parameter is dropped.
fn split_generics(s: &str) -> Vec<String> {
    let mut args: Vec<String> = Vec::new();
    let mut level = 0;
    let mut buf = String::new();

    for c in s.chars() {
        if c == ',' && level == 0 {
            // Top-level separator: flush the parameter collected so far.
            args.push(buf.trim().to_string());
            buf.clear();
            continue;
        }
        match c {
            '<' | '(' | '{' => level += 1,
            '>' | ')' | '}' => level -= 1,
            _ => {}
        }
        buf.push(c);
    }

    let last = buf.trim();
    if !last.is_empty() {
        args.push(last.to_string());
    }
    args
}
/// Reports whether the brackets in `s` are unbalanced, i.e. the number of
/// opening `<`, `(`, `{` differs from the number of closing `>`, `)`, `}`.
///
/// Only the totals are compared; the order of the brackets is not checked.
fn is_inside_generics(s: &str) -> bool {
    let balance: i32 = s
        .chars()
        .map(|c| match c {
            '<' | '(' | '{' => 1,
            '>' | ')' | '}' => -1,
            _ => 0,
        })
        .sum();
    balance != 0
}
/// Returns `s` as an owned class name with every leading `\` removed.
fn normalize_fqcn(s: &str) -> String {
    s.chars().skip_while(|&c| c == '\\').collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use mir_types::Atomic;

    // --- parse_type_string -------------------------------------------------

    /// A bare scalar keyword produces exactly one atomic type.
    #[test]
    fn parse_string() {
        let parsed = parse_type_string("string");
        assert_eq!(parsed.types.len(), 1);
        assert!(matches!(parsed.types[0], Atomic::TString));
    }

    /// The `?T` shorthand adds `null` to `T`.
    #[test]
    fn parse_nullable_string() {
        let parsed = parse_type_string("?string");
        assert!(parsed.is_nullable());
        assert!(parsed.contains(|t| matches!(t, Atomic::TString)));
    }

    /// Every member of a `|` union ends up in the result.
    #[test]
    fn parse_union() {
        let parsed = parse_type_string("string|int|null");
        assert!(parsed.contains(|t| matches!(t, Atomic::TString)));
        assert!(parsed.contains(|t| matches!(t, Atomic::TInt)));
        assert!(parsed.is_nullable());
    }

    /// `array<T>` is parsed as an array type.
    #[test]
    fn parse_array_of_string() {
        let parsed = parse_type_string("array<string>");
        assert!(parsed.contains(|t| matches!(t, Atomic::TArray { .. })));
    }

    /// `list<T>` is parsed as a list type.
    #[test]
    fn parse_list_of_int() {
        let parsed = parse_type_string("list<int>");
        assert!(parsed.contains(|t| matches!(t, Atomic::TList { .. })));
    }

    /// A namespaced identifier becomes a named object with that FQCN.
    #[test]
    fn parse_named_class() {
        let parsed = parse_type_string("Foo\\Bar");
        assert!(parsed.contains(
            |t| matches!(t, Atomic::TNamedObject { fqcn, .. } if fqcn.as_ref() == "Foo\\Bar")
        ));
    }

    // --- DocblockParser::parse ---------------------------------------------

    /// Multi-line docblock with two `@param` tags and one `@return` tag.
    #[test]
    fn parse_docblock_param_return() {
        let doc = r#"/**
* @param string $name
* @param int $age
* @return bool
*/"#;
        let parsed = DocblockParser::parse(doc);
        assert_eq!(parsed.params.len(), 2);
        assert!(parsed.return_type.is_some());
        let return_type = parsed.return_type.unwrap();
        assert!(return_type.contains(|t| matches!(t, Atomic::TBool)));
    }

    /// Single-line docblock declaring a bounded template parameter.
    #[test]
    fn parse_template() {
        let doc = "/** @template T of object */";
        let parsed = DocblockParser::parse(doc);
        assert_eq!(parsed.templates.len(), 1);
        let (name, bound) = &parsed.templates[0];
        assert_eq!(name, "T");
        assert!(bound.is_some());
    }

    /// `@deprecated` sets the flag even when followed by a description.
    #[test]
    fn parse_deprecated() {
        let doc = "/** @deprecated use newMethod() instead */";
        let parsed = DocblockParser::parse(doc);
        assert!(parsed.is_deprecated);
    }
}