Skip to main content

citum_schema_data/
citation.rs

/*
SPDX-License-Identifier: MIT OR Apache-2.0
SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
*/

//! Citation input model for the Citum processor.
//!
//! This module defines the structures for representing citations as input
//! to the processor. Citations reference entries in the bibliography and
//! can include locators, prefixes, suffixes, and mode information.
11
12#[cfg(feature = "schema")]
13use schemars::JsonSchema;
14use serde::{Deserialize, Deserializer, Serialize, Serializer};
15#[cfg(feature = "bindings")]
16use specta::Type;
17use std::borrow::Cow;
18use std::hash::{Hash, Hasher};
19
20/// A list of citations to process.
21pub type Citations = Vec<Citation>;
22
23/// Citation mode for author-date styles.
24///
25/// Determines how the author name is rendered relative to the citation.
26#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
27#[cfg_attr(feature = "schema", derive(JsonSchema))]
28#[cfg_attr(feature = "bindings", derive(Type))]
29#[serde(rename_all = "kebab-case")]
30pub enum CitationMode {
31    /// Author inline in text: "Smith (2020) argues..."
32    /// Also known as "narrative" or "in-text" citations.
33    Integral,
34    /// Author in parentheses: "(Smith, 2020)"
35    /// The default mode for most citations.
36    #[default]
37    NonIntegral,
38}
39
40/// Explicit integral citation name-memory state for one citation item.
41#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
42#[cfg_attr(feature = "schema", derive(JsonSchema))]
43#[cfg_attr(feature = "bindings", derive(Type))]
44#[serde(rename_all = "kebab-case")]
45pub enum IntegralNameState {
46    /// Render this item as the first integral mention in scope.
47    First,
48    /// Render this item as a subsequent integral mention in scope.
49    Subsequent,
50}
51
52/// Position of a citation in the document flow.
53///
54/// Indicates where this citation appears relative to previous citations
55/// of the same item(s). Used for note-based styles to detect ibid and
56/// subsequent citations, and for author-date styles to apply position-specific
57/// formatting rules (e.g., short forms after first citation).
58#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
59#[cfg_attr(feature = "schema", derive(JsonSchema))]
60#[cfg_attr(feature = "bindings", derive(Type))]
61#[serde(rename_all = "kebab-case")]
62pub enum Position {
63    /// First citation of an item.
64    First,
65    /// Subsequent citation of an item (non-consecutive).
66    Subsequent,
67    /// Same item cited immediately before, no locator on either.
68    Ibid,
69    /// Same item cited immediately before, with different locator.
70    IbidWithLocator,
71}
72
73/// A citation containing one or more references.
74#[derive(Debug, Clone, Default, Deserialize, Serialize)]
75#[cfg_attr(feature = "schema", derive(JsonSchema))]
76#[cfg_attr(feature = "bindings", derive(Type))]
77#[serde(rename_all = "kebab-case")]
78pub struct Citation {
79    /// The citation ID (optional, for tracking).
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub id: Option<String>,
82    /// Note number for footnote/endnote styles.
83    /// Assigned by the document processor, not the citation processor.
84    #[serde(skip_serializing_if = "Option::is_none")]
85    pub note_number: Option<u32>,
86    /// Citation mode: integral (narrative) vs non-integral (parenthetical).
87    /// Only relevant for author-date styles.
88    #[serde(default, skip_serializing_if = "is_default_mode")]
89    pub mode: CitationMode,
90    /// Position of this citation in the document flow.
91    /// Detected automatically by the processor or set explicitly by the caller.
92    #[serde(skip_serializing_if = "Option::is_none")]
93    pub position: Option<Position>,
94    /// Suppress the author name across all items in this citation.
95    /// Used when the author is already named in the prose: "Smith argues (2020)".
96    /// Applies uniformly to all items — per-item suppression is not supported
97    /// because mixed-visibility citations are typographically incoherent.
98    #[serde(default, skip_serializing_if = "is_false")]
99    pub suppress_author: bool,
100    /// Prefix text before all citation items.
101    #[serde(skip_serializing_if = "Option::is_none")]
102    pub prefix: Option<String>,
103    /// Suffix text after all citation items.
104    #[serde(skip_serializing_if = "Option::is_none")]
105    pub suffix: Option<String>,
106    /// The citation items (references being cited).
107    pub items: Vec<CitationItem>,
108    /// If true, the entire citation is a single dynamic compound set.
109    ///
110    /// The first item acts as the head and subsequent items are merged as tails
111    /// in the bibliography. Ignored for non-numeric (compound-numeric) styles.
112    /// Item order is preserved and sorting is suppressed when this flag is set.
113    #[serde(default, skip_serializing_if = "is_false")]
114    pub grouped: bool,
115}
116
117impl Citation {
118    /// Create a simple single-item citation.
119    ///
120    /// Convenience constructor for a citation with a single reference ID and default settings.
121    pub fn simple(id: &str) -> Self {
122        Self {
123            items: vec![CitationItem {
124                id: id.to_string(),
125                ..Default::default()
126            }],
127            ..Default::default()
128        }
129    }
130}
131
132/// Helper for skip_serializing_if on mode field.
133fn is_default_mode(mode: &CitationMode) -> bool {
134    *mode == CitationMode::NonIntegral
135}
136
/// `skip_serializing_if` predicate for `bool` fields whose default is `false`.
///
/// Takes a reference because serde passes the field by reference.
fn is_false(b: &bool) -> bool {
    !*b
}
141
/// Locator types for pinpoint citations.
///
/// `Page` is the default label. Labels outside the built-in set are carried
/// in `Custom`.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "bindings", derive(Type))]
pub enum LocatorType {
    /// Locator refers to a book within a larger work.
    Book,
    /// Locator refers to a chapter.
    Chapter,
    /// Locator refers to a clause.
    Clause,
    /// Locator refers to a column.
    Column,
    /// Locator refers to a corollary.
    Corollary,
    /// Locator refers to a definition.
    Definition,
    /// Locator refers to a division.
    Division,
    /// Locator refers to a figure.
    Figure,
    /// Locator refers to a folio.
    Folio,
    /// Locator refers to a numbered line.
    Line,
    /// Locator refers to a lemma.
    Lemma,
    /// Locator refers to a note.
    Note,
    /// Locator refers to a numbered unit.
    Number,
    /// Locator refers to an opus number.
    Opus,
    /// Locator refers to a page.
    #[default]
    Page,
    /// Locator refers to a paragraph.
    Paragraph,
    /// Locator refers to a sub-paragraph.
    Subparagraph,
    /// Locator refers to a sub-clause.
    Subclause,
    /// Locator refers to a sub-division.
    Subdivision,
    /// Locator refers to a sub-section.
    Subsection,
    /// Locator refers to a part or division.
    Part,
    /// Locator refers to a problem.
    Problem,
    /// Locator refers to a proposition.
    Proposition,
    /// Locator refers to a recital.
    Recital,
    /// Locator refers to a schedule.
    Schedule,
    /// Locator refers to a section.
    Section,
    /// Locator refers to a surah.
    Surah,
    /// Locator refers to a theorem.
    Theorem,
    /// Locator refers to an entry under a headword.
    SubVerbo,
    /// Locator refers to a supplement.
    Supplement,
    /// Locator refers to a verse.
    Verse,
    /// Locator refers to a volume.
    Volume,
    /// Locator refers to a periodical volume.
    VolumePeriodical,
    /// Locator refers to a monograph volume.
    VolumeBook,
    /// Locator refers to an issue.
    Issue,
    /// Locator refers to an algorithm.
    Algorithm,
    /// Locator refers to a custom pinpoint label.
    Custom(String),
}
222
223impl LocatorType {
224    /// Return the canonical kebab-case key for this locator label.
225    #[must_use]
226    pub fn as_key(&self) -> Cow<'_, str> {
227        match self {
228            Self::Book => Cow::Borrowed("book"),
229            Self::Chapter => Cow::Borrowed("chapter"),
230            Self::Clause => Cow::Borrowed("clause"),
231            Self::Column => Cow::Borrowed("column"),
232            Self::Corollary => Cow::Borrowed("corollary"),
233            Self::Definition => Cow::Borrowed("definition"),
234            Self::Division => Cow::Borrowed("division"),
235            Self::Figure => Cow::Borrowed("figure"),
236            Self::Folio => Cow::Borrowed("folio"),
237            Self::Line => Cow::Borrowed("line"),
238            Self::Lemma => Cow::Borrowed("lemma"),
239            Self::Note => Cow::Borrowed("note"),
240            Self::Number => Cow::Borrowed("number"),
241            Self::Opus => Cow::Borrowed("opus"),
242            Self::Page => Cow::Borrowed("page"),
243            Self::Paragraph => Cow::Borrowed("paragraph"),
244            Self::Subparagraph => Cow::Borrowed("subparagraph"),
245            Self::Subclause => Cow::Borrowed("subclause"),
246            Self::Subdivision => Cow::Borrowed("subdivision"),
247            Self::Subsection => Cow::Borrowed("subsection"),
248            Self::Part => Cow::Borrowed("part"),
249            Self::Problem => Cow::Borrowed("problem"),
250            Self::Proposition => Cow::Borrowed("proposition"),
251            Self::Recital => Cow::Borrowed("recital"),
252            Self::Schedule => Cow::Borrowed("schedule"),
253            Self::Section => Cow::Borrowed("section"),
254            Self::Surah => Cow::Borrowed("surah"),
255            Self::Theorem => Cow::Borrowed("theorem"),
256            Self::SubVerbo => Cow::Borrowed("sub-verbo"),
257            Self::Supplement => Cow::Borrowed("supplement"),
258            Self::Verse => Cow::Borrowed("verse"),
259            Self::Volume => Cow::Borrowed("volume"),
260            Self::VolumePeriodical => Cow::Borrowed("volume-periodical"),
261            Self::VolumeBook => Cow::Borrowed("volume-book"),
262            Self::Issue => Cow::Borrowed("issue"),
263            Self::Algorithm => Cow::Borrowed("algorithm"),
264            Self::Custom(value) => normalize_kind_key(value)
265                .map(Cow::Owned)
266                .unwrap_or_else(|| Cow::Borrowed(value.as_str())),
267        }
268    }
269
270    /// Parse a locator label from a known keyword or custom identifier.
271    ///
272    /// # Errors
273    ///
274    /// Returns an error when the input is empty or normalizes to an empty key.
275    pub fn from_key(value: &str) -> Result<Self, String> {
276        let canonical = normalize_kind_key(value)
277            .ok_or_else(|| "locator label must not be empty".to_string())?;
278        Ok(match canonical.as_str() {
279            "algorithm" => Self::Algorithm,
280            "book" => Self::Book,
281            "chapter" => Self::Chapter,
282            "clause" => Self::Clause,
283            "column" => Self::Column,
284            "corollary" => Self::Corollary,
285            "definition" => Self::Definition,
286            "division" => Self::Division,
287            "figure" => Self::Figure,
288            "folio" => Self::Folio,
289            "line" => Self::Line,
290            "lemma" => Self::Lemma,
291            "note" => Self::Note,
292            "number" => Self::Number,
293            "opus" => Self::Opus,
294            "page" => Self::Page,
295            "paragraph" => Self::Paragraph,
296            "part" => Self::Part,
297            "problem" => Self::Problem,
298            "proposition" => Self::Proposition,
299            "recital" => Self::Recital,
300            "schedule" => Self::Schedule,
301            "section" => Self::Section,
302            "subclause" => Self::Subclause,
303            "subdivision" => Self::Subdivision,
304            "subparagraph" => Self::Subparagraph,
305            "subsection" => Self::Subsection,
306            "sub-verbo" => Self::SubVerbo,
307            "supplement" => Self::Supplement,
308            "surah" => Self::Surah,
309            "theorem" => Self::Theorem,
310            "verse" => Self::Verse,
311            "volume" => Self::Volume,
312            "volume-book" => Self::VolumeBook,
313            "volume-periodical" => Self::VolumePeriodical,
314            "issue" => Self::Issue,
315            _ => Self::Custom(canonical),
316        })
317    }
318}
319
320impl PartialEq for LocatorType {
321    fn eq(&self, other: &Self) -> bool {
322        self.as_key().as_ref() == other.as_key().as_ref()
323    }
324}
325
326impl Eq for LocatorType {}
327
328impl Hash for LocatorType {
329    fn hash<H: Hasher>(&self, state: &mut H) {
330        self.as_key().as_ref().hash(state);
331    }
332}
333
334impl Serialize for LocatorType {
335    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
336    where
337        S: Serializer,
338    {
339        serializer.serialize_str(self.as_key().as_ref())
340    }
341}
342
343impl<'de> Deserialize<'de> for LocatorType {
344    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
345    where
346        D: Deserializer<'de>,
347    {
348        let value = String::deserialize(deserializer)?;
349        Self::from_key(&value).map_err(serde::de::Error::custom)
350    }
351}
352
// Hand-written schema: the label is a free-form string, matching the manual
// string-based serde implementations.
#[cfg(feature = "schema")]
impl JsonSchema for LocatorType {
    fn schema_name() -> Cow<'static, str> {
        Cow::Borrowed("LocatorType")
    }

    fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Known locator label keyword or custom kebab-case identifier."
        })
    }
}
366
367/// A locator value that supports both plain strings and explicit plurality.
368///
369/// Plain strings use heuristic plural detection (checking for `-`, `–`, `,`, `&`).
370/// Use the explicit form to override when the heuristic fails (e.g., "figure A-3"
371/// should be singular despite containing a hyphen).
372#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
373#[cfg_attr(feature = "schema", derive(JsonSchema))]
374#[cfg_attr(feature = "bindings", derive(Type))]
375#[serde(untagged)]
376pub enum LocatorValue {
377    /// Plain string value with heuristic plural detection.
378    Text(String),
379    /// Explicit value with manual plural override.
380    Explicit {
381        /// The locator value string.
382        value: String,
383        /// Whether this locator is plural.
384        plural: bool,
385    },
386}
387
388impl LocatorValue {
389    /// Returns the raw value string.
390    pub fn value_str(&self) -> &str {
391        match self {
392            LocatorValue::Text(s) => s,
393            LocatorValue::Explicit { value, .. } => value,
394        }
395    }
396
397    /// Returns whether this locator value is plural.
398    ///
399    /// For `Text`, uses the heuristic (contains `-`, `–`, `,`, or `&`).
400    /// For `Explicit`, returns the specified `plural` field.
401    pub fn is_plural(&self) -> bool {
402        match self {
403            LocatorValue::Text(s) => {
404                s.contains('\u{2013}') || s.contains('-') || s.contains(',') || s.contains('&')
405            }
406            LocatorValue::Explicit { plural, .. } => *plural,
407        }
408    }
409}
410
411impl Default for LocatorValue {
412    fn default() -> Self {
413        LocatorValue::Text(String::new())
414    }
415}
416
417impl From<String> for LocatorValue {
418    fn from(s: String) -> Self {
419        LocatorValue::Text(s)
420    }
421}
422
423impl From<&str> for LocatorValue {
424    fn from(s: &str) -> Self {
425        LocatorValue::Text(s.to_string())
426    }
427}
428
429/// A single segment of a compound locator.
430///
431/// Pairs a locator type with its value, e.g. `{ label: chapter, value: "3" }`.
432#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
433#[cfg_attr(feature = "schema", derive(JsonSchema))]
434#[cfg_attr(feature = "bindings", derive(Type))]
435#[serde(rename_all = "kebab-case")]
436pub struct LocatorSegment {
437    /// The locator type for this segment.
438    pub label: LocatorType,
439    /// The locator value (e.g., "3", "42-45").
440    pub value: LocatorValue,
441}
442
443impl LocatorSegment {
444    /// Create a locator segment from a canonical label and value.
445    pub fn new(label: LocatorType, value: impl Into<LocatorValue>) -> Self {
446        Self {
447            label,
448            value: value.into(),
449        }
450    }
451}
452
453/// A canonical citation locator.
454///
455/// Simple locators use the single-segment form, while compound locators use
456/// the explicit `segments` wrapper.
457#[derive(Debug, Clone, Serialize, PartialEq)]
458#[cfg_attr(feature = "bindings", derive(Type))]
459#[serde(untagged)]
460pub enum CitationLocator {
461    /// A single labeled locator.
462    Single(LocatorSegment),
463    /// Multiple ordered locator segments.
464    Compound {
465        /// Ordered locator segments.
466        segments: Vec<LocatorSegment>,
467    },
468}
469
470#[derive(Debug, Clone, Deserialize)]
471#[serde(untagged)]
472enum CitationLocatorRepr {
473    Single(LocatorSegment),
474    Compound { segments: Vec<LocatorSegment> },
475}
476
477impl<'de> Deserialize<'de> for CitationLocator {
478    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
479    where
480        D: serde::Deserializer<'de>,
481    {
482        use serde::de::Error;
483
484        match CitationLocatorRepr::deserialize(deserializer)? {
485            CitationLocatorRepr::Single(segment) => Ok(Self::Single(segment)),
486            CitationLocatorRepr::Compound { segments } => {
487                Self::compound(segments).map_err(D::Error::custom)
488            }
489        }
490    }
491}
492
493impl CitationLocator {
494    /// Create a single-segment locator.
495    pub fn single(label: LocatorType, value: impl Into<LocatorValue>) -> Self {
496        Self::Single(LocatorSegment::new(label, value))
497    }
498
499    /// Create a compound locator with two or more segments.
500    ///
501    /// # Errors
502    ///
503    /// Returns an error when fewer than two locator segments are supplied.
504    pub fn compound(segments: Vec<LocatorSegment>) -> Result<Self, &'static str> {
505        if segments.len() < 2 {
506            return Err("compound locators must contain at least two segments");
507        }
508        Ok(Self::Compound { segments })
509    }
510
511    /// Returns the ordered locator segments as a slice.
512    pub fn segments(&self) -> &[LocatorSegment] {
513        match self {
514            Self::Single(segment) => std::slice::from_ref(segment),
515            Self::Compound { segments } => segments.as_slice(),
516        }
517    }
518
519    /// Returns true if this locator contains multiple segments.
520    pub fn is_compound(&self) -> bool {
521        matches!(self, Self::Compound { .. })
522    }
523
524    /// Returns a stable string form used for locator comparison.
525    pub fn canonical_string(&self) -> String {
526        self.segments()
527            .iter()
528            .map(|segment| format!("{}:{}", segment.label.as_key(), segment.value.value_str()))
529            .collect::<Vec<_>>()
530            .join(",")
531    }
532}
533
// Hand-written schema mirroring the two serialized shapes: either a bare
// locator segment or an object wrapping a `segments` list.
#[cfg(feature = "schema")]
impl JsonSchema for CitationLocator {
    fn schema_name() -> Cow<'static, str> {
        Cow::Borrowed("CitationLocator")
    }

    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        // Subschemas are requested in the same order as before: segment first, list second.
        let single = generator.subschema_for::<LocatorSegment>();
        let segment_list = generator.subschema_for::<Vec<LocatorSegment>>();

        let compound = schemars::json_schema!({
            "type": "object",
            "properties": {
                "segments": segment_list
            },
            "required": ["segments"]
        });

        schemars::json_schema!({
            "oneOf": [single, compound]
        })
    }
}
554
/// Reduce a free-form label to a lowercase kebab-case key.
///
/// Runs of ASCII letters and digits are kept and lowercased; everything else
/// (whitespace, punctuation, non-ASCII characters) acts as a separator. The
/// kept runs are joined with single `-` characters, so separators at either
/// end leave no trace. Returns `None` when nothing is left.
fn normalize_kind_key(value: &str) -> Option<String> {
    let words: Vec<String> = value
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();

    if words.is_empty() {
        return None;
    }
    Some(words.join("-"))
}
577
578/// A single citation item referencing a bibliography entry.
579#[derive(Debug, Clone, Default, Deserialize, Serialize)]
580#[cfg_attr(feature = "schema", derive(JsonSchema))]
581#[cfg_attr(feature = "bindings", derive(Type))]
582#[serde(rename_all = "kebab-case")]
583pub struct CitationItem {
584    /// The reference ID (citekey).
585    pub id: String,
586    /// Canonical locator value for pinpoint citations.
587    #[serde(skip_serializing_if = "Option::is_none")]
588    pub locator: Option<CitationLocator>,
589    /// Prefix text before this item
590    #[serde(skip_serializing_if = "Option::is_none")]
591    pub prefix: Option<String>,
592    /// Suffix text after this item
593    #[serde(skip_serializing_if = "Option::is_none")]
594    pub suffix: Option<String>,
595    /// Explicit integral name-memory state override for this item.
596    #[serde(skip_serializing_if = "Option::is_none")]
597    pub integral_name_state: Option<IntegralNameState>,
598    /// Explicit org-abbreviation state override for this item.
599    #[serde(skip_serializing_if = "Option::is_none")]
600    pub org_abbreviation_state: Option<IntegralNameState>,
601}
602
603impl CitationItem {
604    /// Returns the canonical locator segments when present.
605    pub fn locator_segments(&self) -> Option<&[LocatorSegment]> {
606        self.locator.as_ref().map(CitationLocator::segments)
607    }
608}
609
610/// Normalize a textual locator string into the canonical locator model.
611///
612/// # Panics
613///
614/// This function does not panic under normal use; the internal `unwrap` is
615/// guarded by the preceding segment-count match.
616pub fn normalize_locator_text(
617    locator: &str,
618    aliases: &[(String, LocatorType)],
619) -> Option<CitationLocator> {
620    let locator = locator.trim();
621    if locator.is_empty() {
622        return None;
623    }
624
625    let raw_segments = split_locator_segments(locator, aliases);
626    let segments: Vec<LocatorSegment> = raw_segments
627        .into_iter()
628        .filter_map(|segment| parse_locator_segment(segment, aliases))
629        .collect();
630
631    match segments.len() {
632        0 => None,
633        1 => {
634            let mut it = segments.into_iter();
635            Some(CitationLocator::Single(it.next()?))
636        }
637        _ => CitationLocator::compound(segments).ok(),
638    }
639}
640
641fn split_locator_segments<'a>(locator: &'a str, aliases: &[(String, LocatorType)]) -> Vec<&'a str> {
642    let mut parts = Vec::new();
643    let mut start = 0;
644
645    for (idx, ch) in locator.char_indices() {
646        if ch != ',' {
647            continue;
648        }
649
650        #[allow(
651            clippy::string_slice,
652            reason = "idx is a valid char boundary from char_indices()"
653        )]
654        let candidate = locator[idx + ch.len_utf8()..].trim_start();
655        if begins_with_locator_label(candidate, aliases) {
656            #[allow(
657                clippy::string_slice,
658                reason = "start and idx are valid char boundaries"
659            )]
660            parts.push(locator[start..idx].trim());
661            start = idx + ch.len_utf8();
662        }
663    }
664
665    #[allow(clippy::string_slice, reason = "start is a valid char boundary")]
666    parts.push(locator[start..].trim());
667    parts
668}
669
670fn parse_locator_segment(
671    segment: &str,
672    aliases: &[(String, LocatorType)],
673) -> Option<LocatorSegment> {
674    let segment = segment.trim();
675    if segment.is_empty() {
676        return None;
677    }
678
679    if let Some((label, rest)) = strip_locator_label(segment, aliases) {
680        let value = rest.trim_start_matches(':').trim();
681        if value.is_empty() {
682            return None;
683        }
684        return Some(LocatorSegment::new(label, value));
685    }
686
687    Some(LocatorSegment::new(LocatorType::Page, segment))
688}
689
690fn begins_with_locator_label(segment: &str, aliases: &[(String, LocatorType)]) -> bool {
691    strip_locator_label(segment, aliases).is_some()
692}
693
694fn strip_locator_label<'a>(
695    segment: &'a str,
696    aliases: &[(String, LocatorType)],
697) -> Option<(LocatorType, &'a str)> {
698    let lower = segment.to_lowercase();
699    let mut best: Option<(LocatorType, usize)> = None;
700
701    for (alias, label) in aliases {
702        if let Some(remainder) = lower.strip_prefix(alias)
703            && alias_boundary(remainder)
704        {
705            let alias_len = alias.len();
706            if best
707                .as_ref()
708                .is_none_or(|(_, best_len)| alias_len > *best_len)
709            {
710                best = Some((label.clone(), alias_len));
711            }
712        }
713    }
714
715    best.map(|(label, alias_len)| {
716        #[allow(clippy::string_slice, reason = "alias_len is the length of a prefix")]
717        (label, segment[alias_len..].trim_start())
718    })
719}
720
/// Decides whether the text following a matched alias ends the label.
///
/// The label ends when nothing follows, or when the next character is `:`,
/// `.`, or whitespace. Anything else means the alias was only the start of a
/// longer word.
fn alias_boundary(remainder: &str) -> bool {
    match remainder.chars().next() {
        None => true,
        Some(next) => matches!(next, ':' | '.') || next.is_whitespace(),
    }
}
727
728#[cfg(test)]
729#[allow(
730    clippy::unwrap_used,
731    clippy::expect_used,
732    clippy::panic,
733    clippy::indexing_slicing,
734    clippy::todo,
735    clippy::unimplemented,
736    clippy::unreachable,
737    clippy::get_unwrap,
738    reason = "Panicking is acceptable and often desired in tests."
739)]
740mod tests {
741    use super::*;
742
743    #[test]
744    fn test_citation_deserialization() {
745        let json = r#"
746        {
747            "items": [
748                {
749                    "id": "kuhn1962"
750                }
751            ],
752            "mode": "integral"
753        }
754        "#;
755        let citation: Citation = serde_json::from_str(json).unwrap();
756        assert_eq!(citation.items.len(), 1);
757        assert_eq!(citation.items[0].id, "kuhn1962");
758        assert_eq!(citation.mode, CitationMode::Integral);
759    }
760
761    #[test]
762    fn test_citation_simple_constructor_defaults() {
763        let citation = Citation::simple("kuhn1962");
764
765        assert_eq!(citation.items.len(), 1);
766        assert_eq!(citation.items[0].id, "kuhn1962");
767        assert_eq!(citation.mode, CitationMode::NonIntegral);
768        assert_eq!(citation.position, None);
769        assert!(!citation.suppress_author);
770        assert_eq!(citation.note_number, None);
771        assert_eq!(citation.prefix, None);
772        assert_eq!(citation.suffix, None);
773    }
774
775    #[test]
776    fn test_citation_default_fields_are_omitted_in_serialization() {
777        let citation = Citation::simple("kuhn1962");
778        let json = serde_json::to_value(&citation).unwrap();
779        let object = json.as_object().unwrap();
780
781        assert!(!object.contains_key("mode"));
782        assert!(!object.contains_key("suppress-author"));
783
784        let explicit = Citation {
785            mode: CitationMode::Integral,
786            suppress_author: true,
787            ..citation
788        };
789        let explicit_json = serde_json::to_value(&explicit).unwrap();
790        let explicit_object = explicit_json.as_object().unwrap();
791
792        assert_eq!(explicit_object.get("mode").unwrap(), "integral");
793        assert_eq!(explicit_object.get("suppress-author").unwrap(), true);
794    }
795
796    #[test]
797    fn test_citation_item_with_locator() {
798        let json = r#"
799        {
800            "id": "kuhn1962",
801            "locator": {
802                "label": "page",
803                "value": "42-45"
804            }
805        }
806        "#;
807        let item: CitationItem = serde_json::from_str(json).unwrap();
808        assert_eq!(item.id, "kuhn1962");
809        assert_eq!(
810            item.locator,
811            Some(CitationLocator::single(LocatorType::Page, "42-45"))
812        );
813    }
814
815    #[test]
816    fn test_compound_locator_serde_roundtrip() {
817        let json = r#"
818        {
819            "id": "smith2020",
820            "locator": {
821                "segments": [
822                    { "label": "chapter", "value": "3" },
823                    { "label": "section", "value": "42" }
824                ]
825            }
826        }
827        "#;
828        let item: CitationItem = serde_json::from_str(json).unwrap();
829        let segs = item.locator.as_ref().unwrap().segments();
830        assert_eq!(segs.len(), 2);
831        assert_eq!(segs[0].label, LocatorType::Chapter);
832        assert_eq!(segs[0].value.value_str(), "3");
833        assert_eq!(segs[1].label, LocatorType::Section);
834        assert_eq!(segs[1].value.value_str(), "42");
835
836        // Round-trip
837        let serialized = serde_json::to_string(&item).unwrap();
838        let deserialized: CitationItem = serde_json::from_str(&serialized).unwrap();
839        assert_eq!(deserialized.locator, item.locator);
840    }
841
842    #[test]
843    fn test_compound_locator_rejects_single_segment() {
844        let err = CitationLocator::compound(vec![LocatorSegment::new(LocatorType::Page, "42")])
845            .expect_err("single-segment compound locator must be rejected");
846        assert!(err.contains("at least two"));
847    }
848
849    #[test]
850    fn test_citation_locator_canonical_string_is_stable() {
851        let locator = CitationLocator::compound(vec![
852            LocatorSegment::new(LocatorType::Page, "23"),
853            LocatorSegment::new(LocatorType::Line, "13"),
854        ])
855        .unwrap();
856
857        assert_eq!(locator.canonical_string(), "page:23,line:13");
858    }
859
860    #[test]
861    fn test_custom_locator_type_round_trips_as_plain_string() {
862        let json = r#"
863        {
864            "id": "score2024",
865            "locator": {
866                "label": "Movement",
867                "value": "II"
868            }
869        }
870        "#;
871
872        let item: CitationItem = serde_json::from_str(json).expect("custom locator should parse");
873        let locator = item.locator.expect("custom locator should exist");
874        let segment = &locator.segments()[0];
875
876        assert_eq!(segment.label, LocatorType::Custom("movement".to_string()));
877        let serialized = serde_json::to_value(&CitationItem {
878            id: "score2024".to_string(),
879            locator: Some(locator),
880            ..Default::default()
881        })
882        .expect("custom locator should serialize");
883
884        assert_eq!(serialized["locator"]["label"], "movement");
885    }
886
887    #[test]
888    fn test_custom_locator_type_normalizes_manual_construction() {
889        let locator = LocatorType::Custom("Reel Label".to_string());
890
891        assert_eq!(locator.as_key(), "reel-label");
892        assert_eq!(
893            locator,
894            LocatorType::from_key("reel-label").expect("known custom key should parse")
895        );
896        assert_eq!(
897            serde_json::to_string(&locator).expect("custom locator should serialize"),
898            "\"reel-label\""
899        );
900    }
901
902    #[test]
903    fn test_locator_segments_single() {
904        let item = CitationItem {
905            id: "test".to_string(),
906            locator: Some(CitationLocator::single(LocatorType::Page, "42")),
907            ..Default::default()
908        };
909        let segments = item.locator_segments().unwrap();
910        assert_eq!(segments.len(), 1);
911        assert_eq!(segments[0].label, LocatorType::Page);
912    }
913
914    #[test]
915    fn test_locator_segments_none() {
916        let item = CitationItem {
917            id: "test".to_string(),
918            ..Default::default()
919        };
920        assert!(item.locator_segments().is_none());
921    }
922
923    #[test]
924    fn test_single_locator_serializes_without_segments_wrapper() {
925        let item = CitationItem {
926            id: "test".to_string(),
927            locator: Some(CitationLocator::single(LocatorType::Page, "42")),
928            ..Default::default()
929        };
930        let json = serde_json::to_value(&item).unwrap();
931        let locator = json
932            .as_object()
933            .unwrap()
934            .get("locator")
935            .and_then(serde_json::Value::as_object)
936            .unwrap();
937        assert!(locator.contains_key("label"));
938        assert!(!locator.contains_key("segments"));
939    }
940
941    #[test]
942    fn test_compound_locator_deserialization() {
943        let json = r#"
944        {
945            "id": "smith2020",
946            "locator": {
947                "segments": [
948                    { "label": "page", "value": "23" },
949                    { "label": "line", "value": "13" }
950                ]
951            }
952        }
953        "#;
954        let item: CitationItem = serde_json::from_str(json).unwrap();
955        let segs = item.locator.as_ref().unwrap().segments();
956        assert_eq!(segs.len(), 2);
957        assert_eq!(segs[0].label, LocatorType::Page);
958        assert_eq!(segs[0].value.value_str(), "23");
959        assert_eq!(segs[1].label, LocatorType::Line);
960        assert_eq!(segs[1].value.value_str(), "13");
961    }
962
963    #[test]
964    fn test_locator_value_explicit_plural_override() {
965        let json = r#"
966        {
967            "id": "test",
968            "locator": {
969                "label": "figure",
970                "value": {
971                    "value": "A-3",
972                    "plural": false
973                }
974            }
975        }
976        "#;
977        let item: CitationItem = serde_json::from_str(json).unwrap();
978        let segs = item.locator.as_ref().unwrap().segments();
979        assert_eq!(segs[0].value.value_str(), "A-3");
980        assert!(!segs[0].value.is_plural());
981    }
982
983    #[test]
984    fn test_locator_value_heuristic_plural() {
985        let lv_range = LocatorValue::from("42-45");
986        assert!(lv_range.is_plural());
987
988        let lv_single = LocatorValue::from("42");
989        assert!(!lv_single.is_plural());
990
991        let lv_en_dash = LocatorValue::from("42–45");
992        assert!(lv_en_dash.is_plural());
993
994        let lv_comma = LocatorValue::from("1, 3, 5");
995        assert!(lv_comma.is_plural());
996
997        let lv_ampersand = LocatorValue::from("A & B");
998        assert!(lv_ampersand.is_plural());
999    }
1000
1001    #[test]
1002    fn test_normalize_locator_text_with_explicit_aliases() {
1003        let aliases = vec![
1004            ("page".to_string(), LocatorType::Page),
1005            ("p.".to_string(), LocatorType::Page),
1006            ("chapter".to_string(), LocatorType::Chapter),
1007            ("ch.".to_string(), LocatorType::Chapter),
1008            ("section".to_string(), LocatorType::Section),
1009            ("§".to_string(), LocatorType::Section),
1010        ];
1011
1012        // Bare number defaults to Page
1013        assert_eq!(
1014            normalize_locator_text("45", &aliases),
1015            Some(CitationLocator::single(LocatorType::Page, "45"))
1016        );
1017
1018        // Explicit label
1019        assert_eq!(
1020            normalize_locator_text("chapter 2", &aliases),
1021            Some(CitationLocator::single(LocatorType::Chapter, "2"))
1022        );
1023
1024        // Abbreviated label
1025        assert_eq!(
1026            normalize_locator_text("ch. 3", &aliases),
1027            Some(CitationLocator::single(LocatorType::Chapter, "3"))
1028        );
1029
1030        // Symbol label
1031        assert_eq!(
1032            normalize_locator_text("§ 4", &aliases),
1033            Some(CitationLocator::single(LocatorType::Section, "4"))
1034        );
1035
1036        // Compound locator
1037        let compound = normalize_locator_text("chapter 2, page 10", &aliases).unwrap();
1038        assert!(compound.is_compound());
1039        let segs = compound.segments();
1040        assert_eq!(segs[0].label, LocatorType::Chapter);
1041        assert_eq!(segs[1].label, LocatorType::Page);
1042
1043        // Empty or invalid input
1044        assert_eq!(normalize_locator_text("", &aliases), None);
1045        assert_eq!(normalize_locator_text("   ", &aliases), None);
1046        assert_eq!(normalize_locator_text("chapter:", &aliases), None);
1047    }
1048
1049    #[test]
1050    fn test_normalize_locator_text_with_abbreviated_aliases() {
1051        let aliases = vec![
1052            ("page".to_string(), LocatorType::Page),
1053            ("pp.".to_string(), LocatorType::Page),
1054            ("vol.".to_string(), LocatorType::Volume),
1055        ];
1056
1057        assert_eq!(
1058            normalize_locator_text("page 45", &aliases),
1059            Some(CitationLocator::single(LocatorType::Page, "45"))
1060        );
1061        assert_eq!(
1062            normalize_locator_text("pp. 10-12", &aliases),
1063            Some(CitationLocator::single(LocatorType::Page, "10-12"))
1064        );
1065        assert_eq!(
1066            normalize_locator_text("vol. 1", &aliases),
1067            Some(CitationLocator::single(LocatorType::Volume, "1"))
1068        );
1069    }
1070}