Skip to main content

citum_schema_data/
citation.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
4*/
5
6//! Citation input model for the Citum processor.
7//!
8//! This module defines the structures for representing citations as input
9//! to the processor. Citations reference entries in the bibliography and
10//! can include locators, prefixes, suffixes, and mode information.
11
12#[cfg(feature = "schema")]
13use schemars::JsonSchema;
14use serde::{Deserialize, Deserializer, Serialize, Serializer};
15#[cfg(feature = "bindings")]
16use specta::Type;
17use std::borrow::Cow;
18use std::hash::{Hash, Hasher};
19
/// A list of citations to process.
///
/// Plain alias for `Vec<Citation>`; it adds no invariants of its own.
pub type Citations = Vec<Citation>;
22
/// Citation mode for author-date styles.
///
/// Determines how the author name is rendered relative to the citation.
/// Variants are (de)serialized in kebab-case, i.e. `integral` and
/// `non-integral`.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub enum CitationMode {
    /// Author inline in text: "Smith (2020) argues..."
    /// Also known as "narrative" or "in-text" citations.
    Integral,
    /// Author in parentheses: "(Smith, 2020)"
    /// The default mode for most citations.
    #[default]
    NonIntegral,
}
39
/// Explicit integral citation name-memory state for one citation item.
///
/// Variants are (de)serialized in kebab-case (`first`, `subsequent`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub enum IntegralNameState {
    /// Render this item as the first integral mention in scope.
    First,
    /// Render this item as a subsequent integral mention in scope.
    Subsequent,
}
51
/// Position of a citation in the document flow.
///
/// Indicates where this citation appears relative to previous citations
/// of the same item(s). Used for note-based styles to detect ibid and
/// subsequent citations, and for author-date styles to apply position-specific
/// formatting rules (e.g., short forms after first citation).
///
/// Variants are (de)serialized in kebab-case (`first`, `subsequent`, `ibid`,
/// `ibid-with-locator`).
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub enum Position {
    /// First citation of an item.
    First,
    /// Subsequent citation of an item (non-consecutive).
    Subsequent,
    /// Same item cited immediately before, no locator on either.
    Ibid,
    /// Same item cited immediately before, with different locator.
    IbidWithLocator,
}
72
/// A citation containing one or more references.
///
/// Field names are (de)serialized in kebab-case (e.g. `note-number`,
/// `suppress-author`). Optional fields that are `None`, and `mode`,
/// `suppress_author` and `grouped` at their default values, are omitted
/// from serialized output.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub struct Citation {
    /// The citation ID (optional, for tracking).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Note number for footnote/endnote styles.
    /// Assigned by the document processor, not the citation processor.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub note_number: Option<u32>,
    /// Citation mode: integral (narrative) vs non-integral (parenthetical).
    /// Only relevant for author-date styles.
    #[serde(default, skip_serializing_if = "is_default_mode")]
    pub mode: CitationMode,
    /// Position of this citation in the document flow.
    /// Detected automatically by the processor or set explicitly by the caller.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub position: Option<Position>,
    /// Suppress the author name across all items in this citation.
    /// Used when the author is already named in the prose: "Smith argues (2020)".
    /// Applies uniformly to all items — per-item suppression is not supported
    /// because mixed-visibility citations are typographically incoherent.
    #[serde(default, skip_serializing_if = "is_false")]
    pub suppress_author: bool,
    /// Prefix text before all citation items.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prefix: Option<String>,
    /// Suffix text after all citation items.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suffix: Option<String>,
    /// The citation items (references being cited).
    ///
    /// This field has no `#[serde(default)]`, so it must be present in the
    /// input when deserializing.
    pub items: Vec<CitationItem>,
    /// If true, the entire citation is a single dynamic compound set.
    ///
    /// The first item acts as the head and subsequent items are merged as tails
    /// in the bibliography. Ignored for non-numeric (compound-numeric) styles.
    /// Item order is preserved and sorting is suppressed when this flag is set.
    ///
    /// NOTE(review): the parenthetical above reads as if "compound-numeric"
    /// were a non-numeric style; confirm the intended wording.
    #[serde(default, skip_serializing_if = "is_false")]
    pub grouped: bool,
}
116
117impl Citation {
118    /// Create a simple single-item citation.
119    ///
120    /// Convenience constructor for a citation with a single reference ID and default settings.
121    pub fn simple(id: &str) -> Self {
122        Self {
123            items: vec![CitationItem {
124                id: id.to_string(),
125                ..Default::default()
126            }],
127            ..Default::default()
128        }
129    }
130}
131
132/// Helper for skip_serializing_if on mode field.
133fn is_default_mode(mode: &CitationMode) -> bool {
134    *mode == CitationMode::NonIntegral
135}
136
/// `skip_serializing_if` predicate for bool fields whose default is `false`.
///
/// Takes a reference because serde passes the field by reference.
fn is_false(b: &bool) -> bool {
    !*b
}
141
/// Locator types for pinpoint citations.
///
/// Serde, equality and hashing are not derived for this type: the manual
/// impls in this file all go through the canonical key returned by
/// `LocatorType::as_key`. `Page` is the default variant.
#[derive(Debug, Clone, Default)]
#[cfg_attr(feature = "bindings", derive(Type))]
pub enum LocatorType {
    /// Locator refers to a book within a larger work.
    Book,
    /// Locator refers to a chapter.
    Chapter,
    /// Locator refers to a clause.
    Clause,
    /// Locator refers to a column.
    Column,
    /// Locator refers to a corollary.
    Corollary,
    /// Locator refers to a definition.
    Definition,
    /// Locator refers to a division.
    Division,
    /// Locator refers to a figure.
    Figure,
    /// Locator refers to a folio.
    Folio,
    /// Locator refers to a numbered line.
    Line,
    /// Locator refers to a lemma.
    Lemma,
    /// Locator refers to a note.
    Note,
    /// Locator refers to a numbered unit.
    Number,
    /// Locator refers to an opus number.
    Opus,
    /// Locator refers to a page.
    #[default]
    Page,
    /// Locator refers to a paragraph.
    Paragraph,
    /// Locator refers to a sub-paragraph.
    Subparagraph,
    /// Locator refers to a sub-clause.
    Subclause,
    /// Locator refers to a sub-division.
    Subdivision,
    /// Locator refers to a sub-section.
    Subsection,
    /// Locator refers to a part or division.
    Part,
    /// Locator refers to a problem.
    Problem,
    /// Locator refers to a proposition.
    Proposition,
    /// Locator refers to a recital.
    Recital,
    /// Locator refers to a schedule.
    Schedule,
    /// Locator refers to a section.
    Section,
    /// Locator refers to a surah.
    Surah,
    /// Locator refers to a theorem.
    Theorem,
    /// Locator refers to an entry under a headword.
    SubVerbo,
    /// Locator refers to a supplement.
    Supplement,
    /// Locator refers to a verse.
    Verse,
    /// Locator refers to a volume.
    Volume,
    /// Locator refers to a periodical volume.
    VolumePeriodical,
    /// Locator refers to a monograph volume.
    VolumeBook,
    /// Locator refers to an issue.
    Issue,
    /// Locator refers to an algorithm.
    Algorithm,
    /// Locator refers to a custom pinpoint label.
    ///
    /// The stored string is not required to be normalized; `as_key`
    /// normalizes it on demand.
    Custom(String),
}
222
223impl LocatorType {
224    /// Return the canonical kebab-case key for this locator label.
225    #[must_use]
226    pub fn as_key(&self) -> Cow<'_, str> {
227        match self {
228            Self::Book => Cow::Borrowed("book"),
229            Self::Chapter => Cow::Borrowed("chapter"),
230            Self::Clause => Cow::Borrowed("clause"),
231            Self::Column => Cow::Borrowed("column"),
232            Self::Corollary => Cow::Borrowed("corollary"),
233            Self::Definition => Cow::Borrowed("definition"),
234            Self::Division => Cow::Borrowed("division"),
235            Self::Figure => Cow::Borrowed("figure"),
236            Self::Folio => Cow::Borrowed("folio"),
237            Self::Line => Cow::Borrowed("line"),
238            Self::Lemma => Cow::Borrowed("lemma"),
239            Self::Note => Cow::Borrowed("note"),
240            Self::Number => Cow::Borrowed("number"),
241            Self::Opus => Cow::Borrowed("opus"),
242            Self::Page => Cow::Borrowed("page"),
243            Self::Paragraph => Cow::Borrowed("paragraph"),
244            Self::Subparagraph => Cow::Borrowed("subparagraph"),
245            Self::Subclause => Cow::Borrowed("subclause"),
246            Self::Subdivision => Cow::Borrowed("subdivision"),
247            Self::Subsection => Cow::Borrowed("subsection"),
248            Self::Part => Cow::Borrowed("part"),
249            Self::Problem => Cow::Borrowed("problem"),
250            Self::Proposition => Cow::Borrowed("proposition"),
251            Self::Recital => Cow::Borrowed("recital"),
252            Self::Schedule => Cow::Borrowed("schedule"),
253            Self::Section => Cow::Borrowed("section"),
254            Self::Surah => Cow::Borrowed("surah"),
255            Self::Theorem => Cow::Borrowed("theorem"),
256            Self::SubVerbo => Cow::Borrowed("sub-verbo"),
257            Self::Supplement => Cow::Borrowed("supplement"),
258            Self::Verse => Cow::Borrowed("verse"),
259            Self::Volume => Cow::Borrowed("volume"),
260            Self::VolumePeriodical => Cow::Borrowed("volume-periodical"),
261            Self::VolumeBook => Cow::Borrowed("volume-book"),
262            Self::Issue => Cow::Borrowed("issue"),
263            Self::Algorithm => Cow::Borrowed("algorithm"),
264            Self::Custom(value) => normalize_kind_key(value)
265                .map(Cow::Owned)
266                .unwrap_or_else(|| Cow::Borrowed(value.as_str())),
267        }
268    }
269
270    /// Parse a locator label from a known keyword or custom identifier.
271    ///
272    /// # Errors
273    ///
274    /// Returns an error when the input is empty or normalizes to an empty key.
275    pub fn from_key(value: &str) -> Result<Self, String> {
276        let canonical = normalize_kind_key(value)
277            .ok_or_else(|| "locator label must not be empty".to_string())?;
278        Ok(match canonical.as_str() {
279            "algorithm" => Self::Algorithm,
280            "book" => Self::Book,
281            "chapter" => Self::Chapter,
282            "clause" => Self::Clause,
283            "column" => Self::Column,
284            "corollary" => Self::Corollary,
285            "definition" => Self::Definition,
286            "division" => Self::Division,
287            "figure" => Self::Figure,
288            "folio" => Self::Folio,
289            "line" => Self::Line,
290            "lemma" => Self::Lemma,
291            "note" => Self::Note,
292            "number" => Self::Number,
293            "opus" => Self::Opus,
294            "page" => Self::Page,
295            "paragraph" => Self::Paragraph,
296            "part" => Self::Part,
297            "problem" => Self::Problem,
298            "proposition" => Self::Proposition,
299            "recital" => Self::Recital,
300            "schedule" => Self::Schedule,
301            "section" => Self::Section,
302            "subclause" => Self::Subclause,
303            "subdivision" => Self::Subdivision,
304            "subparagraph" => Self::Subparagraph,
305            "subsection" => Self::Subsection,
306            "sub-verbo" => Self::SubVerbo,
307            "supplement" => Self::Supplement,
308            "surah" => Self::Surah,
309            "theorem" => Self::Theorem,
310            "verse" => Self::Verse,
311            "volume" => Self::Volume,
312            "volume-book" => Self::VolumeBook,
313            "volume-periodical" => Self::VolumePeriodical,
314            "issue" => Self::Issue,
315            _ => Self::Custom(canonical),
316        })
317    }
318}
319
320impl PartialEq for LocatorType {
321    fn eq(&self, other: &Self) -> bool {
322        self.as_key().as_ref() == other.as_key().as_ref()
323    }
324}
325
326impl Eq for LocatorType {}
327
328impl Hash for LocatorType {
329    fn hash<H: Hasher>(&self, state: &mut H) {
330        self.as_key().as_ref().hash(state);
331    }
332}
333
334impl Serialize for LocatorType {
335    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
336    where
337        S: Serializer,
338    {
339        serializer.serialize_str(self.as_key().as_ref())
340    }
341}
342
343impl<'de> Deserialize<'de> for LocatorType {
344    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
345    where
346        D: Deserializer<'de>,
347    {
348        let value = String::deserialize(deserializer)?;
349        Self::from_key(&value).map_err(serde::de::Error::custom)
350    }
351}
352
/// Hand-written schema for `LocatorType`.
///
/// The type (de)serializes as a plain string through its manual serde impls,
/// so the schema is a free-form string rather than an enumeration of the
/// known keywords.
#[cfg(feature = "schema")]
impl JsonSchema for LocatorType {
    /// Name under which this schema is registered.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        "LocatorType".into()
    }

    /// Describe the value as an unconstrained string; the generator is unused.
    fn json_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Known locator label keyword or custom kebab-case identifier."
        })
    }
}
366
/// A locator value that supports both plain strings and explicit plurality.
///
/// Plain strings use heuristic plural detection (checking for `-`, `–`, `,`, `&`).
/// Use the explicit form to override when the heuristic fails (e.g., "figure A-3"
/// should be singular despite containing a hyphen).
///
/// The enum is `#[serde(untagged)]`: a bare string maps to `Text`, and an
/// object with `value` and `plural` fields maps to `Explicit`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(untagged)]
pub enum LocatorValue {
    /// Plain string value with heuristic plural detection.
    Text(String),
    /// Explicit value with manual plural override.
    Explicit {
        /// The locator value string.
        value: String,
        /// Whether this locator is plural.
        plural: bool,
    },
}
387
388impl LocatorValue {
389    /// Returns the raw value string.
390    pub fn value_str(&self) -> &str {
391        match self {
392            LocatorValue::Text(s) => s,
393            LocatorValue::Explicit { value, .. } => value,
394        }
395    }
396
397    /// Returns whether this locator value is plural.
398    ///
399    /// For `Text`, uses the heuristic (contains `-`, `–`, `,`, or `&`).
400    /// For `Explicit`, returns the specified `plural` field.
401    pub fn is_plural(&self) -> bool {
402        match self {
403            LocatorValue::Text(s) => {
404                s.contains('\u{2013}') || s.contains('-') || s.contains(',') || s.contains('&')
405            }
406            LocatorValue::Explicit { plural, .. } => *plural,
407        }
408    }
409}
410
411impl Default for LocatorValue {
412    fn default() -> Self {
413        LocatorValue::Text(String::new())
414    }
415}
416
417impl From<String> for LocatorValue {
418    fn from(s: String) -> Self {
419        LocatorValue::Text(s)
420    }
421}
422
423impl From<&str> for LocatorValue {
424    fn from(s: &str) -> Self {
425        LocatorValue::Text(s.to_string())
426    }
427}
428
/// A single segment of a compound locator.
///
/// Pairs a locator type with its value, e.g. `{ label: chapter, value: "3" }`.
/// Both fields are required when deserializing.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub struct LocatorSegment {
    /// The locator type for this segment.
    pub label: LocatorType,
    /// The locator value (e.g., "3", "42-45").
    pub value: LocatorValue,
}
442
443impl LocatorSegment {
444    /// Create a locator segment from a canonical label and value.
445    pub fn new(label: LocatorType, value: impl Into<LocatorValue>) -> Self {
446        Self {
447            label,
448            value: value.into(),
449        }
450    }
451}
452
/// A canonical citation locator.
///
/// Simple locators use the single-segment form, while compound locators use
/// the explicit `segments` wrapper.
///
/// `Deserialize` is implemented by hand in this file so that compound input
/// is validated through `CitationLocator::compound`. Because the variants are
/// public, a `Compound` value built directly bypasses that check.
#[derive(Debug, Clone, Serialize, PartialEq)]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(untagged)]
pub enum CitationLocator {
    /// A single labeled locator.
    Single(LocatorSegment),
    /// Multiple ordered locator segments.
    Compound {
        /// Ordered locator segments.
        segments: Vec<LocatorSegment>,
    },
}
469
/// Private deserialization mirror of `CitationLocator`.
///
/// It has the same untagged shape as the public enum; the hand-written
/// `Deserialize` impl for `CitationLocator` parses into this type first and
/// then validates the compound form.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
enum CitationLocatorRepr {
    /// A single `{ label, value }` segment.
    Single(LocatorSegment),
    /// A `{ segments: [...] }` wrapper, not yet checked for length.
    Compound { segments: Vec<LocatorSegment> },
}
476
477impl<'de> Deserialize<'de> for CitationLocator {
478    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
479    where
480        D: serde::Deserializer<'de>,
481    {
482        use serde::de::Error;
483
484        match CitationLocatorRepr::deserialize(deserializer)? {
485            CitationLocatorRepr::Single(segment) => Ok(Self::Single(segment)),
486            CitationLocatorRepr::Compound { segments } => {
487                Self::compound(segments).map_err(D::Error::custom)
488            }
489        }
490    }
491}
492
493impl CitationLocator {
494    /// Create a single-segment locator.
495    pub fn single(label: LocatorType, value: impl Into<LocatorValue>) -> Self {
496        Self::Single(LocatorSegment::new(label, value))
497    }
498
499    /// Create a compound locator with two or more segments.
500    ///
501    /// # Errors
502    ///
503    /// Returns an error when fewer than two locator segments are supplied.
504    pub fn compound(segments: Vec<LocatorSegment>) -> Result<Self, &'static str> {
505        if segments.len() < 2 {
506            return Err("compound locators must contain at least two segments");
507        }
508        Ok(Self::Compound { segments })
509    }
510
511    /// Returns the ordered locator segments as a slice.
512    pub fn segments(&self) -> &[LocatorSegment] {
513        match self {
514            Self::Single(segment) => std::slice::from_ref(segment),
515            Self::Compound { segments } => segments.as_slice(),
516        }
517    }
518
519    /// Returns true if this locator contains multiple segments.
520    pub fn is_compound(&self) -> bool {
521        matches!(self, Self::Compound { .. })
522    }
523
524    /// Returns a stable string form used for locator comparison.
525    pub fn canonical_string(&self) -> String {
526        self.segments()
527            .iter()
528            .map(|segment| format!("{}:{}", segment.label.as_key(), segment.value.value_str()))
529            .collect::<Vec<_>>()
530            .join(",")
531    }
532}
533
/// Hand-written schema for `CitationLocator`.
///
/// Mirrors the untagged wire format: either a single `LocatorSegment` object
/// or an object with a `segments` array. The array carries `minItems: 2` so
/// the schema agrees with `CitationLocator::compound` and the manual
/// `Deserialize` impl, both of which reject fewer than two segments.
#[cfg(feature = "schema")]
impl JsonSchema for CitationLocator {
    /// Name under which this schema is registered.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        "CitationLocator".into()
    }

    /// Build the `oneOf` schema covering the single and compound forms.
    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        let segment_schema = generator.subschema_for::<LocatorSegment>();
        let compound_schema = schemars::json_schema!({
            "type": "object",
            "properties": {
                "segments": {
                    "type": "array",
                    "items": segment_schema.clone(),
                    // Keep in sync with the length check in `CitationLocator::compound`.
                    "minItems": 2
                }
            },
            "required": ["segments"]
        });
        schemars::json_schema!({
            "oneOf": [segment_schema, compound_schema]
        })
    }
}
554
/// Normalize a free-form label into a lowercase kebab-case key.
///
/// Runs of ASCII alphanumeric characters are kept (lowercased) and joined
/// with single dashes; every other character, including non-ASCII letters,
/// acts as a separator. Returns `None` when no ASCII alphanumeric character
/// is present.
fn normalize_kind_key(value: &str) -> Option<String> {
    let words: Vec<String> = value
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();

    if words.is_empty() {
        None
    } else {
        Some(words.join("-"))
    }
}
577
/// A single citation item referencing a bibliography entry.
///
/// Field names are (de)serialized in kebab-case (e.g. `integral-name-state`),
/// and optional fields that are `None` are omitted from serialized output.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub struct CitationItem {
    /// The reference ID (citekey).
    pub id: String,
    /// Canonical locator value for pinpoint citations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub locator: Option<CitationLocator>,
    /// Prefix text before this item.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prefix: Option<String>,
    /// Suffix text after this item.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suffix: Option<String>,
    /// Explicit integral name-memory state override for this item.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub integral_name_state: Option<IntegralNameState>,
}
599
600impl CitationItem {
601    /// Returns the canonical locator segments when present.
602    pub fn locator_segments(&self) -> Option<&[LocatorSegment]> {
603        self.locator.as_ref().map(CitationLocator::segments)
604    }
605}
606
607/// Normalize a textual locator string into the canonical locator model.
608///
609/// # Panics
610///
611/// This function does not panic under normal use; the internal `unwrap` is
612/// guarded by the preceding segment-count match.
613pub fn normalize_locator_text(
614    locator: &str,
615    aliases: &[(String, LocatorType)],
616) -> Option<CitationLocator> {
617    let locator = locator.trim();
618    if locator.is_empty() {
619        return None;
620    }
621
622    let raw_segments = split_locator_segments(locator, aliases);
623    let segments: Vec<LocatorSegment> = raw_segments
624        .into_iter()
625        .filter_map(|segment| parse_locator_segment(segment, aliases))
626        .collect();
627
628    match segments.len() {
629        0 => None,
630        1 => {
631            let mut it = segments.into_iter();
632            Some(CitationLocator::Single(it.next()?))
633        }
634        _ => CitationLocator::compound(segments).ok(),
635    }
636}
637
638fn split_locator_segments<'a>(locator: &'a str, aliases: &[(String, LocatorType)]) -> Vec<&'a str> {
639    let mut parts = Vec::new();
640    let mut start = 0;
641
642    for (idx, ch) in locator.char_indices() {
643        if ch != ',' {
644            continue;
645        }
646
647        #[allow(
648            clippy::string_slice,
649            reason = "idx is a valid char boundary from char_indices()"
650        )]
651        let candidate = locator[idx + ch.len_utf8()..].trim_start();
652        if begins_with_locator_label(candidate, aliases) {
653            #[allow(
654                clippy::string_slice,
655                reason = "start and idx are valid char boundaries"
656            )]
657            parts.push(locator[start..idx].trim());
658            start = idx + ch.len_utf8();
659        }
660    }
661
662    #[allow(clippy::string_slice, reason = "start is a valid char boundary")]
663    parts.push(locator[start..].trim());
664    parts
665}
666
667fn parse_locator_segment(
668    segment: &str,
669    aliases: &[(String, LocatorType)],
670) -> Option<LocatorSegment> {
671    let segment = segment.trim();
672    if segment.is_empty() {
673        return None;
674    }
675
676    if let Some((label, rest)) = strip_locator_label(segment, aliases) {
677        let value = rest.trim_start_matches(':').trim();
678        if value.is_empty() {
679            return None;
680        }
681        return Some(LocatorSegment::new(label, value));
682    }
683
684    Some(LocatorSegment::new(LocatorType::Page, segment))
685}
686
687fn begins_with_locator_label(segment: &str, aliases: &[(String, LocatorType)]) -> bool {
688    strip_locator_label(segment, aliases).is_some()
689}
690
691fn strip_locator_label<'a>(
692    segment: &'a str,
693    aliases: &[(String, LocatorType)],
694) -> Option<(LocatorType, &'a str)> {
695    let lower = segment.to_lowercase();
696    let mut best: Option<(LocatorType, usize)> = None;
697
698    for (alias, label) in aliases {
699        if let Some(remainder) = lower.strip_prefix(alias)
700            && alias_boundary(remainder)
701        {
702            let alias_len = alias.len();
703            if best
704                .as_ref()
705                .is_none_or(|(_, best_len)| alias_len > *best_len)
706            {
707                best = Some((label.clone(), alias_len));
708            }
709        }
710    }
711
712    best.map(|(label, alias_len)| {
713        #[allow(clippy::string_slice, reason = "alias_len is the length of a prefix")]
714        (label, segment[alias_len..].trim_start())
715    })
716}
717
/// Decide whether the text following a matched alias ends the label.
///
/// A label ends at the end of the input or before a colon, a period, or any
/// whitespace character.
fn alias_boundary(remainder: &str) -> bool {
    match remainder.chars().next() {
        None => true,
        Some(next) => next == ':' || next == '.' || next.is_whitespace(),
    }
}
724
725#[cfg(test)]
726#[allow(
727    clippy::unwrap_used,
728    clippy::expect_used,
729    clippy::panic,
730    clippy::indexing_slicing,
731    clippy::todo,
732    clippy::unimplemented,
733    clippy::unreachable,
734    clippy::get_unwrap,
735    reason = "Panicking is acceptable and often desired in tests."
736)]
737mod tests {
738    use super::*;
739
740    #[test]
741    fn test_citation_deserialization() {
742        let json = r#"
743        {
744            "items": [
745                {
746                    "id": "kuhn1962"
747                }
748            ],
749            "mode": "integral"
750        }
751        "#;
752        let citation: Citation = serde_json::from_str(json).unwrap();
753        assert_eq!(citation.items.len(), 1);
754        assert_eq!(citation.items[0].id, "kuhn1962");
755        assert_eq!(citation.mode, CitationMode::Integral);
756    }
757
758    #[test]
759    fn test_citation_simple_constructor_defaults() {
760        let citation = Citation::simple("kuhn1962");
761
762        assert_eq!(citation.items.len(), 1);
763        assert_eq!(citation.items[0].id, "kuhn1962");
764        assert_eq!(citation.mode, CitationMode::NonIntegral);
765        assert_eq!(citation.position, None);
766        assert!(!citation.suppress_author);
767        assert_eq!(citation.note_number, None);
768        assert_eq!(citation.prefix, None);
769        assert_eq!(citation.suffix, None);
770    }
771
772    #[test]
773    fn test_citation_default_fields_are_omitted_in_serialization() {
774        let citation = Citation::simple("kuhn1962");
775        let json = serde_json::to_value(&citation).unwrap();
776        let object = json.as_object().unwrap();
777
778        assert!(!object.contains_key("mode"));
779        assert!(!object.contains_key("suppress-author"));
780
781        let explicit = Citation {
782            mode: CitationMode::Integral,
783            suppress_author: true,
784            ..citation
785        };
786        let explicit_json = serde_json::to_value(&explicit).unwrap();
787        let explicit_object = explicit_json.as_object().unwrap();
788
789        assert_eq!(explicit_object.get("mode").unwrap(), "integral");
790        assert_eq!(explicit_object.get("suppress-author").unwrap(), true);
791    }
792
793    #[test]
794    fn test_citation_item_with_locator() {
795        let json = r#"
796        {
797            "id": "kuhn1962",
798            "locator": {
799                "label": "page",
800                "value": "42-45"
801            }
802        }
803        "#;
804        let item: CitationItem = serde_json::from_str(json).unwrap();
805        assert_eq!(item.id, "kuhn1962");
806        assert_eq!(
807            item.locator,
808            Some(CitationLocator::single(LocatorType::Page, "42-45"))
809        );
810    }
811
812    #[test]
813    fn test_compound_locator_serde_roundtrip() {
814        let json = r#"
815        {
816            "id": "smith2020",
817            "locator": {
818                "segments": [
819                    { "label": "chapter", "value": "3" },
820                    { "label": "section", "value": "42" }
821                ]
822            }
823        }
824        "#;
825        let item: CitationItem = serde_json::from_str(json).unwrap();
826        let segs = item.locator.as_ref().unwrap().segments();
827        assert_eq!(segs.len(), 2);
828        assert_eq!(segs[0].label, LocatorType::Chapter);
829        assert_eq!(segs[0].value.value_str(), "3");
830        assert_eq!(segs[1].label, LocatorType::Section);
831        assert_eq!(segs[1].value.value_str(), "42");
832
833        // Round-trip
834        let serialized = serde_json::to_string(&item).unwrap();
835        let deserialized: CitationItem = serde_json::from_str(&serialized).unwrap();
836        assert_eq!(deserialized.locator, item.locator);
837    }
838
839    #[test]
840    fn test_compound_locator_rejects_single_segment() {
841        let err = CitationLocator::compound(vec![LocatorSegment::new(LocatorType::Page, "42")])
842            .expect_err("single-segment compound locator must be rejected");
843        assert!(err.contains("at least two"));
844    }
845
846    #[test]
847    fn test_citation_locator_canonical_string_is_stable() {
848        let locator = CitationLocator::compound(vec![
849            LocatorSegment::new(LocatorType::Page, "23"),
850            LocatorSegment::new(LocatorType::Line, "13"),
851        ])
852        .unwrap();
853
854        assert_eq!(locator.canonical_string(), "page:23,line:13");
855    }
856
857    #[test]
858    fn test_custom_locator_type_round_trips_as_plain_string() {
859        let json = r#"
860        {
861            "id": "score2024",
862            "locator": {
863                "label": "Movement",
864                "value": "II"
865            }
866        }
867        "#;
868
869        let item: CitationItem = serde_json::from_str(json).expect("custom locator should parse");
870        let locator = item.locator.expect("custom locator should exist");
871        let segment = &locator.segments()[0];
872
873        assert_eq!(segment.label, LocatorType::Custom("movement".to_string()));
874        let serialized = serde_json::to_value(&CitationItem {
875            id: "score2024".to_string(),
876            locator: Some(locator),
877            ..Default::default()
878        })
879        .expect("custom locator should serialize");
880
881        assert_eq!(serialized["locator"]["label"], "movement");
882    }
883
884    #[test]
885    fn test_custom_locator_type_normalizes_manual_construction() {
886        let locator = LocatorType::Custom("Reel Label".to_string());
887
888        assert_eq!(locator.as_key(), "reel-label");
889        assert_eq!(
890            locator,
891            LocatorType::from_key("reel-label").expect("known custom key should parse")
892        );
893        assert_eq!(
894            serde_json::to_string(&locator).expect("custom locator should serialize"),
895            "\"reel-label\""
896        );
897    }
898
899    #[test]
900    fn test_locator_segments_single() {
901        let item = CitationItem {
902            id: "test".to_string(),
903            locator: Some(CitationLocator::single(LocatorType::Page, "42")),
904            ..Default::default()
905        };
906        let segments = item.locator_segments().unwrap();
907        assert_eq!(segments.len(), 1);
908        assert_eq!(segments[0].label, LocatorType::Page);
909    }
910
911    #[test]
912    fn test_locator_segments_none() {
913        let item = CitationItem {
914            id: "test".to_string(),
915            ..Default::default()
916        };
917        assert!(item.locator_segments().is_none());
918    }
919
920    #[test]
921    fn test_single_locator_serializes_without_segments_wrapper() {
922        let item = CitationItem {
923            id: "test".to_string(),
924            locator: Some(CitationLocator::single(LocatorType::Page, "42")),
925            ..Default::default()
926        };
927        let json = serde_json::to_value(&item).unwrap();
928        let locator = json
929            .as_object()
930            .unwrap()
931            .get("locator")
932            .and_then(serde_json::Value::as_object)
933            .unwrap();
934        assert!(locator.contains_key("label"));
935        assert!(!locator.contains_key("segments"));
936    }
937
938    #[test]
939    fn test_compound_locator_deserialization() {
940        let json = r#"
941        {
942            "id": "smith2020",
943            "locator": {
944                "segments": [
945                    { "label": "page", "value": "23" },
946                    { "label": "line", "value": "13" }
947                ]
948            }
949        }
950        "#;
951        let item: CitationItem = serde_json::from_str(json).unwrap();
952        let segs = item.locator.as_ref().unwrap().segments();
953        assert_eq!(segs.len(), 2);
954        assert_eq!(segs[0].label, LocatorType::Page);
955        assert_eq!(segs[0].value.value_str(), "23");
956        assert_eq!(segs[1].label, LocatorType::Line);
957        assert_eq!(segs[1].value.value_str(), "13");
958    }
959
960    #[test]
961    fn test_locator_value_explicit_plural_override() {
962        let json = r#"
963        {
964            "id": "test",
965            "locator": {
966                "label": "figure",
967                "value": {
968                    "value": "A-3",
969                    "plural": false
970                }
971            }
972        }
973        "#;
974        let item: CitationItem = serde_json::from_str(json).unwrap();
975        let segs = item.locator.as_ref().unwrap().segments();
976        assert_eq!(segs[0].value.value_str(), "A-3");
977        assert!(!segs[0].value.is_plural());
978    }
979
980    #[test]
981    fn test_locator_value_heuristic_plural() {
982        let lv_range = LocatorValue::from("42-45");
983        assert!(lv_range.is_plural());
984
985        let lv_single = LocatorValue::from("42");
986        assert!(!lv_single.is_plural());
987
988        let lv_en_dash = LocatorValue::from("42–45");
989        assert!(lv_en_dash.is_plural());
990
991        let lv_comma = LocatorValue::from("1, 3, 5");
992        assert!(lv_comma.is_plural());
993
994        let lv_ampersand = LocatorValue::from("A & B");
995        assert!(lv_ampersand.is_plural());
996    }
997
998    #[test]
999    fn test_normalize_locator_text_with_explicit_aliases() {
1000        let aliases = vec![
1001            ("page".to_string(), LocatorType::Page),
1002            ("p.".to_string(), LocatorType::Page),
1003            ("chapter".to_string(), LocatorType::Chapter),
1004            ("ch.".to_string(), LocatorType::Chapter),
1005            ("section".to_string(), LocatorType::Section),
1006            ("§".to_string(), LocatorType::Section),
1007        ];
1008
1009        // Bare number defaults to Page
1010        assert_eq!(
1011            normalize_locator_text("45", &aliases),
1012            Some(CitationLocator::single(LocatorType::Page, "45"))
1013        );
1014
1015        // Explicit label
1016        assert_eq!(
1017            normalize_locator_text("chapter 2", &aliases),
1018            Some(CitationLocator::single(LocatorType::Chapter, "2"))
1019        );
1020
1021        // Abbreviated label
1022        assert_eq!(
1023            normalize_locator_text("ch. 3", &aliases),
1024            Some(CitationLocator::single(LocatorType::Chapter, "3"))
1025        );
1026
1027        // Symbol label
1028        assert_eq!(
1029            normalize_locator_text("§ 4", &aliases),
1030            Some(CitationLocator::single(LocatorType::Section, "4"))
1031        );
1032
1033        // Compound locator
1034        let compound = normalize_locator_text("chapter 2, page 10", &aliases).unwrap();
1035        assert!(compound.is_compound());
1036        let segs = compound.segments();
1037        assert_eq!(segs[0].label, LocatorType::Chapter);
1038        assert_eq!(segs[1].label, LocatorType::Page);
1039
1040        // Empty or invalid input
1041        assert_eq!(normalize_locator_text("", &aliases), None);
1042        assert_eq!(normalize_locator_text("   ", &aliases), None);
1043        assert_eq!(normalize_locator_text("chapter:", &aliases), None);
1044    }
1045
1046    #[test]
1047    fn test_normalize_locator_text_with_abbreviated_aliases() {
1048        let aliases = vec![
1049            ("page".to_string(), LocatorType::Page),
1050            ("pp.".to_string(), LocatorType::Page),
1051            ("vol.".to_string(), LocatorType::Volume),
1052        ];
1053
1054        assert_eq!(
1055            normalize_locator_text("page 45", &aliases),
1056            Some(CitationLocator::single(LocatorType::Page, "45"))
1057        );
1058        assert_eq!(
1059            normalize_locator_text("pp. 10-12", &aliases),
1060            Some(CitationLocator::single(LocatorType::Page, "10-12"))
1061        );
1062        assert_eq!(
1063            normalize_locator_text("vol. 1", &aliases),
1064            Some(CitationLocator::single(LocatorType::Volume, "1"))
1065        );
1066    }
1067}