Skip to main content

citum_schema_data/
citation.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Citation input model for the Citum processor.
7//!
8//! This module defines the structures for representing citations as input
9//! to the processor. Citations reference entries in the bibliography and
10//! can include locators, prefixes, suffixes, and mode information.
11
12#[cfg(feature = "schema")]
13use schemars::JsonSchema;
14use serde::{Deserialize, Deserializer, Serialize, Serializer};
15#[cfg(feature = "bindings")]
16use specta::Type;
17use std::borrow::Cow;
18use std::hash::{Hash, Hasher};
19
20/// A list of citations to process.
21pub type Citations = Vec<Citation>;
22
23/// Citation mode for author-date styles.
24///
25/// Determines how the author name is rendered relative to the citation.
26#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
27#[cfg_attr(feature = "schema", derive(JsonSchema))]
28#[cfg_attr(feature = "bindings", derive(Type))]
29#[serde(rename_all = "kebab-case")]
30pub enum CitationMode {
31    /// Author inline in text: "Smith (2020) argues..."
32    /// Also known as "narrative" or "in-text" citations.
33    Integral,
34    /// Author in parentheses: "(Smith, 2020)"
35    /// The default mode for most citations.
36    #[default]
37    NonIntegral,
38}
39
40/// Explicit integral citation name-memory state for one citation item.
41#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
42#[cfg_attr(feature = "schema", derive(JsonSchema))]
43#[cfg_attr(feature = "bindings", derive(Type))]
44#[serde(rename_all = "kebab-case")]
45pub enum IntegralNameState {
46    /// Render this item as the first integral mention in scope.
47    First,
48    /// Render this item as a subsequent integral mention in scope.
49    Subsequent,
50}
51
52/// Position of a citation in the document flow.
53///
54/// Indicates where this citation appears relative to previous citations
55/// of the same item(s). Used for note-based styles to detect ibid and
56/// subsequent citations, and for author-date styles to apply position-specific
57/// formatting rules (e.g., short forms after first citation).
58#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
59#[cfg_attr(feature = "schema", derive(JsonSchema))]
60#[cfg_attr(feature = "bindings", derive(Type))]
61#[serde(rename_all = "kebab-case")]
62pub enum Position {
63    /// First citation of an item.
64    First,
65    /// Subsequent citation of an item (non-consecutive).
66    Subsequent,
67    /// Same item cited immediately before, no locator on either.
68    Ibid,
69    /// Same item cited immediately before, with different locator.
70    IbidWithLocator,
71}
72
73/// A citation containing one or more references.
74#[derive(Debug, Clone, Default, Deserialize, Serialize)]
75#[cfg_attr(feature = "schema", derive(JsonSchema))]
76#[cfg_attr(feature = "bindings", derive(Type))]
77#[serde(rename_all = "kebab-case")]
78pub struct Citation {
79    /// The citation ID (optional, for tracking).
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub id: Option<String>,
82    /// Note number for footnote/endnote styles.
83    /// Assigned by the document processor, not the citation processor.
84    #[serde(skip_serializing_if = "Option::is_none")]
85    pub note_number: Option<u32>,
86    /// Citation mode: integral (narrative) vs non-integral (parenthetical).
87    /// Only relevant for author-date styles.
88    #[serde(default, skip_serializing_if = "is_default_mode")]
89    pub mode: CitationMode,
90    /// Position of this citation in the document flow.
91    /// Detected automatically by the processor or set explicitly by the caller.
92    #[serde(skip_serializing_if = "Option::is_none")]
93    pub position: Option<Position>,
94    /// Suppress the author name across all items in this citation.
95    /// Used when the author is already named in the prose: "Smith argues (2020)".
96    /// Applies uniformly to all items — per-item suppression is not supported
97    /// because mixed-visibility citations are typographically incoherent.
98    #[serde(default, skip_serializing_if = "is_false")]
99    pub suppress_author: bool,
100    /// Prefix text before all citation items.
101    #[serde(skip_serializing_if = "Option::is_none")]
102    pub prefix: Option<String>,
103    /// Suffix text after all citation items.
104    #[serde(skip_serializing_if = "Option::is_none")]
105    pub suffix: Option<String>,
106    /// The citation items (references being cited).
107    pub items: Vec<CitationItem>,
108    /// If true, the entire citation is a single dynamic compound set.
109    ///
110    /// The first item acts as the head and subsequent items are merged as tails
111    /// in the bibliography. Ignored for non-numeric (compound-numeric) styles.
112    /// Item order is preserved and sorting is suppressed when this flag is set.
113    #[serde(default, skip_serializing_if = "is_false")]
114    pub grouped: bool,
115}
116
117impl Citation {
118    /// Create a simple single-item citation.
119    ///
120    /// Convenience constructor for a citation with a single reference ID and default settings.
121    pub fn simple(id: &str) -> Self {
122        Self {
123            items: vec![CitationItem {
124                id: id.to_string(),
125                ..Default::default()
126            }],
127            ..Default::default()
128        }
129    }
130}
131
132/// Helper for skip_serializing_if on mode field.
133fn is_default_mode(mode: &CitationMode) -> bool {
134    *mode == CitationMode::NonIntegral
135}
136
/// `skip_serializing_if` predicate for `bool` fields whose default is `false`.
///
/// Takes a reference because serde passes the field by reference.
fn is_false(b: &bool) -> bool {
    !*b
}
141
/// The kind of unit a pinpoint locator refers to.
///
/// Each known kind has its own variant; anything else is carried by
/// [`LocatorType::Custom`]. The default is [`LocatorType::Page`].
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "bindings", derive(Type))]
pub enum LocatorType {
    /// A book within a larger work.
    Book,
    /// A chapter.
    Chapter,
    /// A clause.
    Clause,
    /// A column.
    Column,
    /// A corollary.
    Corollary,
    /// A definition.
    Definition,
    /// A division.
    Division,
    /// A figure.
    Figure,
    /// A folio.
    Folio,
    /// A numbered line.
    Line,
    /// A lemma.
    Lemma,
    /// A note.
    Note,
    /// A numbered unit.
    Number,
    /// An opus number.
    Opus,
    /// A page (the default locator type).
    #[default]
    Page,
    /// A paragraph.
    Paragraph,
    /// A sub-paragraph.
    Subparagraph,
    /// A sub-clause.
    Subclause,
    /// A sub-division.
    Subdivision,
    /// A sub-section.
    Subsection,
    /// A part or division.
    Part,
    /// A problem.
    Problem,
    /// A proposition.
    Proposition,
    /// A recital.
    Recital,
    /// A schedule.
    Schedule,
    /// A section.
    Section,
    /// A surah.
    Surah,
    /// A theorem.
    Theorem,
    /// An entry under a headword.
    SubVerbo,
    /// A supplement.
    Supplement,
    /// A verse.
    Verse,
    /// A volume.
    Volume,
    /// A periodical volume.
    VolumePeriodical,
    /// A monograph volume.
    VolumeBook,
    /// An issue.
    Issue,
    /// An algorithm.
    Algorithm,
    /// A custom pinpoint label.
    Custom(String),
}
222
223impl LocatorType {
224    /// Return the canonical kebab-case key for this locator label.
225    #[must_use]
226    pub fn as_key(&self) -> Cow<'_, str> {
227        match self {
228            Self::Book => Cow::Borrowed("book"),
229            Self::Chapter => Cow::Borrowed("chapter"),
230            Self::Clause => Cow::Borrowed("clause"),
231            Self::Column => Cow::Borrowed("column"),
232            Self::Corollary => Cow::Borrowed("corollary"),
233            Self::Definition => Cow::Borrowed("definition"),
234            Self::Division => Cow::Borrowed("division"),
235            Self::Figure => Cow::Borrowed("figure"),
236            Self::Folio => Cow::Borrowed("folio"),
237            Self::Line => Cow::Borrowed("line"),
238            Self::Lemma => Cow::Borrowed("lemma"),
239            Self::Note => Cow::Borrowed("note"),
240            Self::Number => Cow::Borrowed("number"),
241            Self::Opus => Cow::Borrowed("opus"),
242            Self::Page => Cow::Borrowed("page"),
243            Self::Paragraph => Cow::Borrowed("paragraph"),
244            Self::Subparagraph => Cow::Borrowed("subparagraph"),
245            Self::Subclause => Cow::Borrowed("subclause"),
246            Self::Subdivision => Cow::Borrowed("subdivision"),
247            Self::Subsection => Cow::Borrowed("subsection"),
248            Self::Part => Cow::Borrowed("part"),
249            Self::Problem => Cow::Borrowed("problem"),
250            Self::Proposition => Cow::Borrowed("proposition"),
251            Self::Recital => Cow::Borrowed("recital"),
252            Self::Schedule => Cow::Borrowed("schedule"),
253            Self::Section => Cow::Borrowed("section"),
254            Self::Surah => Cow::Borrowed("surah"),
255            Self::Theorem => Cow::Borrowed("theorem"),
256            Self::SubVerbo => Cow::Borrowed("sub-verbo"),
257            Self::Supplement => Cow::Borrowed("supplement"),
258            Self::Verse => Cow::Borrowed("verse"),
259            Self::Volume => Cow::Borrowed("volume"),
260            Self::VolumePeriodical => Cow::Borrowed("volume-periodical"),
261            Self::VolumeBook => Cow::Borrowed("volume-book"),
262            Self::Issue => Cow::Borrowed("issue"),
263            Self::Algorithm => Cow::Borrowed("algorithm"),
264            Self::Custom(value) => normalize_kind_key(value)
265                .map(Cow::Owned)
266                .unwrap_or_else(|| Cow::Borrowed(value.as_str())),
267        }
268    }
269
270    /// Parse a locator label from a known keyword or custom identifier.
271    ///
272    /// # Errors
273    ///
274    /// Returns an error when the input is empty or normalizes to an empty key.
275    pub fn from_key(value: &str) -> Result<Self, String> {
276        let canonical = normalize_kind_key(value)
277            .ok_or_else(|| "locator label must not be empty".to_string())?;
278        Ok(match canonical.as_str() {
279            "algorithm" => Self::Algorithm,
280            "book" => Self::Book,
281            "chapter" => Self::Chapter,
282            "clause" => Self::Clause,
283            "column" => Self::Column,
284            "corollary" => Self::Corollary,
285            "definition" => Self::Definition,
286            "division" => Self::Division,
287            "figure" => Self::Figure,
288            "folio" => Self::Folio,
289            "line" => Self::Line,
290            "lemma" => Self::Lemma,
291            "note" => Self::Note,
292            "number" => Self::Number,
293            "opus" => Self::Opus,
294            "page" => Self::Page,
295            "paragraph" => Self::Paragraph,
296            "part" => Self::Part,
297            "problem" => Self::Problem,
298            "proposition" => Self::Proposition,
299            "recital" => Self::Recital,
300            "schedule" => Self::Schedule,
301            "section" => Self::Section,
302            "subclause" => Self::Subclause,
303            "subdivision" => Self::Subdivision,
304            "subparagraph" => Self::Subparagraph,
305            "subsection" => Self::Subsection,
306            "sub-verbo" => Self::SubVerbo,
307            "supplement" => Self::Supplement,
308            "surah" => Self::Surah,
309            "theorem" => Self::Theorem,
310            "verse" => Self::Verse,
311            "volume" => Self::Volume,
312            "volume-book" => Self::VolumeBook,
313            "volume-periodical" => Self::VolumePeriodical,
314            "issue" => Self::Issue,
315            _ => Self::Custom(canonical),
316        })
317    }
318}
319
320impl PartialEq for LocatorType {
321    fn eq(&self, other: &Self) -> bool {
322        self.as_key().as_ref() == other.as_key().as_ref()
323    }
324}
325
326impl Eq for LocatorType {}
327
328impl Hash for LocatorType {
329    fn hash<H: Hasher>(&self, state: &mut H) {
330        self.as_key().as_ref().hash(state);
331    }
332}
333
334impl Serialize for LocatorType {
335    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
336    where
337        S: Serializer,
338    {
339        serializer.serialize_str(self.as_key().as_ref())
340    }
341}
342
343impl<'de> Deserialize<'de> for LocatorType {
344    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
345    where
346        D: Deserializer<'de>,
347    {
348        let value = String::deserialize(deserializer)?;
349        Self::from_key(&value).map_err(serde::de::Error::custom)
350    }
351}
352
/// Hand-written schema: the type is serialized as a free-form string, so the
/// schema is a plain `string` rather than an enumeration of variants.
#[cfg(feature = "schema")]
impl JsonSchema for LocatorType {
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("LocatorType")
    }

    fn json_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Known locator label keyword or custom kebab-case identifier."
        })
    }
}
366
367/// A locator value that supports both plain strings and explicit plurality.
368///
369/// Plain strings use heuristic plural detection (checking for `-`, `–`, `,`, `&`).
370/// Use the explicit form to override when the heuristic fails (e.g., "figure A-3"
371/// should be singular despite containing a hyphen).
372#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
373#[cfg_attr(feature = "schema", derive(JsonSchema))]
374#[cfg_attr(feature = "bindings", derive(Type))]
375#[serde(untagged)]
376pub enum LocatorValue {
377    /// Plain string value with heuristic plural detection.
378    Text(String),
379    /// Explicit value with manual plural override.
380    Explicit {
381        /// The locator value string.
382        value: String,
383        /// Whether this locator is plural.
384        plural: bool,
385    },
386}
387
388impl LocatorValue {
389    /// Returns the raw value string.
390    pub fn value_str(&self) -> &str {
391        match self {
392            LocatorValue::Text(s) => s,
393            LocatorValue::Explicit { value, .. } => value,
394        }
395    }
396
397    /// Returns whether this locator value is plural.
398    ///
399    /// For `Text`, uses the heuristic (contains `-`, `–`, `,`, or `&`).
400    /// For `Explicit`, returns the specified `plural` field.
401    pub fn is_plural(&self) -> bool {
402        match self {
403            LocatorValue::Text(s) => {
404                s.contains('\u{2013}') || s.contains('-') || s.contains(',') || s.contains('&')
405            }
406            LocatorValue::Explicit { plural, .. } => *plural,
407        }
408    }
409}
410
411impl Default for LocatorValue {
412    fn default() -> Self {
413        LocatorValue::Text(String::new())
414    }
415}
416
417impl From<String> for LocatorValue {
418    fn from(s: String) -> Self {
419        LocatorValue::Text(s)
420    }
421}
422
423impl From<&str> for LocatorValue {
424    fn from(s: &str) -> Self {
425        LocatorValue::Text(s.to_string())
426    }
427}
428
429/// A single segment of a compound locator.
430///
431/// Pairs a locator type with its value, e.g. `{ label: chapter, value: "3" }`.
432#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
433#[cfg_attr(feature = "schema", derive(JsonSchema))]
434#[cfg_attr(feature = "bindings", derive(Type))]
435#[serde(rename_all = "kebab-case")]
436pub struct LocatorSegment {
437    /// The locator type for this segment.
438    pub label: LocatorType,
439    /// The locator value (e.g., "3", "42-45").
440    pub value: LocatorValue,
441}
442
443impl LocatorSegment {
444    /// Create a locator segment from a canonical label and value.
445    pub fn new(label: LocatorType, value: impl Into<LocatorValue>) -> Self {
446        Self {
447            label,
448            value: value.into(),
449        }
450    }
451}
452
453/// A canonical citation locator.
454///
455/// Simple locators use the single-segment form, while compound locators use
456/// the explicit `segments` wrapper.
457#[derive(Debug, Clone, Serialize, PartialEq)]
458#[cfg_attr(feature = "bindings", derive(Type))]
459#[serde(untagged)]
460pub enum CitationLocator {
461    /// A single labeled locator.
462    Single(LocatorSegment),
463    /// Multiple ordered locator segments.
464    Compound {
465        /// Ordered locator segments.
466        segments: Vec<LocatorSegment>,
467    },
468}
469
470#[derive(Debug, Clone, Deserialize)]
471#[serde(untagged)]
472enum CitationLocatorRepr {
473    Single(LocatorSegment),
474    Compound { segments: Vec<LocatorSegment> },
475}
476
477impl<'de> Deserialize<'de> for CitationLocator {
478    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
479    where
480        D: serde::Deserializer<'de>,
481    {
482        use serde::de::Error;
483
484        match CitationLocatorRepr::deserialize(deserializer)? {
485            CitationLocatorRepr::Single(segment) => Ok(Self::Single(segment)),
486            CitationLocatorRepr::Compound { segments } => {
487                Self::compound(segments).map_err(D::Error::custom)
488            }
489        }
490    }
491}
492
493impl CitationLocator {
494    /// Create a single-segment locator.
495    pub fn single(label: LocatorType, value: impl Into<LocatorValue>) -> Self {
496        Self::Single(LocatorSegment::new(label, value))
497    }
498
499    /// Create a compound locator with two or more segments.
500    ///
501    /// # Errors
502    ///
503    /// Returns an error when fewer than two locator segments are supplied.
504    pub fn compound(segments: Vec<LocatorSegment>) -> Result<Self, &'static str> {
505        if segments.len() < 2 {
506            return Err("compound locators must contain at least two segments");
507        }
508        Ok(Self::Compound { segments })
509    }
510
511    /// Returns the ordered locator segments as a slice.
512    pub fn segments(&self) -> &[LocatorSegment] {
513        match self {
514            Self::Single(segment) => std::slice::from_ref(segment),
515            Self::Compound { segments } => segments.as_slice(),
516        }
517    }
518
519    /// Returns true if this locator contains multiple segments.
520    pub fn is_compound(&self) -> bool {
521        matches!(self, Self::Compound { .. })
522    }
523
524    /// Returns a stable string form used for locator comparison.
525    pub fn canonical_string(&self) -> String {
526        self.segments()
527            .iter()
528            .map(|segment| format!("{}:{}", segment.label.as_key(), segment.value.value_str()))
529            .collect::<Vec<_>>()
530            .join(",")
531    }
532}
533
/// Hand-written schema matching the custom (de)serialization: either a bare
/// locator segment or an object with a `segments` array.
///
/// The array carries `minItems: 2` because deserialization goes through
/// [`CitationLocator::compound`], which rejects fewer than two segments;
/// without it the schema would accept documents the deserializer refuses.
#[cfg(feature = "schema")]
impl JsonSchema for CitationLocator {
    fn schema_name() -> std::borrow::Cow<'static, str> {
        "CitationLocator".into()
    }

    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        let segment_schema = generator.subschema_for::<LocatorSegment>();
        let compound_schema = schemars::json_schema!({
            "type": "object",
            "properties": {
                "segments": {
                    "type": "array",
                    "items": segment_schema.clone(),
                    "minItems": 2
                }
            },
            "required": ["segments"]
        });
        schemars::json_schema!({
            "oneOf": [segment_schema, compound_schema]
        })
    }
}
554
/// Reduce a label to its canonical lowercase kebab-case key.
///
/// Every run of characters that are not ASCII letters or digits acts as a
/// separator; the remaining words are lowercased and joined with single
/// dashes. Leading and trailing separators produce no dash.
///
/// Returns `None` when the input contains no ASCII letter or digit at all.
fn normalize_kind_key(value: &str) -> Option<String> {
    let words: Vec<String> = value
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();

    if words.is_empty() {
        return None;
    }
    Some(words.join("-"))
}
577
578/// A single citation item referencing a bibliography entry.
579#[derive(Debug, Clone, Default, Deserialize, Serialize)]
580#[cfg_attr(feature = "schema", derive(JsonSchema))]
581#[cfg_attr(feature = "bindings", derive(Type))]
582#[serde(rename_all = "kebab-case")]
583pub struct CitationItem {
584    /// The reference ID (citekey).
585    pub id: String,
586    /// Canonical locator value for pinpoint citations.
587    #[serde(skip_serializing_if = "Option::is_none")]
588    pub locator: Option<CitationLocator>,
589    /// Prefix text before this item
590    #[serde(skip_serializing_if = "Option::is_none")]
591    pub prefix: Option<String>,
592    /// Suffix text after this item
593    #[serde(skip_serializing_if = "Option::is_none")]
594    pub suffix: Option<String>,
595    /// Explicit integral name-memory state override for this item.
596    #[serde(skip_serializing_if = "Option::is_none")]
597    pub integral_name_state: Option<IntegralNameState>,
598    /// Explicit org-abbreviation state override for this item.
599    #[serde(skip_serializing_if = "Option::is_none")]
600    pub org_abbreviation_state: Option<IntegralNameState>,
601}
602
603impl CitationItem {
604    /// Returns the canonical locator segments when present.
605    pub fn locator_segments(&self) -> Option<&[LocatorSegment]> {
606        self.locator.as_ref().map(CitationLocator::segments)
607    }
608}
609
610/// Normalize a textual locator string into the canonical locator model.
611pub fn normalize_locator_text(
612    locator: &str,
613    aliases: &[(String, LocatorType)],
614) -> Option<CitationLocator> {
615    let locator = locator.trim();
616    if locator.is_empty() {
617        return None;
618    }
619
620    let raw_segments = split_locator_segments(locator, aliases);
621    let segments: Vec<LocatorSegment> = raw_segments
622        .into_iter()
623        .filter_map(|segment| parse_locator_segment(segment, aliases))
624        .collect();
625
626    match segments.len() {
627        0 => None,
628        1 => {
629            let mut it = segments.into_iter();
630            Some(CitationLocator::Single(it.next()?))
631        }
632        _ => CitationLocator::compound(segments).ok(),
633    }
634}
635
636fn split_locator_segments<'a>(locator: &'a str, aliases: &[(String, LocatorType)]) -> Vec<&'a str> {
637    let mut parts = Vec::new();
638    let mut start = 0;
639
640    for (idx, ch) in locator.char_indices() {
641        if ch != ',' {
642            continue;
643        }
644
645        #[allow(
646            clippy::string_slice,
647            reason = "idx is a valid char boundary from char_indices()"
648        )]
649        let candidate = locator[idx + ch.len_utf8()..].trim_start();
650        if begins_with_locator_label(candidate, aliases) {
651            #[allow(
652                clippy::string_slice,
653                reason = "start and idx are valid char boundaries"
654            )]
655            parts.push(locator[start..idx].trim());
656            start = idx + ch.len_utf8();
657        }
658    }
659
660    #[allow(clippy::string_slice, reason = "start is a valid char boundary")]
661    parts.push(locator[start..].trim());
662    parts
663}
664
665fn parse_locator_segment(
666    segment: &str,
667    aliases: &[(String, LocatorType)],
668) -> Option<LocatorSegment> {
669    let segment = segment.trim();
670    if segment.is_empty() {
671        return None;
672    }
673
674    if let Some((label, rest)) = strip_locator_label(segment, aliases) {
675        let value = rest.trim_start_matches(':').trim();
676        if value.is_empty() {
677            return None;
678        }
679        return Some(LocatorSegment::new(label, value));
680    }
681
682    Some(LocatorSegment::new(LocatorType::Page, segment))
683}
684
685fn begins_with_locator_label(segment: &str, aliases: &[(String, LocatorType)]) -> bool {
686    strip_locator_label(segment, aliases).is_some()
687}
688
689fn strip_locator_label<'a>(
690    segment: &'a str,
691    aliases: &[(String, LocatorType)],
692) -> Option<(LocatorType, &'a str)> {
693    let lower = segment.to_lowercase();
694    let mut best: Option<(LocatorType, usize)> = None;
695
696    for (alias, label) in aliases {
697        if let Some(remainder) = lower.strip_prefix(alias)
698            && alias_boundary(remainder)
699        {
700            let alias_len = alias.len();
701            if best
702                .as_ref()
703                .is_none_or(|(_, best_len)| alias_len > *best_len)
704            {
705                best = Some((label.clone(), alias_len));
706            }
707        }
708    }
709
710    best.map(|(label, alias_len)| {
711        #[allow(clippy::string_slice, reason = "alias_len is the length of a prefix")]
712        (label, segment[alias_len..].trim_start())
713    })
714}
715
/// Decide whether the text following a matched alias ends the label.
///
/// The alias must be followed by nothing at all, or by `:`, `.` or a
/// whitespace character; otherwise it was only the start of a longer word.
fn alias_boundary(remainder: &str) -> bool {
    match remainder.chars().next() {
        None => true,
        Some(next) => next == ':' || next == '.' || next.is_whitespace(),
    }
}
722
723#[cfg(test)]
724#[allow(
725    clippy::unwrap_used,
726    clippy::expect_used,
727    clippy::panic,
728    clippy::indexing_slicing,
729    clippy::todo,
730    clippy::unimplemented,
731    clippy::unreachable,
732    clippy::get_unwrap,
733    reason = "Panicking is acceptable and often desired in tests."
734)]
735mod tests {
736    use super::*;
737
738    #[test]
739    fn test_citation_deserialization() {
740        let json = r#"
741        {
742            "items": [
743                {
744                    "id": "kuhn1962"
745                }
746            ],
747            "mode": "integral"
748        }
749        "#;
750        let citation: Citation = serde_json::from_str(json).unwrap();
751        assert_eq!(citation.items.len(), 1);
752        assert_eq!(citation.items[0].id, "kuhn1962");
753        assert_eq!(citation.mode, CitationMode::Integral);
754    }
755
756    #[test]
757    fn test_citation_simple_constructor_defaults() {
758        let citation = Citation::simple("kuhn1962");
759
760        assert_eq!(citation.items.len(), 1);
761        assert_eq!(citation.items[0].id, "kuhn1962");
762        assert_eq!(citation.mode, CitationMode::NonIntegral);
763        assert_eq!(citation.position, None);
764        assert!(!citation.suppress_author);
765        assert_eq!(citation.note_number, None);
766        assert_eq!(citation.prefix, None);
767        assert_eq!(citation.suffix, None);
768    }
769
770    #[test]
771    fn test_citation_default_fields_are_omitted_in_serialization() {
772        let citation = Citation::simple("kuhn1962");
773        let json = serde_json::to_value(&citation).unwrap();
774        let object = json.as_object().unwrap();
775
776        assert!(!object.contains_key("mode"));
777        assert!(!object.contains_key("suppress-author"));
778
779        let explicit = Citation {
780            mode: CitationMode::Integral,
781            suppress_author: true,
782            ..citation
783        };
784        let explicit_json = serde_json::to_value(&explicit).unwrap();
785        let explicit_object = explicit_json.as_object().unwrap();
786
787        assert_eq!(explicit_object.get("mode").unwrap(), "integral");
788        assert_eq!(explicit_object.get("suppress-author").unwrap(), true);
789    }
790
791    #[test]
792    fn test_citation_item_with_locator() {
793        let json = r#"
794        {
795            "id": "kuhn1962",
796            "locator": {
797                "label": "page",
798                "value": "42-45"
799            }
800        }
801        "#;
802        let item: CitationItem = serde_json::from_str(json).unwrap();
803        assert_eq!(item.id, "kuhn1962");
804        assert_eq!(
805            item.locator,
806            Some(CitationLocator::single(LocatorType::Page, "42-45"))
807        );
808    }
809
810    #[test]
811    fn test_compound_locator_serde_roundtrip() {
812        let json = r#"
813        {
814            "id": "smith2020",
815            "locator": {
816                "segments": [
817                    { "label": "chapter", "value": "3" },
818                    { "label": "section", "value": "42" }
819                ]
820            }
821        }
822        "#;
823        let item: CitationItem = serde_json::from_str(json).unwrap();
824        let segs = item.locator.as_ref().unwrap().segments();
825        assert_eq!(segs.len(), 2);
826        assert_eq!(segs[0].label, LocatorType::Chapter);
827        assert_eq!(segs[0].value.value_str(), "3");
828        assert_eq!(segs[1].label, LocatorType::Section);
829        assert_eq!(segs[1].value.value_str(), "42");
830
831        // Round-trip
832        let serialized = serde_json::to_string(&item).unwrap();
833        let deserialized: CitationItem = serde_json::from_str(&serialized).unwrap();
834        assert_eq!(deserialized.locator, item.locator);
835    }
836
837    #[test]
838    fn test_compound_locator_rejects_single_segment() {
839        let err = CitationLocator::compound(vec![LocatorSegment::new(LocatorType::Page, "42")])
840            .expect_err("single-segment compound locator must be rejected");
841        assert!(err.contains("at least two"));
842    }
843
844    #[test]
845    fn test_citation_locator_canonical_string_is_stable() {
846        let locator = CitationLocator::compound(vec![
847            LocatorSegment::new(LocatorType::Page, "23"),
848            LocatorSegment::new(LocatorType::Line, "13"),
849        ])
850        .unwrap();
851
852        assert_eq!(locator.canonical_string(), "page:23,line:13");
853    }
854
855    #[test]
856    fn test_custom_locator_type_round_trips_as_plain_string() {
857        let json = r#"
858        {
859            "id": "score2024",
860            "locator": {
861                "label": "Movement",
862                "value": "II"
863            }
864        }
865        "#;
866
867        let item: CitationItem = serde_json::from_str(json).expect("custom locator should parse");
868        let locator = item.locator.expect("custom locator should exist");
869        let segment = &locator.segments()[0];
870
871        assert_eq!(segment.label, LocatorType::Custom("movement".to_string()));
872        let serialized = serde_json::to_value(&CitationItem {
873            id: "score2024".to_string(),
874            locator: Some(locator),
875            ..Default::default()
876        })
877        .expect("custom locator should serialize");
878
879        assert_eq!(serialized["locator"]["label"], "movement");
880    }
881
882    #[test]
883    fn test_custom_locator_type_normalizes_manual_construction() {
884        let locator = LocatorType::Custom("Reel Label".to_string());
885
886        assert_eq!(locator.as_key(), "reel-label");
887        assert_eq!(
888            locator,
889            LocatorType::from_key("reel-label").expect("known custom key should parse")
890        );
891        assert_eq!(
892            serde_json::to_string(&locator).expect("custom locator should serialize"),
893            "\"reel-label\""
894        );
895    }
896
897    #[test]
898    fn test_locator_segments_single() {
899        let item = CitationItem {
900            id: "test".to_string(),
901            locator: Some(CitationLocator::single(LocatorType::Page, "42")),
902            ..Default::default()
903        };
904        let segments = item.locator_segments().unwrap();
905        assert_eq!(segments.len(), 1);
906        assert_eq!(segments[0].label, LocatorType::Page);
907    }
908
909    #[test]
910    fn test_locator_segments_none() {
911        let item = CitationItem {
912            id: "test".to_string(),
913            ..Default::default()
914        };
915        assert!(item.locator_segments().is_none());
916    }
917
918    #[test]
919    fn test_single_locator_serializes_without_segments_wrapper() {
920        let item = CitationItem {
921            id: "test".to_string(),
922            locator: Some(CitationLocator::single(LocatorType::Page, "42")),
923            ..Default::default()
924        };
925        let json = serde_json::to_value(&item).unwrap();
926        let locator = json
927            .as_object()
928            .unwrap()
929            .get("locator")
930            .and_then(serde_json::Value::as_object)
931            .unwrap();
932        assert!(locator.contains_key("label"));
933        assert!(!locator.contains_key("segments"));
934    }
935
936    #[test]
937    fn test_compound_locator_deserialization() {
938        let json = r#"
939        {
940            "id": "smith2020",
941            "locator": {
942                "segments": [
943                    { "label": "page", "value": "23" },
944                    { "label": "line", "value": "13" }
945                ]
946            }
947        }
948        "#;
949        let item: CitationItem = serde_json::from_str(json).unwrap();
950        let segs = item.locator.as_ref().unwrap().segments();
951        assert_eq!(segs.len(), 2);
952        assert_eq!(segs[0].label, LocatorType::Page);
953        assert_eq!(segs[0].value.value_str(), "23");
954        assert_eq!(segs[1].label, LocatorType::Line);
955        assert_eq!(segs[1].value.value_str(), "13");
956    }
957
958    #[test]
959    fn test_locator_value_explicit_plural_override() {
960        let json = r#"
961        {
962            "id": "test",
963            "locator": {
964                "label": "figure",
965                "value": {
966                    "value": "A-3",
967                    "plural": false
968                }
969            }
970        }
971        "#;
972        let item: CitationItem = serde_json::from_str(json).unwrap();
973        let segs = item.locator.as_ref().unwrap().segments();
974        assert_eq!(segs[0].value.value_str(), "A-3");
975        assert!(!segs[0].value.is_plural());
976    }
977
978    #[test]
979    fn test_locator_value_heuristic_plural() {
980        let lv_range = LocatorValue::from("42-45");
981        assert!(lv_range.is_plural());
982
983        let lv_single = LocatorValue::from("42");
984        assert!(!lv_single.is_plural());
985
986        let lv_en_dash = LocatorValue::from("42–45");
987        assert!(lv_en_dash.is_plural());
988
989        let lv_comma = LocatorValue::from("1, 3, 5");
990        assert!(lv_comma.is_plural());
991
992        let lv_ampersand = LocatorValue::from("A & B");
993        assert!(lv_ampersand.is_plural());
994    }
995
996    #[test]
997    fn test_normalize_locator_text_with_explicit_aliases() {
998        let aliases = vec![
999            ("page".to_string(), LocatorType::Page),
1000            ("p.".to_string(), LocatorType::Page),
1001            ("chapter".to_string(), LocatorType::Chapter),
1002            ("ch.".to_string(), LocatorType::Chapter),
1003            ("section".to_string(), LocatorType::Section),
1004            ("§".to_string(), LocatorType::Section),
1005        ];
1006
1007        // Bare number defaults to Page
1008        assert_eq!(
1009            normalize_locator_text("45", &aliases),
1010            Some(CitationLocator::single(LocatorType::Page, "45"))
1011        );
1012
1013        // Explicit label
1014        assert_eq!(
1015            normalize_locator_text("chapter 2", &aliases),
1016            Some(CitationLocator::single(LocatorType::Chapter, "2"))
1017        );
1018
1019        // Abbreviated label
1020        assert_eq!(
1021            normalize_locator_text("ch. 3", &aliases),
1022            Some(CitationLocator::single(LocatorType::Chapter, "3"))
1023        );
1024
1025        // Symbol label
1026        assert_eq!(
1027            normalize_locator_text("§ 4", &aliases),
1028            Some(CitationLocator::single(LocatorType::Section, "4"))
1029        );
1030
1031        // Compound locator
1032        let compound = normalize_locator_text("chapter 2, page 10", &aliases).unwrap();
1033        assert!(compound.is_compound());
1034        let segs = compound.segments();
1035        assert_eq!(segs[0].label, LocatorType::Chapter);
1036        assert_eq!(segs[1].label, LocatorType::Page);
1037
1038        // Empty or invalid input
1039        assert_eq!(normalize_locator_text("", &aliases), None);
1040        assert_eq!(normalize_locator_text("   ", &aliases), None);
1041        assert_eq!(normalize_locator_text("chapter:", &aliases), None);
1042    }
1043
1044    #[test]
1045    fn test_normalize_locator_text_with_abbreviated_aliases() {
1046        let aliases = vec![
1047            ("page".to_string(), LocatorType::Page),
1048            ("pp.".to_string(), LocatorType::Page),
1049            ("vol.".to_string(), LocatorType::Volume),
1050        ];
1051
1052        assert_eq!(
1053            normalize_locator_text("page 45", &aliases),
1054            Some(CitationLocator::single(LocatorType::Page, "45"))
1055        );
1056        assert_eq!(
1057            normalize_locator_text("pp. 10-12", &aliases),
1058            Some(CitationLocator::single(LocatorType::Page, "10-12"))
1059        );
1060        assert_eq!(
1061            normalize_locator_text("vol. 1", &aliases),
1062            Some(CitationLocator::single(LocatorType::Volume, "1"))
1063        );
1064    }
1065}