Skip to main content

citum_schema_data/
citation.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Citation input model for the Citum processor.
7//!
8//! This module defines the structures for representing citations as input
9//! to the processor. Citations reference entries in the bibliography and
10//! can include locators, prefixes, suffixes, and mode information.
11
12#[cfg(feature = "schema")]
13use schemars::JsonSchema;
14use serde::{Deserialize, Deserializer, Serialize, Serializer};
15#[cfg(feature = "bindings")]
16use specta::Type;
17use std::borrow::Cow;
18use std::hash::{Hash, Hasher};
19
20/// A list of citations to process.
21pub type Citations = Vec<Citation>;
22
/// Citation mode for author-date styles.
///
/// Determines how the author name is rendered relative to the citation.
/// Variants are (de)serialized in kebab-case: `integral` and `non-integral`.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub enum CitationMode {
    /// Author inline in text: "Smith (2020) argues..."
    /// Also known as "narrative" or "in-text" citations.
    Integral,
    /// Author in parentheses: "(Smith, 2020)"
    /// The default mode for most citations (this is the `Default` variant).
    #[default]
    NonIntegral,
}
39
/// Explicit integral citation name-memory state for one citation item.
///
/// Used as the override type for both `CitationItem::integral_name_state`
/// and `CitationItem::org_abbreviation_state`. Variants are (de)serialized in
/// kebab-case (`first`, `subsequent`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub enum IntegralNameState {
    /// Render this item as the first integral mention in scope.
    First,
    /// Render this item as a subsequent integral mention in scope.
    Subsequent,
}
51
/// Position of a citation in the document flow.
///
/// Indicates where this citation appears relative to previous citations
/// of the same item(s). Used for note-based styles to detect ibid and
/// subsequent citations, and for author-date styles to apply position-specific
/// formatting rules (e.g., short forms after first citation).
///
/// Variants are (de)serialized in kebab-case (`first`, `subsequent`, `ibid`,
/// `ibid-with-locator`).
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub enum Position {
    /// First citation of an item.
    First,
    /// Subsequent citation of an item (non-consecutive).
    Subsequent,
    /// Same item cited immediately before, no locator on either.
    Ibid,
    /// Same item cited immediately before, with different locator.
    IbidWithLocator,
}
72
/// A citation containing one or more references.
///
/// Field names are (de)serialized in kebab-case (e.g. `note-number`,
/// `suppress-author`, `sentence-start`). On serialization, optional fields
/// that are `None`, a `mode` equal to the default, and boolean flags that are
/// `false` are omitted from the output.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub struct Citation {
    /// The citation ID (optional, for tracking).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Note number for footnote/endnote styles.
    /// Assigned by the document processor, not the citation processor.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub note_number: Option<u32>,
    /// Citation mode: integral (narrative) vs non-integral (parenthetical).
    /// Only relevant for author-date styles.
    /// Defaults to `CitationMode::NonIntegral` when absent from the input.
    #[serde(default, skip_serializing_if = "is_default_mode")]
    pub mode: CitationMode,
    /// Position of this citation in the document flow.
    /// Detected automatically by the processor or set explicitly by the caller.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub position: Option<Position>,
    /// Suppress the author name across all items in this citation.
    /// Used when the author is already named in the prose: "Smith argues (2020)".
    /// Applies uniformly to all items — per-item suppression is not supported
    /// because mixed-visibility citations are typographically incoherent.
    #[serde(default, skip_serializing_if = "is_false")]
    pub suppress_author: bool,
    /// Prefix text before all citation items.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prefix: Option<String>,
    /// Suffix text after all citation items.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suffix: Option<String>,
    /// The citation items (references being cited).
    pub items: Vec<CitationItem>,
    /// If true, the entire citation is a single dynamic compound set.
    ///
    /// The first item acts as the head and subsequent items are merged as tails
    /// in the bibliography. Ignored for non-numeric (compound-numeric) styles.
    /// Item order is preserved and sorting is suppressed when this flag is set.
    #[serde(default, skip_serializing_if = "is_false")]
    pub grouped: bool,
    /// Signal that this citation cluster opens a sentence, so its leading
    /// character should be capitalized (e.g. "see also …" → "See also …").
    ///
    /// The processor cannot infer sentence context from rendered text; the host
    /// (document pipeline, WASM bridge, or editor) supplies this flag
    /// explicitly — mirroring LaTeX's capitalized cite commands (`\Citet`,
    /// `\Parencite`, `\Textcite`). When `false` (the default), no
    /// capitalization transform is applied.
    #[serde(default, skip_serializing_if = "is_false")]
    pub sentence_start: bool,
}
126
127impl Citation {
128    /// Create a simple single-item citation.
129    ///
130    /// Convenience constructor for a citation with a single reference ID and default settings.
131    pub fn simple(id: &str) -> Self {
132        Self {
133            items: vec![CitationItem {
134                id: id.to_string(),
135                ..Default::default()
136            }],
137            ..Default::default()
138        }
139    }
140}
141
142/// Helper for skip_serializing_if on mode field.
143fn is_default_mode(mode: &CitationMode) -> bool {
144    *mode == CitationMode::NonIntegral
145}
146
/// `skip_serializing_if` predicate for boolean flags whose default is `false`.
///
/// Takes a reference because serde passes the field by reference; returns
/// `true` exactly when the flag is unset.
fn is_false(b: &bool) -> bool {
    let flag = *b;
    !flag
}
151
/// Locator types for pinpoint citations.
///
/// Equality, hashing, and (de)serialization are implemented by hand in this
/// module in terms of the canonical kebab-case key returned by
/// `LocatorType::as_key`, which is why `PartialEq`, `Eq`, `Hash`, `Serialize`,
/// and `Deserialize` are not derived here. `Page` is the `Default` variant.
#[derive(Debug, Clone, Default)]
#[cfg_attr(feature = "bindings", derive(Type))]
pub enum LocatorType {
    /// Locator refers to a book within a larger work.
    Book,
    /// Locator refers to a chapter.
    Chapter,
    /// Locator refers to a clause.
    Clause,
    /// Locator refers to a column.
    Column,
    /// Locator refers to a corollary.
    Corollary,
    /// Locator refers to a definition.
    Definition,
    /// Locator refers to a division.
    Division,
    /// Locator refers to a figure.
    Figure,
    /// Locator refers to a folio.
    Folio,
    /// Locator refers to a numbered line.
    Line,
    /// Locator refers to a lemma.
    Lemma,
    /// Locator refers to a note.
    Note,
    /// Locator refers to a numbered unit.
    Number,
    /// Locator refers to an opus number.
    Opus,
    #[default]
    /// Locator refers to a page.
    Page,
    /// Locator refers to a paragraph.
    Paragraph,
    /// Locator refers to a sub-paragraph.
    Subparagraph,
    /// Locator refers to a sub-clause.
    Subclause,
    /// Locator refers to a sub-division.
    Subdivision,
    /// Locator refers to a sub-section.
    Subsection,
    /// Locator refers to a part or division.
    Part,
    /// Locator refers to a problem.
    Problem,
    /// Locator refers to a proposition.
    Proposition,
    /// Locator refers to a recital.
    Recital,
    /// Locator refers to a schedule.
    Schedule,
    /// Locator refers to a section.
    Section,
    /// Locator refers to a surah.
    Surah,
    /// Locator refers to a theorem.
    Theorem,
    /// Locator refers to an entry under a headword.
    SubVerbo,
    /// Locator refers to a supplement.
    Supplement,
    /// Locator refers to a verse.
    Verse,
    /// Locator refers to a volume.
    Volume,
    /// Locator refers to a periodical volume.
    VolumePeriodical,
    /// Locator refers to a monograph volume.
    VolumeBook,
    /// Locator refers to an issue.
    Issue,
    /// Locator refers to an algorithm.
    Algorithm,
    /// Locator refers to a custom pinpoint label.
    ///
    /// The stored string is not required to be normalized; `as_key`
    /// normalizes it on the fly when producing the canonical key.
    Custom(String),
}
232
233impl LocatorType {
234    /// Return the canonical kebab-case key for this locator label.
235    #[must_use]
236    pub fn as_key(&self) -> Cow<'_, str> {
237        match self {
238            Self::Book => Cow::Borrowed("book"),
239            Self::Chapter => Cow::Borrowed("chapter"),
240            Self::Clause => Cow::Borrowed("clause"),
241            Self::Column => Cow::Borrowed("column"),
242            Self::Corollary => Cow::Borrowed("corollary"),
243            Self::Definition => Cow::Borrowed("definition"),
244            Self::Division => Cow::Borrowed("division"),
245            Self::Figure => Cow::Borrowed("figure"),
246            Self::Folio => Cow::Borrowed("folio"),
247            Self::Line => Cow::Borrowed("line"),
248            Self::Lemma => Cow::Borrowed("lemma"),
249            Self::Note => Cow::Borrowed("note"),
250            Self::Number => Cow::Borrowed("number"),
251            Self::Opus => Cow::Borrowed("opus"),
252            Self::Page => Cow::Borrowed("page"),
253            Self::Paragraph => Cow::Borrowed("paragraph"),
254            Self::Subparagraph => Cow::Borrowed("subparagraph"),
255            Self::Subclause => Cow::Borrowed("subclause"),
256            Self::Subdivision => Cow::Borrowed("subdivision"),
257            Self::Subsection => Cow::Borrowed("subsection"),
258            Self::Part => Cow::Borrowed("part"),
259            Self::Problem => Cow::Borrowed("problem"),
260            Self::Proposition => Cow::Borrowed("proposition"),
261            Self::Recital => Cow::Borrowed("recital"),
262            Self::Schedule => Cow::Borrowed("schedule"),
263            Self::Section => Cow::Borrowed("section"),
264            Self::Surah => Cow::Borrowed("surah"),
265            Self::Theorem => Cow::Borrowed("theorem"),
266            Self::SubVerbo => Cow::Borrowed("sub-verbo"),
267            Self::Supplement => Cow::Borrowed("supplement"),
268            Self::Verse => Cow::Borrowed("verse"),
269            Self::Volume => Cow::Borrowed("volume"),
270            Self::VolumePeriodical => Cow::Borrowed("volume-periodical"),
271            Self::VolumeBook => Cow::Borrowed("volume-book"),
272            Self::Issue => Cow::Borrowed("issue"),
273            Self::Algorithm => Cow::Borrowed("algorithm"),
274            Self::Custom(value) => normalize_kind_key(value)
275                .map(Cow::Owned)
276                .unwrap_or_else(|| Cow::Borrowed(value.as_str())),
277        }
278    }
279
280    /// Parse a locator label from a known keyword or custom identifier.
281    ///
282    /// # Errors
283    ///
284    /// Returns an error when the input is empty or normalizes to an empty key.
285    pub fn from_key(value: &str) -> Result<Self, String> {
286        let canonical = normalize_kind_key(value)
287            .ok_or_else(|| "locator label must not be empty".to_string())?;
288        Ok(match canonical.as_str() {
289            "algorithm" => Self::Algorithm,
290            "book" => Self::Book,
291            "chapter" => Self::Chapter,
292            "clause" => Self::Clause,
293            "column" => Self::Column,
294            "corollary" => Self::Corollary,
295            "definition" => Self::Definition,
296            "division" => Self::Division,
297            "figure" => Self::Figure,
298            "folio" => Self::Folio,
299            "line" => Self::Line,
300            "lemma" => Self::Lemma,
301            "note" => Self::Note,
302            "number" => Self::Number,
303            "opus" => Self::Opus,
304            "page" => Self::Page,
305            "paragraph" => Self::Paragraph,
306            "part" => Self::Part,
307            "problem" => Self::Problem,
308            "proposition" => Self::Proposition,
309            "recital" => Self::Recital,
310            "schedule" => Self::Schedule,
311            "section" => Self::Section,
312            "subclause" => Self::Subclause,
313            "subdivision" => Self::Subdivision,
314            "subparagraph" => Self::Subparagraph,
315            "subsection" => Self::Subsection,
316            "sub-verbo" => Self::SubVerbo,
317            "supplement" => Self::Supplement,
318            "surah" => Self::Surah,
319            "theorem" => Self::Theorem,
320            "verse" => Self::Verse,
321            "volume" => Self::Volume,
322            "volume-book" => Self::VolumeBook,
323            "volume-periodical" => Self::VolumePeriodical,
324            "issue" => Self::Issue,
325            _ => Self::Custom(canonical),
326        })
327    }
328}
329
330impl PartialEq for LocatorType {
331    fn eq(&self, other: &Self) -> bool {
332        self.as_key().as_ref() == other.as_key().as_ref()
333    }
334}
335
// Marker impl: the hand-written `PartialEq` for this type compares canonical
// key strings, and string equality is a full equivalence relation.
impl Eq for LocatorType {}
337
338impl Hash for LocatorType {
339    fn hash<H: Hasher>(&self, state: &mut H) {
340        self.as_key().as_ref().hash(state);
341    }
342}
343
344impl Serialize for LocatorType {
345    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
346    where
347        S: Serializer,
348    {
349        serializer.serialize_str(self.as_key().as_ref())
350    }
351}
352
353impl<'de> Deserialize<'de> for LocatorType {
354    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
355    where
356        D: Deserializer<'de>,
357    {
358        let value = String::deserialize(deserializer)?;
359        Self::from_key(&value).map_err(serde::de::Error::custom)
360    }
361}
362
// Hand-written schema: `LocatorType` is (de)serialized as a plain string by
// its manual serde impls, so the schema is a bare string rather than a derived
// enum schema. Only compiled when the `schema` feature is enabled.
#[cfg(feature = "schema")]
impl JsonSchema for LocatorType {
    /// Name under which this type is registered in generated schemas.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        "LocatorType".into()
    }

    /// Describe the type as an unconstrained JSON string; the generator is not
    /// consulted because no sub-schemas are referenced.
    fn json_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Known locator label keyword or custom kebab-case identifier."
        })
    }
}
376
/// A locator value that supports both plain strings and explicit plurality.
///
/// Plain strings use heuristic plural detection (checking for `-`, `–`, `,`, `&`).
/// Use the explicit form to override when the heuristic fails (e.g., "figure A-3"
/// should be singular despite containing a hyphen).
///
/// The enum is `#[serde(untagged)]`: a bare string maps to `Text`, and an
/// object with `value` and `plural` fields maps to `Explicit`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(untagged)]
pub enum LocatorValue {
    /// Plain string value with heuristic plural detection.
    Text(String),
    /// Explicit value with manual plural override.
    Explicit {
        /// The locator value string.
        value: String,
        /// Whether this locator is plural.
        plural: bool,
    },
}
397
398impl LocatorValue {
399    /// Returns the raw value string.
400    pub fn value_str(&self) -> &str {
401        match self {
402            LocatorValue::Text(s) => s,
403            LocatorValue::Explicit { value, .. } => value,
404        }
405    }
406
407    /// Returns whether this locator value is plural.
408    ///
409    /// For `Text`, uses the heuristic (contains `-`, `–`, `,`, or `&`).
410    /// For `Explicit`, returns the specified `plural` field.
411    pub fn is_plural(&self) -> bool {
412        match self {
413            LocatorValue::Text(s) => {
414                s.contains('\u{2013}') || s.contains('-') || s.contains(',') || s.contains('&')
415            }
416            LocatorValue::Explicit { plural, .. } => *plural,
417        }
418    }
419}
420
421impl Default for LocatorValue {
422    fn default() -> Self {
423        LocatorValue::Text(String::new())
424    }
425}
426
427impl From<String> for LocatorValue {
428    fn from(s: String) -> Self {
429        LocatorValue::Text(s)
430    }
431}
432
433impl From<&str> for LocatorValue {
434    fn from(s: &str) -> Self {
435        LocatorValue::Text(s.to_string())
436    }
437}
438
/// A single segment of a compound locator.
///
/// Pairs a locator type with its value, e.g. `{ label: chapter, value: "3" }`.
/// Also used on its own as the payload of `CitationLocator::Single`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub struct LocatorSegment {
    /// The locator type for this segment.
    pub label: LocatorType,
    /// The locator value (e.g., "3", "42-45").
    pub value: LocatorValue,
}
452
453impl LocatorSegment {
454    /// Create a locator segment from a canonical label and value.
455    pub fn new(label: LocatorType, value: impl Into<LocatorValue>) -> Self {
456        Self {
457            label,
458            value: value.into(),
459        }
460    }
461}
462
/// A canonical citation locator.
///
/// Simple locators use the single-segment form, while compound locators use
/// the explicit `segments` wrapper.
///
/// Serialization is derived (untagged); deserialization is implemented by
/// hand in this module so that compound input is validated through
/// `CitationLocator::compound`.
#[derive(Debug, Clone, Serialize, PartialEq)]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(untagged)]
pub enum CitationLocator {
    /// A single labeled locator.
    Single(LocatorSegment),
    /// Multiple ordered locator segments.
    Compound {
        /// Ordered locator segments.
        segments: Vec<LocatorSegment>,
    },
}
479
// Private wire-format mirror of `CitationLocator`. The hand-written
// `Deserialize` impl for `CitationLocator` parses into this type first and
// then converts, routing compound input through `CitationLocator::compound`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
enum CitationLocatorRepr {
    // A bare segment object (`label` + `value`).
    Single(LocatorSegment),
    // An object wrapping an ordered `segments` list.
    Compound { segments: Vec<LocatorSegment> },
}
486
487impl<'de> Deserialize<'de> for CitationLocator {
488    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
489    where
490        D: serde::Deserializer<'de>,
491    {
492        use serde::de::Error;
493
494        match CitationLocatorRepr::deserialize(deserializer)? {
495            CitationLocatorRepr::Single(segment) => Ok(Self::Single(segment)),
496            CitationLocatorRepr::Compound { segments } => {
497                Self::compound(segments).map_err(D::Error::custom)
498            }
499        }
500    }
501}
502
503impl CitationLocator {
504    /// Create a single-segment locator.
505    pub fn single(label: LocatorType, value: impl Into<LocatorValue>) -> Self {
506        Self::Single(LocatorSegment::new(label, value))
507    }
508
509    /// Create a compound locator with two or more segments.
510    ///
511    /// # Errors
512    ///
513    /// Returns an error when fewer than two locator segments are supplied.
514    pub fn compound(segments: Vec<LocatorSegment>) -> Result<Self, &'static str> {
515        if segments.len() < 2 {
516            return Err("compound locators must contain at least two segments");
517        }
518        Ok(Self::Compound { segments })
519    }
520
521    /// Returns the ordered locator segments as a slice.
522    pub fn segments(&self) -> &[LocatorSegment] {
523        match self {
524            Self::Single(segment) => std::slice::from_ref(segment),
525            Self::Compound { segments } => segments.as_slice(),
526        }
527    }
528
529    /// Returns true if this locator contains multiple segments.
530    pub fn is_compound(&self) -> bool {
531        matches!(self, Self::Compound { .. })
532    }
533
534    /// Returns a stable string form used for locator comparison.
535    pub fn canonical_string(&self) -> String {
536        self.segments()
537            .iter()
538            .map(|segment| format!("{}:{}", segment.label.as_key(), segment.value.value_str()))
539            .collect::<Vec<_>>()
540            .join(",")
541    }
542}
543
// Hand-written schema mirroring the untagged wire format of `CitationLocator`.
// Only compiled when the `schema` feature is enabled.
#[cfg(feature = "schema")]
impl JsonSchema for CitationLocator {
    /// Name under which this type is registered in generated schemas.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        "CitationLocator".into()
    }

    /// Describe a locator as either a bare segment object or an object with a
    /// `segments` array.
    ///
    /// The array carries `minItems: 2` so the schema agrees with
    /// `CitationLocator::compound`, which the deserializer uses and which
    /// rejects compound locators with fewer than two segments.
    fn json_schema(generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        let single_schema = generator.subschema_for::<LocatorSegment>();
        let segment_item_schema = generator.subschema_for::<LocatorSegment>();
        let compound_schema = schemars::json_schema!({
            "type": "object",
            "properties": {
                "segments": {
                    "type": "array",
                    "items": segment_item_schema,
                    "minItems": 2
                }
            },
            "required": ["segments"]
        });
        schemars::json_schema!({
            "oneOf": [single_schema, compound_schema]
        })
    }
}
564
/// Normalize free-form label text into a lowercase kebab-case key.
///
/// Runs of ASCII letters and digits are kept (lowercased) and joined with a
/// single `-`; every other character, including non-ASCII letters, acts as a
/// separator. Leading and trailing separators are dropped. Returns `None`
/// when the input contains no ASCII alphanumeric character at all.
fn normalize_kind_key(value: &str) -> Option<String> {
    let mut key = String::new();

    let runs = value
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|run| !run.is_empty());
    for run in runs {
        if !key.is_empty() {
            key.push('-');
        }
        key.extend(run.chars().map(|ch| ch.to_ascii_lowercase()));
    }

    (!key.is_empty()).then_some(key)
}
587
/// A single citation item referencing a bibliography entry.
///
/// Field names are (de)serialized in kebab-case (e.g. `integral-name-state`),
/// and every optional field is omitted from serialized output when `None`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[cfg_attr(feature = "bindings", derive(Type))]
#[serde(rename_all = "kebab-case")]
pub struct CitationItem {
    /// The reference ID (citekey).
    pub id: String,
    /// Canonical locator value for pinpoint citations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub locator: Option<CitationLocator>,
    /// Prefix text before this item
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prefix: Option<String>,
    /// Suffix text after this item
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suffix: Option<String>,
    /// Explicit integral name-memory state override for this item.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub integral_name_state: Option<IntegralNameState>,
    /// Explicit org-abbreviation state override for this item.
    /// Reuses `IntegralNameState` (first / subsequent) as its value type.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub org_abbreviation_state: Option<IntegralNameState>,
}
612
613impl CitationItem {
614    /// Returns the canonical locator segments when present.
615    pub fn locator_segments(&self) -> Option<&[LocatorSegment]> {
616        self.locator.as_ref().map(CitationLocator::segments)
617    }
618}
619
620/// Normalize a textual locator string into the canonical locator model.
621pub fn normalize_locator_text(
622    locator: &str,
623    aliases: &[(String, LocatorType)],
624) -> Option<CitationLocator> {
625    let locator = locator.trim();
626    if locator.is_empty() {
627        return None;
628    }
629
630    let raw_segments = split_locator_segments(locator, aliases);
631    let segments: Vec<LocatorSegment> = raw_segments
632        .into_iter()
633        .filter_map(|segment| parse_locator_segment(segment, aliases))
634        .collect();
635
636    match segments.len() {
637        0 => None,
638        1 => {
639            let mut it = segments.into_iter();
640            Some(CitationLocator::Single(it.next()?))
641        }
642        _ => CitationLocator::compound(segments).ok(),
643    }
644}
645
646fn split_locator_segments<'a>(locator: &'a str, aliases: &[(String, LocatorType)]) -> Vec<&'a str> {
647    let mut parts = Vec::new();
648    let mut start = 0;
649
650    for (idx, ch) in locator.char_indices() {
651        if ch != ',' {
652            continue;
653        }
654
655        #[allow(
656            clippy::string_slice,
657            reason = "idx is a valid char boundary from char_indices()"
658        )]
659        let candidate = locator[idx + ch.len_utf8()..].trim_start();
660        if begins_with_locator_label(candidate, aliases) {
661            #[allow(
662                clippy::string_slice,
663                reason = "start and idx are valid char boundaries"
664            )]
665            parts.push(locator[start..idx].trim());
666            start = idx + ch.len_utf8();
667        }
668    }
669
670    #[allow(clippy::string_slice, reason = "start is a valid char boundary")]
671    parts.push(locator[start..].trim());
672    parts
673}
674
675fn parse_locator_segment(
676    segment: &str,
677    aliases: &[(String, LocatorType)],
678) -> Option<LocatorSegment> {
679    let segment = segment.trim();
680    if segment.is_empty() {
681        return None;
682    }
683
684    if let Some((label, rest)) = strip_locator_label(segment, aliases) {
685        let value = rest.trim_start_matches(':').trim();
686        if value.is_empty() {
687            return None;
688        }
689        return Some(LocatorSegment::new(label, value));
690    }
691
692    Some(LocatorSegment::new(LocatorType::Page, segment))
693}
694
695fn begins_with_locator_label(segment: &str, aliases: &[(String, LocatorType)]) -> bool {
696    strip_locator_label(segment, aliases).is_some()
697}
698
699fn strip_locator_label<'a>(
700    segment: &'a str,
701    aliases: &[(String, LocatorType)],
702) -> Option<(LocatorType, &'a str)> {
703    let lower = segment.to_lowercase();
704    let mut best: Option<(LocatorType, usize)> = None;
705
706    for (alias, label) in aliases {
707        if let Some(remainder) = lower.strip_prefix(alias)
708            && alias_boundary(remainder)
709        {
710            let alias_len = alias.len();
711            if best
712                .as_ref()
713                .is_none_or(|(_, best_len)| alias_len > *best_len)
714            {
715                best = Some((label.clone(), alias_len));
716            }
717        }
718    }
719
720    best.map(|(label, alias_len)| {
721        #[allow(clippy::string_slice, reason = "alias_len is the length of a prefix")]
722        (label, segment[alias_len..].trim_start())
723    })
724}
725
/// Decide whether the text following a matched alias marks a label boundary.
///
/// A boundary is the end of the text, or a next character that is `:`, `.`,
/// or any Unicode whitespace. Anything else means the alias was only a prefix
/// of a longer word.
fn alias_boundary(remainder: &str) -> bool {
    match remainder.chars().next() {
        None => true,
        Some(next) => next == ':' || next == '.' || next.is_whitespace(),
    }
}
732
733#[cfg(test)]
734#[allow(
735    clippy::unwrap_used,
736    clippy::expect_used,
737    clippy::panic,
738    clippy::indexing_slicing,
739    clippy::todo,
740    clippy::unimplemented,
741    clippy::unreachable,
742    clippy::get_unwrap,
743    reason = "Panicking is acceptable and often desired in tests."
744)]
745mod tests {
746    use super::*;
747
748    #[test]
749    fn test_citation_deserialization() {
750        let json = r#"
751        {
752            "items": [
753                {
754                    "id": "kuhn1962"
755                }
756            ],
757            "mode": "integral"
758        }
759        "#;
760        let citation: Citation = serde_json::from_str(json).unwrap();
761        assert_eq!(citation.items.len(), 1);
762        assert_eq!(citation.items[0].id, "kuhn1962");
763        assert_eq!(citation.mode, CitationMode::Integral);
764    }
765
766    #[test]
767    fn test_citation_simple_constructor_defaults() {
768        let citation = Citation::simple("kuhn1962");
769
770        assert_eq!(citation.items.len(), 1);
771        assert_eq!(citation.items[0].id, "kuhn1962");
772        assert_eq!(citation.mode, CitationMode::NonIntegral);
773        assert_eq!(citation.position, None);
774        assert!(!citation.suppress_author);
775        assert_eq!(citation.note_number, None);
776        assert_eq!(citation.prefix, None);
777        assert_eq!(citation.suffix, None);
778    }
779
780    #[test]
781    fn test_citation_default_fields_are_omitted_in_serialization() {
782        let citation = Citation::simple("kuhn1962");
783        let json = serde_json::to_value(&citation).unwrap();
784        let object = json.as_object().unwrap();
785
786        assert!(!object.contains_key("mode"));
787        assert!(!object.contains_key("suppress-author"));
788
789        let explicit = Citation {
790            mode: CitationMode::Integral,
791            suppress_author: true,
792            ..citation
793        };
794        let explicit_json = serde_json::to_value(&explicit).unwrap();
795        let explicit_object = explicit_json.as_object().unwrap();
796
797        assert_eq!(explicit_object.get("mode").unwrap(), "integral");
798        assert_eq!(explicit_object.get("suppress-author").unwrap(), true);
799    }
800
801    #[test]
802    fn test_citation_item_with_locator() {
803        let json = r#"
804        {
805            "id": "kuhn1962",
806            "locator": {
807                "label": "page",
808                "value": "42-45"
809            }
810        }
811        "#;
812        let item: CitationItem = serde_json::from_str(json).unwrap();
813        assert_eq!(item.id, "kuhn1962");
814        assert_eq!(
815            item.locator,
816            Some(CitationLocator::single(LocatorType::Page, "42-45"))
817        );
818    }
819
820    #[test]
821    fn test_compound_locator_serde_roundtrip() {
822        let json = r#"
823        {
824            "id": "smith2020",
825            "locator": {
826                "segments": [
827                    { "label": "chapter", "value": "3" },
828                    { "label": "section", "value": "42" }
829                ]
830            }
831        }
832        "#;
833        let item: CitationItem = serde_json::from_str(json).unwrap();
834        let segs = item.locator.as_ref().unwrap().segments();
835        assert_eq!(segs.len(), 2);
836        assert_eq!(segs[0].label, LocatorType::Chapter);
837        assert_eq!(segs[0].value.value_str(), "3");
838        assert_eq!(segs[1].label, LocatorType::Section);
839        assert_eq!(segs[1].value.value_str(), "42");
840
841        // Round-trip
842        let serialized = serde_json::to_string(&item).unwrap();
843        let deserialized: CitationItem = serde_json::from_str(&serialized).unwrap();
844        assert_eq!(deserialized.locator, item.locator);
845    }
846
847    #[test]
848    fn test_compound_locator_rejects_single_segment() {
849        let err = CitationLocator::compound(vec![LocatorSegment::new(LocatorType::Page, "42")])
850            .expect_err("single-segment compound locator must be rejected");
851        assert!(err.contains("at least two"));
852    }
853
854    #[test]
855    fn test_citation_locator_canonical_string_is_stable() {
856        let locator = CitationLocator::compound(vec![
857            LocatorSegment::new(LocatorType::Page, "23"),
858            LocatorSegment::new(LocatorType::Line, "13"),
859        ])
860        .unwrap();
861
862        assert_eq!(locator.canonical_string(), "page:23,line:13");
863    }
864
865    #[test]
866    fn test_custom_locator_type_round_trips_as_plain_string() {
867        let json = r#"
868        {
869            "id": "score2024",
870            "locator": {
871                "label": "Movement",
872                "value": "II"
873            }
874        }
875        "#;
876
877        let item: CitationItem = serde_json::from_str(json).expect("custom locator should parse");
878        let locator = item.locator.expect("custom locator should exist");
879        let segment = &locator.segments()[0];
880
881        assert_eq!(segment.label, LocatorType::Custom("movement".to_string()));
882        let serialized = serde_json::to_value(&CitationItem {
883            id: "score2024".to_string(),
884            locator: Some(locator),
885            ..Default::default()
886        })
887        .expect("custom locator should serialize");
888
889        assert_eq!(serialized["locator"]["label"], "movement");
890    }
891
892    #[test]
893    fn test_custom_locator_type_normalizes_manual_construction() {
894        let locator = LocatorType::Custom("Reel Label".to_string());
895
896        assert_eq!(locator.as_key(), "reel-label");
897        assert_eq!(
898            locator,
899            LocatorType::from_key("reel-label").expect("known custom key should parse")
900        );
901        assert_eq!(
902            serde_json::to_string(&locator).expect("custom locator should serialize"),
903            "\"reel-label\""
904        );
905    }
906
907    #[test]
908    fn test_locator_segments_single() {
909        let item = CitationItem {
910            id: "test".to_string(),
911            locator: Some(CitationLocator::single(LocatorType::Page, "42")),
912            ..Default::default()
913        };
914        let segments = item.locator_segments().unwrap();
915        assert_eq!(segments.len(), 1);
916        assert_eq!(segments[0].label, LocatorType::Page);
917    }
918
919    #[test]
920    fn test_locator_segments_none() {
921        let item = CitationItem {
922            id: "test".to_string(),
923            ..Default::default()
924        };
925        assert!(item.locator_segments().is_none());
926    }
927
928    #[test]
929    fn test_single_locator_serializes_without_segments_wrapper() {
930        let item = CitationItem {
931            id: "test".to_string(),
932            locator: Some(CitationLocator::single(LocatorType::Page, "42")),
933            ..Default::default()
934        };
935        let json = serde_json::to_value(&item).unwrap();
936        let locator = json
937            .as_object()
938            .unwrap()
939            .get("locator")
940            .and_then(serde_json::Value::as_object)
941            .unwrap();
942        assert!(locator.contains_key("label"));
943        assert!(!locator.contains_key("segments"));
944    }
945
946    #[test]
947    fn test_compound_locator_deserialization() {
948        let json = r#"
949        {
950            "id": "smith2020",
951            "locator": {
952                "segments": [
953                    { "label": "page", "value": "23" },
954                    { "label": "line", "value": "13" }
955                ]
956            }
957        }
958        "#;
959        let item: CitationItem = serde_json::from_str(json).unwrap();
960        let segs = item.locator.as_ref().unwrap().segments();
961        assert_eq!(segs.len(), 2);
962        assert_eq!(segs[0].label, LocatorType::Page);
963        assert_eq!(segs[0].value.value_str(), "23");
964        assert_eq!(segs[1].label, LocatorType::Line);
965        assert_eq!(segs[1].value.value_str(), "13");
966    }
967
968    #[test]
969    fn test_locator_value_explicit_plural_override() {
970        let json = r#"
971        {
972            "id": "test",
973            "locator": {
974                "label": "figure",
975                "value": {
976                    "value": "A-3",
977                    "plural": false
978                }
979            }
980        }
981        "#;
982        let item: CitationItem = serde_json::from_str(json).unwrap();
983        let segs = item.locator.as_ref().unwrap().segments();
984        assert_eq!(segs[0].value.value_str(), "A-3");
985        assert!(!segs[0].value.is_plural());
986    }
987
988    #[test]
989    fn test_locator_value_heuristic_plural() {
990        let lv_range = LocatorValue::from("42-45");
991        assert!(lv_range.is_plural());
992
993        let lv_single = LocatorValue::from("42");
994        assert!(!lv_single.is_plural());
995
996        let lv_en_dash = LocatorValue::from("42–45");
997        assert!(lv_en_dash.is_plural());
998
999        let lv_comma = LocatorValue::from("1, 3, 5");
1000        assert!(lv_comma.is_plural());
1001
1002        let lv_ampersand = LocatorValue::from("A & B");
1003        assert!(lv_ampersand.is_plural());
1004    }
1005
1006    #[test]
1007    fn test_normalize_locator_text_with_explicit_aliases() {
1008        let aliases = vec![
1009            ("page".to_string(), LocatorType::Page),
1010            ("p.".to_string(), LocatorType::Page),
1011            ("chapter".to_string(), LocatorType::Chapter),
1012            ("ch.".to_string(), LocatorType::Chapter),
1013            ("section".to_string(), LocatorType::Section),
1014            ("§".to_string(), LocatorType::Section),
1015        ];
1016
1017        // Bare number defaults to Page
1018        assert_eq!(
1019            normalize_locator_text("45", &aliases),
1020            Some(CitationLocator::single(LocatorType::Page, "45"))
1021        );
1022
1023        // Explicit label
1024        assert_eq!(
1025            normalize_locator_text("chapter 2", &aliases),
1026            Some(CitationLocator::single(LocatorType::Chapter, "2"))
1027        );
1028
1029        // Abbreviated label
1030        assert_eq!(
1031            normalize_locator_text("ch. 3", &aliases),
1032            Some(CitationLocator::single(LocatorType::Chapter, "3"))
1033        );
1034
1035        // Symbol label
1036        assert_eq!(
1037            normalize_locator_text("§ 4", &aliases),
1038            Some(CitationLocator::single(LocatorType::Section, "4"))
1039        );
1040
1041        // Compound locator
1042        let compound = normalize_locator_text("chapter 2, page 10", &aliases).unwrap();
1043        assert!(compound.is_compound());
1044        let segs = compound.segments();
1045        assert_eq!(segs[0].label, LocatorType::Chapter);
1046        assert_eq!(segs[1].label, LocatorType::Page);
1047
1048        // Empty or invalid input
1049        assert_eq!(normalize_locator_text("", &aliases), None);
1050        assert_eq!(normalize_locator_text("   ", &aliases), None);
1051        assert_eq!(normalize_locator_text("chapter:", &aliases), None);
1052    }
1053
1054    #[test]
1055    fn test_normalize_locator_text_with_abbreviated_aliases() {
1056        let aliases = vec![
1057            ("page".to_string(), LocatorType::Page),
1058            ("pp.".to_string(), LocatorType::Page),
1059            ("vol.".to_string(), LocatorType::Volume),
1060        ];
1061
1062        assert_eq!(
1063            normalize_locator_text("page 45", &aliases),
1064            Some(CitationLocator::single(LocatorType::Page, "45"))
1065        );
1066        assert_eq!(
1067            normalize_locator_text("pp. 10-12", &aliases),
1068            Some(CitationLocator::single(LocatorType::Page, "10-12"))
1069        );
1070        assert_eq!(
1071            normalize_locator_text("vol. 1", &aliases),
1072            Some(CitationLocator::single(LocatorType::Volume, "1"))
1073        );
1074    }
1075}