Skip to main content

dynomite/proto/memcache/
parser.rs

1//! Memcached text-protocol parser.
2//!
3//! The parser is a single byte-driven state machine for requests
4//! and another for responses, faithfully reproducing the state set
5//! and transitions of `memcache_parse_req` and `memcache_parse_rsp`
6//! in the reference engine. It mutates a [`Msg`] in place: the
7//! command tag, key list, value-length accumulator, and parser
8//! cursor are written back so the streaming caller can resume the
9//! machine on more bytes without re-allocating state.
10//!
11//! The parsers MUST NOT panic on any input. Invalid bytes are
12//! reported via [`MsgParseResult::Error`].
13
// Value lengths are kept in a `u32`, matching the reference engine's
// `uint32_t`, while cursor offsets are `usize`. The value-consumption
// path casts between the two widths, so the truncation lint is
// silenced to keep the Rust port faithful to the C casts.
18#![allow(clippy::cast_possible_truncation)]
19#![allow(clippy::too_many_arguments)]
20#![allow(clippy::match_same_arms)]
21#![allow(clippy::needless_continue)]
22// The state machine deliberately keeps the C `if (token == NULL)`
23// guard pattern; rewriting as `let-else` collapses two branches
24// the reference engine treats independently.
25#![allow(clippy::manual_let_else)]
26#![allow(clippy::redundant_else)]
27
28use super::commands::{
29    memcache_arithmetic, memcache_cas, memcache_delete, memcache_retrieval, memcache_storage,
30    memcache_touch,
31};
32use crate::msg::{KeyPos, Msg, MsgParseResult, MsgType};
33
/// Upper bound, in bytes, on the length of a single Memcached key.
///
/// The request parser reports an error for any key longer than this.
pub const MEMCACHE_MAX_KEY_LENGTH: usize = 250;
36
/// Pair of delimiter bytes that mark the routing tag inside a key.
///
/// When a request is parsed with a `HashTag`, the bytes strictly
/// between the first `open` byte and the next `close` byte of each
/// key are recorded as that key's tag range.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct HashTag {
    /// Byte that starts the tagged region.
    pub open: u8,
    /// Byte that ends the tagged region.
    pub close: u8,
}
46
/// States of the request parser driven by [`memcache_parse_req`].
///
/// Every discriminant is pinned to the index the reference engine
/// uses for the same state, so parity tooling can compare the two
/// implementations state by state.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(u32)]
pub enum ReqState {
    /// Entry state; leading spaces are skipped here.
    #[default]
    Start = 0,
    /// Inside the command keyword.
    ReqType = 1,
    /// Spaces between the command keyword and the first key.
    SpacesBeforeKey = 2,
    /// Inside a key.
    Key = 3,
    /// Spaces separating the keys of a `get`/`gets` request.
    SpacesBeforeKeys = 4,
    /// Spaces preceding the storage flags field.
    SpacesBeforeFlags = 5,
    /// Inside the storage flags field.
    Flags = 6,
    /// Spaces preceding the storage expiry field.
    SpacesBeforeExpiry = 7,
    /// Inside the storage expiry field.
    Expiry = 8,
    /// Spaces preceding the value-length field.
    SpacesBeforeVlen = 9,
    /// Inside the value-length field.
    Vlen = 10,
    /// Spaces preceding the CAS unique field.
    SpacesBeforeCas = 11,
    /// Inside the CAS unique field.
    Cas = 12,
    /// Expecting the LF that precedes the value bytes.
    RuntoVal = 13,
    /// Consuming the value bytes.
    Val = 14,
    /// Spaces preceding the numeric argument of an arithmetic command.
    SpacesBeforeNum = 15,
    /// Inside the numeric argument of an arithmetic command.
    Num = 16,
    /// Consuming the optional tail of the command line up to CR.
    RuntoCrlf = 17,
    /// Expecting the trailing CR.
    Crlf = 18,
    /// Inside the optional `noreply` token.
    Noreply = 19,
    /// The `noreply` token has been consumed.
    AfterNoreply = 20,
    /// Expecting the LF that terminates the request.
    AlmostDone = 21,
}
100
101impl ReqState {
102    fn from_u32(v: u32) -> Self {
103        match v {
104            1 => Self::ReqType,
105            2 => Self::SpacesBeforeKey,
106            3 => Self::Key,
107            4 => Self::SpacesBeforeKeys,
108            5 => Self::SpacesBeforeFlags,
109            6 => Self::Flags,
110            7 => Self::SpacesBeforeExpiry,
111            8 => Self::Expiry,
112            9 => Self::SpacesBeforeVlen,
113            10 => Self::Vlen,
114            11 => Self::SpacesBeforeCas,
115            12 => Self::Cas,
116            13 => Self::RuntoVal,
117            14 => Self::Val,
118            15 => Self::SpacesBeforeNum,
119            16 => Self::Num,
120            17 => Self::RuntoCrlf,
121            18 => Self::Crlf,
122            19 => Self::Noreply,
123            20 => Self::AfterNoreply,
124            21 => Self::AlmostDone,
125            _ => Self::Start,
126        }
127    }
128}
129
/// States of the response parser driven by [`memcache_parse_rsp`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(u32)]
pub enum RspState {
    /// Entry state.
    #[default]
    Start = 0,
    /// Inside a numeric response (the reply to `incr`/`decr`).
    RspNum = 1,
    /// Inside a textual response keyword.
    RspStr = 2,
    /// Spaces preceding the key of a `VALUE` reply.
    SpacesBeforeKey = 3,
    /// Inside the key of a `VALUE` reply.
    Key = 4,
    /// Spaces preceding the flags field.
    SpacesBeforeFlags = 5,
    /// Inside the flags field.
    Flags = 6,
    /// Spaces preceding the value-length field.
    SpacesBeforeVlen = 7,
    /// Inside the value-length field.
    Vlen = 8,
    /// Expecting the LF that precedes the value bytes.
    RuntoVal = 9,
    /// Consuming the value bytes.
    Val = 10,
    /// Expecting the LF that follows the value.
    ValLf = 11,
    /// Inside the trailing `END` token.
    End = 12,
    /// Consuming the optional tail of the line up to CR.
    RuntoCrlf = 13,
    /// Expecting the trailing CR.
    Crlf = 14,
    /// Expecting the LF that terminates the response.
    AlmostDone = 15,
}
168
169impl RspState {
170    fn from_u32(v: u32) -> Self {
171        match v {
172            1 => Self::RspNum,
173            2 => Self::RspStr,
174            3 => Self::SpacesBeforeKey,
175            4 => Self::Key,
176            5 => Self::SpacesBeforeFlags,
177            6 => Self::Flags,
178            7 => Self::SpacesBeforeVlen,
179            8 => Self::Vlen,
180            9 => Self::RuntoVal,
181            10 => Self::Val,
182            11 => Self::ValLf,
183            12 => Self::End,
184            13 => Self::RuntoCrlf,
185            14 => Self::Crlf,
186            15 => Self::AlmostDone,
187            _ => Self::Start,
188        }
189    }
190}
191
/// ASCII carriage return, the first byte of a CRLF line terminator.
const CR: u8 = b'\r';
/// ASCII line feed, the second byte of a CRLF line terminator.
const LF: u8 = b'\n';
194
195fn classify_command(token: &[u8]) -> MsgType {
196    match token {
197        b"get" => MsgType::ReqMcGet,
198        b"gets" => MsgType::ReqMcGets,
199        b"set" => MsgType::ReqMcSet,
200        b"add" => MsgType::ReqMcAdd,
201        b"cas" => MsgType::ReqMcCas,
202        b"incr" => MsgType::ReqMcIncr,
203        b"decr" => MsgType::ReqMcDecr,
204        b"quit" => MsgType::ReqMcQuit,
205        b"touch" => MsgType::ReqMcTouch,
206        b"append" => MsgType::ReqMcAppend,
207        b"delete" => MsgType::ReqMcDelete,
208        b"prepend" => MsgType::ReqMcPrepend,
209        b"replace" => MsgType::ReqMcReplace,
210        _ => MsgType::Unknown,
211    }
212}
213
214fn classify_response(token: &[u8]) -> MsgType {
215    match token {
216        b"END" => MsgType::RspMcEnd,
217        b"VALUE" => MsgType::RspMcValue,
218        b"ERROR" => MsgType::RspMcError,
219        b"STORED" => MsgType::RspMcStored,
220        b"EXISTS" => MsgType::RspMcExists,
221        b"DELETED" => MsgType::RspMcDeleted,
222        b"TOUCHED" => MsgType::RspMcTouched,
223        b"NOT_FOUND" => MsgType::RspMcNotFound,
224        b"NOT_STORED" => MsgType::RspMcNotStored,
225        b"CLIENT_ERROR" => MsgType::RspMcClientError,
226        b"SERVER_ERROR" => MsgType::RspMcServerError,
227        _ => MsgType::Unknown,
228    }
229}
230
231fn make_keypos(input: &[u8], start: usize, end: usize, hash_tag: Option<HashTag>) -> KeyPos {
232    let bytes = input[start..end].to_vec();
233    if let Some(tag) = hash_tag {
234        if let Some(open_idx) = bytes.iter().position(|&b| b == tag.open) {
235            if let Some(close_offset) = bytes[open_idx + 1..].iter().position(|&b| b == tag.close) {
236                let tag_start = open_idx + 1;
237                let tag_end = open_idx + 1 + close_offset;
238                return KeyPos::new(bytes, tag_start..tag_end);
239            }
240        }
241    }
242    KeyPos::without_tag(bytes)
243}
244
245/// Parse a Memcached request from `input` and update `r` in place.
246///
247/// The function reproduces the reference engine's
248/// `memcache_parse_req` state machine. On success, `r.ty()` is set
249/// to the recognised command, parsed keys are appended to
250/// [`Msg::keys`], and the parser cursor (`parser_pos`) advances
251/// just past the trailing LF. On truncated input the function
252/// returns [`MsgParseResult::Again`] and stores the partial state
253/// on `r` for resumption. Invalid bytes return
254/// [`MsgParseResult::Error`].
255///
256/// `hash_tag`, when set, configures the routing-tag delimiters used
257/// when populating [`KeyPos::tag`].
258///
259/// # Examples
260///
261/// ```
262/// use dynomite::msg::{Msg, MsgParseResult, MsgType};
263/// use dynomite::proto::memcache::memcache_parse_req;
264///
265/// let mut r = Msg::new(0, MsgType::Unknown, true);
266/// let res = memcache_parse_req(&mut r, b"set foo 0 0 3\r\nbar\r\n");
267/// assert_eq!(res, MsgParseResult::Ok);
268/// assert_eq!(r.ty(), MsgType::ReqMcSet);
269/// assert_eq!(r.keys()[0].key(), b"foo");
270/// assert_eq!(r.vlen(), 3);
271/// ```
272///
273/// The state machine intentionally lives in a single function to
274/// match the reference engine's parser shape.
275#[allow(clippy::too_many_lines)]
276pub fn memcache_parse_req(r: &mut Msg, input: &[u8]) -> MsgParseResult {
277    memcache_parse_req_tagged(r, input, None)
278}
279
280/// Variant of [`memcache_parse_req`] that accepts an explicit
281/// hash-tag configuration.
282///
283/// # Examples
284///
285/// ```
286/// use dynomite::msg::{Msg, MsgType};
287/// use dynomite::proto::memcache::parser::{memcache_parse_req_tagged, HashTag};
288///
289/// let mut r = Msg::new(0, MsgType::Unknown, true);
290/// let tag = Some(HashTag { open: b'{', close: b'}' });
291/// let _ = memcache_parse_req_tagged(&mut r, b"get {abc}xyz\r\n", tag);
292/// assert_eq!(r.keys()[0].tag_bytes(), b"abc");
293/// ```
294#[allow(clippy::too_many_lines)]
295pub fn memcache_parse_req_tagged(
296    r: &mut Msg,
297    input: &[u8],
298    hash_tag: Option<HashTag>,
299) -> MsgParseResult {
300    if !r.is_request() {
301        r.set_parse_result(MsgParseResult::Error);
302        return MsgParseResult::Error;
303    }
304    let mut state = ReqState::from_u32(r.parser_state());
305    let mut p = r.parser_pos();
306    let mut token: Option<usize> = r.parser_token();
307    let mut vlen = r.vlen();
308    let mut ty = r.ty();
309    let mut is_read = r.flags().is_read;
310    let mut quit = r.flags().quit;
311    let mut expect_reply = r.flags().expect_datastore_reply;
312    let mut ntokens = r.ntokens();
313
314    'machine: while p < input.len() {
315        let ch = input[p];
316        match state {
317            ReqState::Start => {
318                if ch == b' ' {
319                    p += 1;
320                    continue;
321                }
322                if !ch.is_ascii_lowercase() {
323                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
324                }
325                token = Some(p);
326                state = ReqState::ReqType;
327                // Do not advance; re-enter ReqType on the same byte.
328            }
329            ReqState::ReqType => {
330                if ch == b' ' || ch == CR {
331                    let start = match token {
332                        Some(s) => s,
333                        None => {
334                            return finish_error(
335                                r, state, p, token, vlen, ty, is_read, quit, ntokens,
336                            );
337                        }
338                    };
339                    let cmd = &input[start..p];
340                    token = None;
341                    ty = classify_command(cmd);
342                    ntokens = ntokens.saturating_add(1);
343                    is_read = matches!(
344                        ty,
345                        MsgType::ReqMcGet | MsgType::ReqMcGets | MsgType::ReqMcQuit
346                    );
347                    if matches!(ty, MsgType::ReqMcQuit) {
348                        quit = true;
349                        // The C parser sets state to SW_CRLF and steps p back by one.
350                        state = ReqState::Crlf;
351                        // Do not advance; re-enter on this same byte.
352                        continue;
353                    }
354                    match ty {
355                        MsgType::ReqMcGet
356                        | MsgType::ReqMcGets
357                        | MsgType::ReqMcDelete
358                        | MsgType::ReqMcCas
359                        | MsgType::ReqMcSet
360                        | MsgType::ReqMcAdd
361                        | MsgType::ReqMcReplace
362                        | MsgType::ReqMcAppend
363                        | MsgType::ReqMcPrepend
364                        | MsgType::ReqMcIncr
365                        | MsgType::ReqMcDecr
366                        | MsgType::ReqMcTouch => {
367                            if ch == CR {
368                                return finish_error(
369                                    r, state, p, token, vlen, ty, is_read, quit, ntokens,
370                                );
371                            }
372                            state = ReqState::SpacesBeforeKey;
373                            p += 1;
374                            continue;
375                        }
376                        MsgType::Unknown => {
377                            return finish_error(
378                                r, state, p, token, vlen, ty, is_read, quit, ntokens,
379                            );
380                        }
381                        _ => {
382                            return finish_error(
383                                r, state, p, token, vlen, ty, is_read, quit, ntokens,
384                            );
385                        }
386                    }
387                } else if !ch.is_ascii_lowercase() {
388                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
389                } else {
390                    p += 1;
391                }
392            }
393            ReqState::SpacesBeforeKey => {
394                if ch == b' ' {
395                    p += 1;
396                } else {
397                    token = None;
398                    state = ReqState::Key;
399                    // Do not advance; re-process this byte under Key state.
400                }
401            }
402            ReqState::Key => {
403                if token.is_none() {
404                    token = Some(p);
405                }
406                if ch == b' ' || ch == CR {
407                    let start = token.expect("token recorded");
408                    let keylen = p - start;
409                    if keylen == 0 || keylen > MEMCACHE_MAX_KEY_LENGTH {
410                        return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
411                    }
412                    let kp = make_keypos(input, start, p, hash_tag);
413                    r.push_key(kp);
414                    ntokens = ntokens.saturating_add(1);
415                    token = None;
416                    let storage = memcache_storage(ty);
417                    let arithmetic = memcache_arithmetic(ty);
418                    let touch = memcache_touch(ty);
419                    let delete = memcache_delete(ty);
420                    let retrieval = memcache_retrieval(ty);
421                    if storage {
422                        state = ReqState::SpacesBeforeFlags;
423                    } else if arithmetic || touch {
424                        state = ReqState::SpacesBeforeNum;
425                    } else if delete {
426                        state = ReqState::RuntoCrlf;
427                    } else if retrieval {
428                        state = ReqState::SpacesBeforeKeys;
429                    } else {
430                        state = ReqState::RuntoCrlf;
431                    }
432                    if ch == CR {
433                        if storage || arithmetic {
434                            return finish_error(
435                                r, state, p, token, vlen, ty, is_read, quit, ntokens,
436                            );
437                        }
438                        // Re-enter on the CR byte (do not advance).
439                    } else {
440                        p += 1;
441                    }
442                } else {
443                    p += 1;
444                }
445            }
446            ReqState::SpacesBeforeKeys => {
447                match ch {
448                    b' ' => {
449                        p += 1;
450                    }
451                    CR => {
452                        state = ReqState::AlmostDone;
453                        p += 1;
454                    }
455                    _ => {
456                        token = None;
457                        state = ReqState::Key;
458                        // Do not advance.
459                    }
460                }
461            }
462            ReqState::SpacesBeforeFlags => {
463                if ch == b' ' {
464                    p += 1;
465                } else if ch.is_ascii_digit() {
466                    token = Some(p);
467                    state = ReqState::Flags;
468                    p += 1;
469                } else {
470                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
471                }
472            }
473            ReqState::Flags => {
474                if ch.is_ascii_digit() {
475                    p += 1;
476                } else if ch == b' ' {
477                    token = None;
478                    state = ReqState::SpacesBeforeExpiry;
479                    p += 1;
480                } else {
481                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
482                }
483            }
484            ReqState::SpacesBeforeExpiry => {
485                if ch == b' ' {
486                    p += 1;
487                } else if ch.is_ascii_digit() {
488                    token = Some(p);
489                    state = ReqState::Expiry;
490                    p += 1;
491                } else {
492                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
493                }
494            }
495            ReqState::Expiry => {
496                if ch.is_ascii_digit() {
497                    p += 1;
498                } else if ch == b' ' {
499                    token = None;
500                    state = ReqState::SpacesBeforeVlen;
501                    p += 1;
502                } else {
503                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
504                }
505            }
506            ReqState::SpacesBeforeVlen => {
507                if ch == b' ' {
508                    p += 1;
509                } else if ch.is_ascii_digit() {
510                    vlen = u32::from(ch - b'0');
511                    state = ReqState::Vlen;
512                    p += 1;
513                } else {
514                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
515                }
516            }
517            ReqState::Vlen => {
518                if ch.is_ascii_digit() {
519                    vlen = vlen.saturating_mul(10).saturating_add(u32::from(ch - b'0'));
520                    p += 1;
521                } else if memcache_cas(ty) {
522                    if ch != b' ' {
523                        return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
524                    }
525                    token = None;
526                    state = ReqState::SpacesBeforeCas;
527                    // Do not advance; re-enter on the same byte.
528                } else if ch == b' ' || ch == CR {
529                    token = None;
530                    state = ReqState::RuntoCrlf;
531                    // Do not advance.
532                } else {
533                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
534                }
535            }
536            ReqState::SpacesBeforeCas => {
537                if ch == b' ' {
538                    p += 1;
539                } else if ch.is_ascii_digit() {
540                    token = Some(p);
541                    state = ReqState::Cas;
542                    p += 1;
543                } else {
544                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
545                }
546            }
547            ReqState::Cas => {
548                if ch.is_ascii_digit() {
549                    p += 1;
550                } else if ch == b' ' || ch == CR {
551                    token = None;
552                    state = ReqState::RuntoCrlf;
553                    // Do not advance.
554                } else {
555                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
556                }
557            }
558            ReqState::RuntoVal => match ch {
559                LF => {
560                    state = ReqState::Val;
561                    p += 1;
562                }
563                _ => {
564                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
565                }
566            },
567            ReqState::Val => {
568                let m = p.saturating_add(vlen as usize);
569                if m >= input.len() {
570                    let consumed = input.len() - p;
571                    vlen = vlen.saturating_sub(consumed as u32);
572                    p = input.len();
573                    break 'machine;
574                }
575                if input[m] != CR {
576                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
577                }
578                p = m + 1;
579                state = ReqState::AlmostDone;
580            }
581            ReqState::SpacesBeforeNum => {
582                if ch == b' ' {
583                    p += 1;
584                } else if ch.is_ascii_digit() || ch == b'-' {
585                    token = Some(p);
586                    state = ReqState::Num;
587                    p += 1;
588                } else {
589                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
590                }
591            }
592            ReqState::Num => {
593                if ch.is_ascii_digit() {
594                    p += 1;
595                } else if ch == b' ' || ch == CR {
596                    token = None;
597                    state = ReqState::RuntoCrlf;
598                    // Do not advance.
599                } else {
600                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
601                }
602            }
603            ReqState::RuntoCrlf => match ch {
604                b' ' => {
605                    p += 1;
606                }
607                b'n' => {
608                    if memcache_storage(ty)
609                        || memcache_arithmetic(ty)
610                        || memcache_delete(ty)
611                        || memcache_touch(ty)
612                    {
613                        token = Some(p);
614                        state = ReqState::Noreply;
615                        p += 1;
616                    } else {
617                        return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
618                    }
619                }
620                CR => {
621                    if memcache_storage(ty) {
622                        state = ReqState::RuntoVal;
623                    } else {
624                        state = ReqState::AlmostDone;
625                    }
626                    p += 1;
627                }
628                _ => {
629                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
630                }
631            },
632            ReqState::Noreply => match ch {
633                b' ' | CR => {
634                    let start = match token {
635                        Some(s) => s,
636                        None => {
637                            return finish_error(
638                                r, state, p, token, vlen, ty, is_read, quit, ntokens,
639                            );
640                        }
641                    };
642                    if p - start == 7 && &input[start..p] == b"noreply" {
643                        token = None;
644                        expect_reply = false;
645                        state = ReqState::AfterNoreply;
646                        // Do not advance.
647                    } else {
648                        return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
649                    }
650                }
651                _ => {
652                    p += 1;
653                }
654            },
655            ReqState::AfterNoreply => match ch {
656                b' ' => {
657                    p += 1;
658                }
659                CR => {
660                    if memcache_storage(ty) {
661                        state = ReqState::RuntoVal;
662                    } else {
663                        state = ReqState::AlmostDone;
664                    }
665                    p += 1;
666                }
667                _ => {
668                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
669                }
670            },
671            ReqState::Crlf => match ch {
672                b' ' => {
673                    p += 1;
674                }
675                CR => {
676                    state = ReqState::AlmostDone;
677                    p += 1;
678                }
679                _ => {
680                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
681                }
682            },
683            ReqState::AlmostDone => match ch {
684                LF => {
685                    return finish_done(r, p + 1, ty, is_read, quit, expect_reply, ntokens, vlen);
686                }
687                _ => {
688                    return finish_error(r, state, p, token, vlen, ty, is_read, quit, ntokens);
689                }
690            },
691        }
692    }
693
694    // Reached end of input without completing.
695    r.set_parser_state(state as u32);
696    r.set_parser_pos(p);
697    r.set_parser_token(token);
698    r.set_vlen(vlen);
699    r.set_ntokens(ntokens);
700    if ty != MsgType::Unknown {
701        r.set_type(ty);
702    }
703    r.flags_mut().is_read = is_read;
704    r.flags_mut().quit = quit;
705    r.flags_mut().expect_datastore_reply = expect_reply;
706    r.set_parse_result(MsgParseResult::Again);
707    MsgParseResult::Again
708}
709
710#[allow(clippy::too_many_arguments)]
711fn finish_done(
712    r: &mut Msg,
713    next_pos: usize,
714    ty: MsgType,
715    is_read: bool,
716    quit: bool,
717    expect_reply: bool,
718    ntokens: u32,
719    vlen: u32,
720) -> MsgParseResult {
721    r.set_type(ty);
722    r.flags_mut().is_read = is_read;
723    r.flags_mut().quit = quit;
724    r.flags_mut().expect_datastore_reply = expect_reply;
725    r.set_ntokens(ntokens);
726    r.set_vlen(vlen);
727    r.set_parser_state(ReqState::Start as u32);
728    r.set_parser_pos(next_pos);
729    r.set_parser_token(None);
730    r.set_parse_result(MsgParseResult::Ok);
731    MsgParseResult::Ok
732}
733
734#[allow(clippy::too_many_arguments)]
735fn finish_error(
736    r: &mut Msg,
737    state: ReqState,
738    pos: usize,
739    token: Option<usize>,
740    vlen: u32,
741    ty: MsgType,
742    is_read: bool,
743    quit: bool,
744    ntokens: u32,
745) -> MsgParseResult {
746    r.set_parser_state(state as u32);
747    r.set_parser_pos(pos);
748    r.set_parser_token(token);
749    r.set_vlen(vlen);
750    r.set_ntokens(ntokens);
751    if ty != MsgType::Unknown {
752        r.set_type(ty);
753    }
754    r.flags_mut().is_read = is_read;
755    r.flags_mut().quit = quit;
756    r.set_parse_result(MsgParseResult::Error);
757    MsgParseResult::Error
758}
759
760/// Parse a Memcached response from `input` and update `r` in place.
761///
762/// On success the response type is recorded and the parser cursor
763/// advances just past the trailing LF. The function never panics
764/// on any byte sequence.
765///
766/// # Examples
767///
768/// ```
769/// use dynomite::msg::{Msg, MsgParseResult, MsgType};
770/// use dynomite::proto::memcache::memcache_parse_rsp;
771///
772/// let mut r = Msg::new(0, MsgType::Unknown, false);
773/// let res = memcache_parse_rsp(&mut r, b"STORED\r\n");
774/// assert_eq!(res, MsgParseResult::Ok);
775/// assert_eq!(r.ty(), MsgType::RspMcStored);
776/// ```
777///
778/// The state machine intentionally lives in a single function to
779/// match the reference engine.
780#[allow(clippy::too_many_lines)]
781pub fn memcache_parse_rsp(r: &mut Msg, input: &[u8]) -> MsgParseResult {
782    if r.is_request() {
783        r.set_parse_result(MsgParseResult::Error);
784        return MsgParseResult::Error;
785    }
786    let mut state = RspState::from_u32(r.parser_state());
787    let mut p = r.parser_pos();
788    let mut token: Option<usize> = r.parser_token();
789    let mut vlen = r.vlen();
790    let mut ty = r.ty();
791    let mut end_marker = r.end_marker();
792
793    while p < input.len() {
794        let ch = input[p];
795        match state {
796            RspState::Start => {
797                if ch.is_ascii_digit() {
798                    state = RspState::RspNum;
799                } else {
800                    state = RspState::RspStr;
801                }
802                // Do not advance; re-enter under the new state.
803            }
804            RspState::RspNum => {
805                if token.is_none() {
806                    token = Some(p);
807                }
808                if ch.is_ascii_digit() {
809                    p += 1;
810                } else if ch == b' ' || ch == CR {
811                    token = None;
812                    ty = MsgType::RspMcNum;
813                    state = RspState::Crlf;
814                    // Do not advance.
815                } else {
816                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
817                }
818            }
819            RspState::RspStr => {
820                if token.is_none() {
821                    token = Some(p);
822                }
823                if ch == b' ' || ch == CR {
824                    let start = token.expect("token recorded");
825                    let key_bytes = &input[start..p];
826                    ty = classify_response(key_bytes);
827                    if ty == MsgType::RspMcEnd {
828                        end_marker = Some(start);
829                    }
830                    match ty {
831                        MsgType::Unknown => {
832                            return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
833                        }
834                        MsgType::RspMcStored
835                        | MsgType::RspMcNotStored
836                        | MsgType::RspMcExists
837                        | MsgType::RspMcNotFound
838                        | MsgType::RspMcDeleted
839                        | MsgType::RspMcTouched
840                        | MsgType::RspMcEnd
841                        | MsgType::RspMcError => {
842                            state = RspState::Crlf;
843                        }
844                        MsgType::RspMcValue => {
845                            state = RspState::SpacesBeforeKey;
846                        }
847                        MsgType::RspMcClientError | MsgType::RspMcServerError => {
848                            state = RspState::RuntoCrlf;
849                        }
850                        _ => {
851                            return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
852                        }
853                    }
854                    // Do not advance; re-enter on the same byte.
855                } else {
856                    p += 1;
857                }
858            }
859            RspState::SpacesBeforeKey => {
860                if ch == b' ' {
861                    p += 1;
862                } else {
863                    state = RspState::Key;
864                    // Do not advance.
865                }
866            }
867            RspState::Key => {
868                if ch == b' ' {
869                    state = RspState::SpacesBeforeFlags;
870                }
871                p += 1;
872            }
873            RspState::SpacesBeforeFlags => {
874                if ch == b' ' {
875                    p += 1;
876                } else if ch.is_ascii_digit() {
877                    state = RspState::Flags;
878                    // Do not advance.
879                } else {
880                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
881                }
882            }
883            RspState::Flags => {
884                if ch.is_ascii_digit() {
885                    p += 1;
886                } else if ch == b' ' {
887                    state = RspState::SpacesBeforeVlen;
888                    p += 1;
889                } else {
890                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
891                }
892            }
893            RspState::SpacesBeforeVlen => {
894                if ch == b' ' {
895                    p += 1;
896                } else if ch.is_ascii_digit() {
897                    state = RspState::Vlen;
898                    vlen = 0;
899                    // Do not advance.
900                } else {
901                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
902                }
903            }
904            RspState::Vlen => {
905                if ch.is_ascii_digit() {
906                    vlen = vlen.saturating_mul(10).saturating_add(u32::from(ch - b'0'));
907                    p += 1;
908                } else if ch == b' ' || ch == CR {
909                    state = RspState::RuntoCrlf;
910                    // Do not advance.
911                } else {
912                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
913                }
914            }
915            RspState::RuntoVal => match ch {
916                LF => {
917                    state = RspState::Val;
918                    token = None;
919                    p += 1;
920                }
921                _ => {
922                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
923                }
924            },
925            RspState::Val => {
926                let m = p.saturating_add(vlen as usize);
927                if m >= input.len() {
928                    let consumed = input.len() - p;
929                    vlen = vlen.saturating_sub(consumed as u32);
930                    p = input.len();
931                    break;
932                }
933                if input[m] != CR {
934                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
935                }
936                p = m + 1;
937                state = RspState::ValLf;
938            }
939            RspState::ValLf => match ch {
940                LF => {
941                    state = RspState::RspStr;
942                    p += 1;
943                }
944                _ => {
945                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
946                }
947            },
948            RspState::End => {
949                if token.is_none() {
950                    if ch != b'E' {
951                        return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
952                    }
953                    token = Some(p);
954                    p += 1;
955                } else if ch == CR {
956                    let start = token.expect("token recorded");
957                    if p - start == 3 && &input[start..p] == b"END" {
958                        end_marker = Some(start);
959                        state = RspState::AlmostDone;
960                        token = None;
961                        p += 1;
962                    } else {
963                        return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
964                    }
965                } else {
966                    p += 1;
967                }
968            }
969            RspState::RuntoCrlf => match ch {
970                CR => {
971                    if ty == MsgType::RspMcValue {
972                        state = RspState::RuntoVal;
973                    } else {
974                        state = RspState::AlmostDone;
975                    }
976                    p += 1;
977                }
978                _ => {
979                    p += 1;
980                }
981            },
982            RspState::Crlf => match ch {
983                b' ' => {
984                    p += 1;
985                }
986                CR => {
987                    state = RspState::AlmostDone;
988                    p += 1;
989                }
990                _ => {
991                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
992                }
993            },
994            RspState::AlmostDone => match ch {
995                LF => {
996                    r.set_type(ty);
997                    r.set_vlen(vlen);
998                    r.set_end_marker(end_marker);
999                    r.set_parser_state(RspState::Start as u32);
1000                    r.set_parser_pos(p + 1);
1001                    r.set_parser_token(None);
1002                    r.set_parse_result(MsgParseResult::Ok);
1003                    return MsgParseResult::Ok;
1004                }
1005                _ => {
1006                    return finish_error_rsp(r, state, p, token, vlen, ty, end_marker);
1007                }
1008            },
1009        }
1010    }
1011
1012    r.set_parser_state(state as u32);
1013    r.set_parser_pos(p);
1014    r.set_parser_token(token);
1015    r.set_vlen(vlen);
1016    r.set_end_marker(end_marker);
1017    if ty != MsgType::Unknown {
1018        r.set_type(ty);
1019    }
1020    r.set_parse_result(MsgParseResult::Again);
1021    MsgParseResult::Again
1022}
1023
1024fn finish_error_rsp(
1025    r: &mut Msg,
1026    state: RspState,
1027    pos: usize,
1028    token: Option<usize>,
1029    vlen: u32,
1030    ty: MsgType,
1031    end_marker: Option<usize>,
1032) -> MsgParseResult {
1033    r.set_parser_state(state as u32);
1034    r.set_parser_pos(pos);
1035    r.set_parser_token(token);
1036    r.set_vlen(vlen);
1037    r.set_end_marker(end_marker);
1038    if ty != MsgType::Unknown {
1039        r.set_type(ty);
1040    }
1041    r.set_parse_result(MsgParseResult::Error);
1042    MsgParseResult::Error
1043}
1044
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the request parser once over `input` on a fresh message built
    /// with the same constructor arguments the request tests have always
    /// used, and hand the message back for inspection.
    fn parse_req(input: &[u8]) -> Msg {
        let mut m = Msg::new(0, MsgType::Unknown, true);
        let _ = memcache_parse_req(&mut m, input);
        m
    }

    /// Response-side counterpart of [`parse_req`].
    fn parse_rsp(input: &[u8]) -> Msg {
        let mut m = Msg::new(0, MsgType::Unknown, false);
        let _ = memcache_parse_rsp(&mut m, input);
        m
    }

    #[test]
    fn parse_get() {
        let m = parse_req(b"get key1\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::ReqMcGet);
        assert_eq!(m.keys()[0].key(), b"key1");
        assert!(m.flags().is_read);
    }

    #[test]
    fn parse_set() {
        let m = parse_req(b"set key1 0 0 3\r\nval\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::ReqMcSet);
        assert_eq!(m.keys()[0].key(), b"key1");
        assert_eq!(m.vlen(), 3);
    }

    #[test]
    fn parse_set_noreply() {
        let m = parse_req(b"set key1 0 0 3 noreply\r\nval\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert!(!m.flags().expect_datastore_reply);
    }

    #[test]
    fn parse_delete() {
        let m = parse_req(b"delete key1\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::ReqMcDelete);
    }

    #[test]
    fn parse_incr() {
        let m = parse_req(b"incr counter 1\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::ReqMcIncr);
    }

    #[test]
    fn parse_quit() {
        let m = parse_req(b"quit\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::ReqMcQuit);
        assert!(m.flags().quit);
    }

    #[test]
    fn parse_get_multikey() {
        let m = parse_req(b"get a b c\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        let keys: Vec<&[u8]> = m.keys().iter().map(crate::msg::KeyPos::key).collect();
        assert_eq!(keys, vec![&b"a"[..], b"b", b"c"]);
    }

    #[test]
    fn parse_cas() {
        let m = parse_req(b"cas key1 0 0 3 7\r\nval\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::ReqMcCas);
    }

    #[test]
    fn parse_too_long_key_errors() {
        let mut input = b"get ".to_vec();
        input.extend(std::iter::repeat_n(b'k', MEMCACHE_MAX_KEY_LENGTH + 1));
        input.extend_from_slice(b"\r\n");
        let m = parse_req(&input);
        assert_eq!(m.parse_result(), MsgParseResult::Error);
    }

    #[test]
    fn parse_empty_key_errors() {
        let m = parse_req(b"get \r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Error);
    }

    #[test]
    fn parse_truncated_returns_again() {
        let m = parse_req(b"get key");
        assert_eq!(m.parse_result(), MsgParseResult::Again);
    }

    #[test]
    fn parse_stored_response() {
        let m = parse_rsp(b"STORED\r\n");
        // The type alone is not enough: the parser also records it when it
        // suspends with `Again`, so completion is asserted explicitly.
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::RspMcStored);
    }

    #[test]
    fn parse_truncated_response_returns_again() {
        // The closing LF is missing, so the parser must suspend while
        // still reporting the keyword it already classified.
        let m = parse_rsp(b"STORED\r");
        assert_eq!(m.parse_result(), MsgParseResult::Again);
        assert_eq!(m.ty(), MsgType::RspMcStored);
    }

    #[test]
    fn parse_value_response() {
        let m = parse_rsp(b"VALUE key 0 3\r\nval\r\nEND\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::RspMcEnd);
    }

    #[test]
    fn parse_partial_value_payload_returns_again() {
        // Only two of the three announced payload bytes are buffered; the
        // outstanding count is what remains in `vlen`.
        let m = parse_rsp(b"VALUE key 0 3\r\nva");
        assert_eq!(m.parse_result(), MsgParseResult::Again);
        assert_eq!(m.ty(), MsgType::RspMcValue);
        assert_eq!(m.vlen(), 1);
    }

    #[test]
    fn parse_numeric_response() {
        let m = parse_rsp(b"42\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::RspMcNum);
    }

    #[test]
    fn parse_server_error_response() {
        let m = parse_rsp(b"SERVER_ERROR oops\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Ok);
        assert_eq!(m.ty(), MsgType::RspMcServerError);
    }

    #[test]
    fn parse_response_unknown_keyword_errors() {
        let m = parse_rsp(b"BOGUS\r\n");
        assert_eq!(m.parse_result(), MsgParseResult::Error);
    }
}