Skip to main content

quick_xml/de/
map.rs

1//! Serde `Deserializer` module
2
3use crate::{
4    de::key::QNameDeserializer,
5    de::resolver::EntityResolver,
6    de::simple_type::SimpleTypeDeserializer,
7    de::text::TextDeserializer,
8    de::{DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
9    errors::serialize::DeError,
10    errors::Error,
11    events::attributes::IterState,
12    events::BytesStart,
13    name::QName,
14};
15use serde::de::value::BorrowedStrDeserializer;
16use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
17use std::borrow::Cow;
18use std::ops::Range;
19
/// Defines a source that should be used to deserialize a value in the next call
/// to [`next_value_seed()`](MapAccess::next_value_seed)
#[derive(Debug, PartialEq, Eq)]
enum ValueSource {
    /// Source is not specified, because [`next_key_seed()`] was not yet called.
    /// This is the initial state and the state after deserializing a value
    /// (after a call of [`next_value_seed()`]).
    ///
    /// An attempt to call [`next_value_seed()`] while the accessor is in this
    /// state would return a [`DeError::KeyNotRead`] error.
    ///
    /// [`next_key_seed()`]: MapAccess::next_key_seed
    /// [`next_value_seed()`]: MapAccess::next_value_seed
    Unknown,
    /// Next value should be deserialized from an attribute value; value is located
    /// at the specified span.
    Attribute(Range<usize>),
    /// Value should be deserialized from the text content of the XML node, which
    /// is represented either by an ordinary text node, or by a CDATA node:
    ///
    /// ```xml
    /// <any-tag>
    ///     <key>text content</key>
    /// <!--     ^^^^^^^^^^^^ - this will be used to deserialize map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <key><![CDATA[cdata content]]></key>
    /// <!--              ^^^^^^^^^^^^^ - this will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    Text,
    /// Next value should be deserialized from an element with any name, except
    /// elements with a name matching one of the struct fields. The corresponding
    /// tag name will always be associated with a field with name [`VALUE_KEY`].
    ///
    /// That state is set when a call to [`peek()`] returns a [`Start`] event, whose
    /// [`name()`] is not listed in the [list of known fields] (which for a struct
    /// is a list of field names, and for a map is an empty list), _and_
    /// the struct has a field with the special name [`VALUE_KEY`].
    ///
    /// When in this state, the next event, returned by [`next()`], will be a
    /// [`Start`], which represents both a key and a value. The value would be
    /// deserialized from the whole element and how it will be done is determined
    /// by the value deserializer. The [`ElementMapAccess`] does not consume any
    /// events in that state.
    ///
    /// Because in that state any encountered `<tag>` is mapped to the [`VALUE_KEY`]
    /// field, it is possible to use the tag name as an enum discriminator, so
    /// `enum`s can be deserialized from such XMLs:
    ///
    /// ```xml
    /// <any-tag>
    ///     <variant1>...</variant1>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant1 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <variant2>...</variant2>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant2 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// both can be deserialized into
    ///
    /// ```ignore
    /// enum Enum {
    ///   variant1,
    ///   variant2,
    /// }
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   field: Enum,
    /// }
    /// ```
    ///
    /// That is possible, because the value deserializer has access to the full
    /// content of a `<variant1>...</variant1>` or `<variant2>...</variant2>` node,
    /// including the tag name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Text`]: Self::Text
    /// [list of known fields]: ElementMapAccess::fields
    Content,
    /// Next value should be deserialized from an element with a dedicated name.
    /// If the deserialized type is a sequence, then that sequence will collect all
    /// elements with the same name until it is filled. If not all elements
    /// would be consumed, the rest will be ignored.
    ///
    /// That state is set when a call to [`peek()`] returns a [`Start`] event, whose
    /// [`name()`] represents a field name. That name will be deserialized as a key.
    ///
    /// When in this state, the next event, returned by [`next()`], will be a
    /// [`Start`], which represents both a key and a value. The value would be
    /// deserialized from the whole element and how it will be done is determined
    /// by the value deserializer. The [`ElementMapAccess`] does not consume any
    /// events in that state.
    ///
    /// An illustration below shows what data is used to deserialize key and value:
    /// ```xml
    /// <any-tag>
    ///     <key>...</key>
    /// <!-- ~~~           - this data will be used to deserialize a map key -->
    /// <!--^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// Although the value deserializer will have access to the full content of a
    /// `<key>` node (including the tag name), it will not get much benefit from
    /// that, because the tag name will always be fixed for a given map field
    /// (equal to a field name). So, if the field type is an `enum`, it cannot
    /// select its variant based on the tag name. If that is needed, then the
    /// [`Content`] variant of this enum should be used. Such usage is enabled by
    /// annotating a struct field as a "content" field, which is implemented by
    /// giving the field the special [`VALUE_KEY`] name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Content`]: Self::Content
    Nested,
}
148
149////////////////////////////////////////////////////////////////////////////////////////////////////
150
/// A deserializer that extracts map-like structures from an XML. This deserializer
/// represents one XML tag:
///
/// ```xml
/// <tag>...</tag>
/// ```
///
/// Name of this tag is stored in a [`Self::start`] property.
///
/// # Lifetimes
///
/// - `'de` lifetime represents a buffer, from which deserialized values can
///   borrow their data. Depending on the underlying reader, there can be an
///   internal buffer of deserializer (i.e. deserializer itself) or an input
///   (in that case it is possible to approach zero-copy deserialization).
///
/// - `'d` lifetime represents a parent deserializer, which could own the data
///   buffer.
pub(crate) struct ElementMapAccess<'de, 'd, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Tag -- owner of attributes
    start: BytesStart<'de>,
    /// Parent deserializer; the events that form the content of the tag are
    /// peeked and read through it.
    de: &'d mut Deserializer<'de, R, E>,
    /// State of the iterator over attributes. Contains the next position in the
    /// inner `start` slice, from which the next attribute should be parsed.
    iter: IterState,
    /// Current state of the accessor that determines what the next call to API
    /// methods should return.
    source: ValueSource,
    /// List of field names of the struct. It is empty for maps
    fields: &'static [&'static str],
    /// If `true`, then the deserialized struct has a field with a special name:
    /// [`VALUE_KEY`]. That field should be deserialized from the whole content
    /// of an XML node, including tag name:
    ///
    /// ```xml
    /// <tag>value for VALUE_KEY field</tag>
    /// ```
    has_value_field: bool,
    /// If `true`, then the deserialized struct has a field with a special name:
    /// [`TEXT_KEY`].
    has_text_field: bool,
}
197
198impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E>
199where
200    R: XmlRead<'de>,
201    E: EntityResolver,
202{
203    /// Create a new ElementMapAccess
204    pub fn new(
205        de: &'d mut Deserializer<'de, R, E>,
206        start: BytesStart<'de>,
207        fields: &'static [&'static str],
208    ) -> Self {
209        Self {
210            de,
211            iter: IterState::new(start.name().as_ref().len(), false),
212            start,
213            source: ValueSource::Unknown,
214            fields,
215            has_value_field: fields.contains(&VALUE_KEY),
216            has_text_field: fields.contains(&TEXT_KEY),
217        }
218    }
219
220    /// Determines if subtree started with the specified event shoould be skipped.
221    ///
222    /// Used to map elements with `xsi:nil` attribute set to true to `None` in optional contexts.
223    ///
224    /// We need to handle two attributes:
225    /// - on parent element: `<map xsi:nil="true"><foo/></map>`
226    /// - on this element:   `<map><foo xsi:nil="true"/></map>`
227    ///
228    /// We check parent element too because `xsi:nil` affects only nested elements of the
229    /// tag where it is defined. We can map structure with fields mapped to attributes to
230    /// the `<map>` element and set to `None` all its optional elements.
231    fn should_skip_subtree(&self, start: &BytesStart) -> bool {
232        self.de.reader.reader.has_nil_attr(&self.start) || self.de.reader.reader.has_nil_attr(start)
233    }
234
235    /// Skips whitespaces when they are not preserved
236    #[inline]
237    fn skip_whitespaces(&mut self) -> Result<(), DeError> {
238        // TODO: respect the `xml:space` attribute and probably some deserialized type sign
239        self.de.skip_whitespaces()
240    }
241}
242
243impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E>
244where
245    R: XmlRead<'de>,
246    E: EntityResolver,
247{
248    type Error = DeError;
249
250    fn next_key_seed<K: DeserializeSeed<'de>>(
251        &mut self,
252        seed: K,
253    ) -> Result<Option<K::Value>, Self::Error> {
254        debug_assert_eq!(self.source, ValueSource::Unknown);
255
256        // FIXME: There error positions counted from the start of tag name - need global position
257        let slice = &self.start.buf;
258        let decoder = self.start.decoder();
259
260        if let Some(a) = self.iter.next(slice).transpose()? {
261            // try getting map from attributes (key= "value")
262            let (key, value) = a.into();
263            self.source = ValueSource::Attribute(value.unwrap_or_default());
264
265            // Attributes in mapping starts from @ prefix
266            // TODO: Customization point - may customize prefix
267            self.de.key_buf.clear();
268            self.de.key_buf.push('@');
269
270            let de =
271                QNameDeserializer::from_attr(QName(&slice[key]), decoder, &mut self.de.key_buf)?;
272            seed.deserialize(de).map(Some)
273        } else {
274            self.skip_whitespaces()?;
275            // try getting from events (<key>value</key>)
276            match self.de.peek()? {
277                // If we have dedicated "$text" field, it will not be passed to "$value" field
278                DeEvent::Text(_) if self.has_value_field && !self.has_text_field => {
279                    self.source = ValueSource::Content;
280                    // Deserialize `key` from special attribute name which means
281                    // that value should be taken from the text content of the
282                    // XML node
283                    let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
284                    seed.deserialize(de).map(Some)
285                }
286                DeEvent::Text(_) => {
287                    self.source = ValueSource::Text;
288                    // Deserialize `key` from special attribute name which means
289                    // that value should be taken from the text content of the
290                    // XML node
291                    let de = BorrowedStrDeserializer::<DeError>::new(TEXT_KEY);
292                    seed.deserialize(de).map(Some)
293                }
294                // Used to deserialize collections of enums, like:
295                // <root>
296                //   <A/>
297                //   <B/>
298                //   <C/>
299                // </root>
300                //
301                // into
302                //
303                // enum Enum { A, B, С }
304                // struct Root {
305                //     #[serde(rename = "$value")]
306                //     items: Vec<Enum>,
307                // }
308                // TODO: This should be handled by #[serde(flatten)]
309                // See https://github.com/serde-rs/serde/issues/1905
310                DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e)? => {
311                    self.source = ValueSource::Content;
312
313                    let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
314                    seed.deserialize(de).map(Some)
315                }
316                DeEvent::Start(e) => {
317                    self.source = ValueSource::Nested;
318
319                    let de = QNameDeserializer::from_elem(e)?;
320                    seed.deserialize(de).map(Some)
321                }
322                // Stop iteration after reaching a closing tag
323                // The matching tag name is guaranteed by the reader if our
324                // deserializer implementation is correct
325                DeEvent::End(e) => {
326                    debug_assert_eq!(self.start.name(), e.name());
327                    // Consume End
328                    self.de.next()?;
329                    Ok(None)
330                }
331                // We cannot get `Eof` legally, because we always inside of the
332                // opened tag `self.start`
333                DeEvent::Eof => {
334                    Err(Error::missed_end(self.start.name(), self.start.decoder()).into())
335                }
336            }
337        }
338    }
339
340    fn next_value_seed<K: DeserializeSeed<'de>>(
341        &mut self,
342        seed: K,
343    ) -> Result<K::Value, Self::Error> {
344        match std::mem::replace(&mut self.source, ValueSource::Unknown) {
345            ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
346                &self.start.buf,
347                value,
348                self.start.decoder(),
349            )),
350            // This arm processes the following XML shape:
351            // <any-tag>
352            //   text value
353            // </any-tag>
354            // The whole map represented by an `<any-tag>` element, the map key
355            // is implicit and equals to the `TEXT_KEY` constant, and the value
356            // is a `Text` event (the value deserializer will see that event)
357            // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs
358            ValueSource::Text => match self.de.next()? {
359                DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
360                // SAFETY: We set `Text` only when we seen `Text`
361                _ => unreachable!(),
362            },
363            // This arm processes the following XML shape:
364            // <any-tag>
365            //   <any>...</any>
366            // </any-tag>
367            // The whole map represented by an `<any-tag>` element, the map key
368            // is implicit and equals to the `VALUE_KEY` constant, and the value
369            // is a `Start` event (the value deserializer will see that event)
370            ValueSource::Content => seed.deserialize(MapValueDeserializer {
371                map: self,
372                fixed_name: false,
373            }),
374            // This arm processes the following XML shape:
375            // <any-tag>
376            //   <tag>...</tag>
377            // </any-tag>
378            // The whole map represented by an `<any-tag>` element, the map key
379            // is a `tag`, and the value is a `Start` event (the value deserializer
380            // will see that event)
381            ValueSource::Nested => seed.deserialize(MapValueDeserializer {
382                map: self,
383                fixed_name: true,
384            }),
385            ValueSource::Unknown => Err(DeError::KeyNotRead),
386        }
387    }
388}
389
390////////////////////////////////////////////////////////////////////////////////////////////////////
391
/// A deserializer for a value of a map or a struct. That deserializer processes
/// events for primitive types and sequences slightly differently than
/// a [`Deserializer`].
///
/// This deserializer is used to deserialize two kinds of fields:
/// - usual fields with a dedicated name, such as `field_one` or `field_two`, in
///   that case field [`Self::fixed_name`] is `true`;
/// - the special `$value` field which represents any tag or a textual content
///   in the XML which would be found in the document, in that case field
///   [`Self::fixed_name`] is `false`.
///
/// This deserializer can see two kinds of events at the start:
/// - [`DeEvent::Text`]
/// - [`DeEvent::Start`]
///
/// which represent two possible variants of items:
/// ```xml
/// <item>A tag item</item>
/// A text item
/// <yet another="tag item"/>
/// ```
///
/// This deserializer is very similar to an [`ElementDeserializer`]. The only
/// difference is in the `deserialize_seq` method. This deserializer will act as
/// an iterator over tags / text within its parent tag, whereas the
/// [`ElementDeserializer`] will represent sequences as an `xs:list`.
///
/// This deserializer processes items as follows:
/// - primitives (numbers, booleans, strings, characters) are deserialized either
///   from a text content, or unwrapped from one level of a tag. So, `123` and
///   `<int>123</int>` both can be deserialized into an `u32`;
/// - `Option`:
///   - empty text of [`DeEvent::Text`] is deserialized as `None`;
///   - everything else is deserialized as `Some` using the same deserializer,
///     including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consume the whole text or element subtree;
/// - newtype structs are deserialized by forwarding deserialization of the inner
///   type with the same deserializer;
/// - sequences, tuples and tuple structs are deserialized by iterating within the
///   parent tag and deserializing each tag or text content using [`ElementDeserializer`];
/// - structs and maps are deserialized using a new instance of [`ElementMapAccess`];
/// - enums:
///   - in case of a [`DeEvent::Text`] event the text content is deserialized as
///     a `$text` variant. Enum content is deserialized from the text using
///     [`SimpleTypeDeserializer`];
///   - in case of a [`DeEvent::Start`] event the tag name is deserialized as
///     an enum tag, and the content inside is deserialized as an enum content.
///     Depending on a variant kind deserialization is performed as:
///     - unit variants: consuming text content or a subtree;
///     - newtype variants: forward deserialization to the inner type using
///       this deserializer;
///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
///     - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct MapValueDeserializer<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this deserializer. Gives access to the
    /// context, such as the list of fields that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Whether this deserializer was created for deserialization from an element
    /// with a fixed name, or elements with different names or even text are allowed.
    ///
    /// If this field is `true`, we process the `<tag>` element in the following XML shape:
    ///
    /// ```xml
    /// <any-tag>
    ///   <tag>...</tag>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is a `tag`,
    /// and the value starts with a `Start("tag")` (the value deserializer will
    /// see that event first) and extends to the matching `End("tag")` event.
    /// In order to deserialize primitives (such as `usize`) we need to allow
    /// looking inside one level of tags, so the
    ///
    /// ```xml
    /// <tag>42</tag>
    /// ```
    ///
    /// could be deserialized into `42usize` without problems, and at the same time
    ///
    /// ```xml
    /// <tag>
    ///   <key1/>
    ///   <key2/>
    ///   <!--...-->
    /// </tag>
    /// ```
    /// could be deserialized to a struct.
    ///
    /// If this field is `false`, we process one of the following XML shapes:
    ///
    /// ```xml
    /// <any-tag>
    ///   text value
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <![CDATA[cdata value]]>
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <any>...</any>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is
    /// implicit and equals to the [`VALUE_KEY`] constant, and the value is
    /// a [`Text`], or a [`Start`] event (the value deserializer will see one of
    /// those events). In the first two cases the value of this field does not matter
    /// (because we already see the textual event and there are no reasons to look
    /// "inside" something), but in the last case the primitives should raise
    /// a deserialization error, because that means that you are trying to
    /// deserialize the following struct:
    ///
    /// ```ignore
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   any_name: String,
    /// }
    /// ```
    /// which means that `any_name` should get the content of the `<any-tag>` element.
    ///
    /// Changing this can be valuable for <https://github.com/tafia/quick-xml/issues/383>,
    /// but those fields should be explicitly marked that they want to get any
    /// possible markup as a `String` and that mark is different from marking them
    /// as accepting "text content" which is what `$text` currently means.
    ///
    /// [`Text`]: DeEvent::Text
    /// [`Start`]: DeEvent::Start
    fixed_name: bool,
}
532
533impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E>
534where
535    R: XmlRead<'de>,
536    E: EntityResolver,
537{
538    /// Returns a next string as concatenated content of consequent [`Text`] and
539    /// [`CData`] events, used inside [`deserialize_primitives!()`].
540    ///
541    /// [`Text`]: crate::events::Event::Text
542    /// [`CData`]: crate::events::Event::CData
543    #[inline]
544    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
545        // TODO: Read the whole content to fix https://github.com/tafia/quick-xml/issues/483
546        self.map.de.read_string_impl(self.fixed_name)
547    }
548}
549
550impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
551where
552    R: XmlRead<'de>,
553    E: EntityResolver,
554{
555    type Error = DeError;
556
557    deserialize_primitives!(mut);
558
559    #[inline]
560    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
561    where
562        V: Visitor<'de>,
563    {
564        self.map.de.deserialize_unit(visitor)
565    }
566
567    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
568    where
569        V: Visitor<'de>,
570    {
571        // We cannot use result of `peek()` directly because of borrow checker
572        let _ = self.map.de.peek()?;
573        match self.map.de.last_peeked() {
574            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
575            DeEvent::Start(start) if self.map.should_skip_subtree(start) => {
576                self.map.de.skip_next_tree()?;
577                visitor.visit_none()
578            }
579            _ => visitor.visit_some(self),
580        }
581    }
582
583    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
584    /// with the same deserializer.
585    fn deserialize_newtype_struct<V>(
586        self,
587        _name: &'static str,
588        visitor: V,
589    ) -> Result<V::Value, Self::Error>
590    where
591        V: Visitor<'de>,
592    {
593        visitor.visit_newtype_struct(self)
594    }
595
596    /// Deserializes each `<tag>` in
597    /// ```xml
598    /// <any-tag>
599    ///   <tag>...</tag>
600    ///   <tag>...</tag>
601    ///   <tag>...</tag>
602    /// </any-tag>
603    /// ```
604    /// as a sequence item, where `<any-tag>` represents a Map in a [`Self::map`],
605    /// and a `<tag>` is a sequential field of that map.
606    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
607    where
608        V: Visitor<'de>,
609    {
610        let filter = if self.fixed_name {
611            match self.map.de.peek()? {
612                // Clone is cheap if event borrows from the input
613                DeEvent::Start(e) => TagFilter::Include(e.clone()),
614                // SAFETY: we use that deserializer with `fixed_name == true`
615                // only from the `ElementMapAccess::next_value_seed` and only when we
616                // peeked `Start` event
617                _ => unreachable!(),
618            }
619        } else {
620            TagFilter::Exclude(self.map.fields, self.map.has_text_field)
621        };
622        visitor.visit_seq(MapValueSeqAccess {
623            #[cfg(feature = "overlapped-lists")]
624            checkpoint: self.map.de.skip_checkpoint(),
625
626            map: self.map,
627            filter,
628        })
629    }
630
631    #[inline]
632    fn deserialize_struct<V>(
633        self,
634        name: &'static str,
635        fields: &'static [&'static str],
636        visitor: V,
637    ) -> Result<V::Value, Self::Error>
638    where
639        V: Visitor<'de>,
640    {
641        self.map.de.deserialize_struct(name, fields, visitor)
642    }
643
644    fn deserialize_enum<V>(
645        self,
646        _name: &'static str,
647        _variants: &'static [&'static str],
648        visitor: V,
649    ) -> Result<V::Value, Self::Error>
650    where
651        V: Visitor<'de>,
652    {
653        if self.fixed_name {
654            match self.map.de.next()? {
655                // Handles <field>UnitEnumVariant</field>
656                DeEvent::Start(e) => {
657                    // skip <field>, read text after it and ensure that it is ended by </field>
658                    let text = self.map.de.read_text(e.name())?;
659                    if text.is_empty() {
660                        // Map empty text (<field/>) to a special `$text` variant
661                        visitor.visit_enum(SimpleTypeDeserializer::from_text(TEXT_KEY.into()))
662                    } else {
663                        visitor.visit_enum(SimpleTypeDeserializer::from_text(text))
664                    }
665                }
666                // SAFETY: we use that deserializer with `fixed_name == true`
667                // only from the `MapAccess::next_value_seed` and only when we
668                // peeked `Start` event
669                _ => unreachable!(),
670            }
671        } else {
672            visitor.visit_enum(self)
673        }
674    }
675
676    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
677    where
678        V: Visitor<'de>,
679    {
680        match self.map.de.peek()? {
681            DeEvent::Text(_) => self.deserialize_str(visitor),
682            _ => self.deserialize_map(visitor),
683        }
684    }
685}
686
687impl<'de, 'd, 'm, R, E> de::EnumAccess<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
688where
689    R: XmlRead<'de>,
690    E: EntityResolver,
691{
692    type Error = DeError;
693    type Variant = MapValueVariantAccess<'de, 'd, 'm, R, E>;
694
695    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
696    where
697        V: DeserializeSeed<'de>,
698    {
699        let (name, is_text) = match self.map.de.peek()? {
700            DeEvent::Start(e) => (seed.deserialize(QNameDeserializer::from_elem(e)?)?, false),
701            DeEvent::Text(_) => (
702                seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
703                true,
704            ),
705            // SAFETY: we use that deserializer only when we peeked `Start` or `Text` event
706            _ => unreachable!(),
707        };
708        Ok((
709            name,
710            MapValueVariantAccess {
711                map: self.map,
712                is_text,
713            },
714        ))
715    }
716}
717
/// An accessor to the content of an enum variant, created by the
/// [`EnumAccess`](de::EnumAccess) implementation of [`MapValueDeserializer`]
/// after the variant name was determined from a peeked text or start tag event.
struct MapValueVariantAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this enum accessor. Gives access to the
    /// context, such as the list of fields that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// `true` if the variant should be deserialized from a textual content
    /// and `false` if from a tag
    is_text: bool,
}
730
731impl<'de, 'd, 'm, R, E> de::VariantAccess<'de> for MapValueVariantAccess<'de, 'd, 'm, R, E>
732where
733    R: XmlRead<'de>,
734    E: EntityResolver,
735{
736    type Error = DeError;
737
738    fn unit_variant(self) -> Result<(), Self::Error> {
739        match self.map.de.next()? {
740            // Consume subtree
741            DeEvent::Start(e) => self.map.de.read_to_end(e.name()),
742            // Does not needed to deserialize using SimpleTypeDeserializer, because
743            // it returns `()` when `deserialize_unit()` is requested
744            DeEvent::Text(_) => Ok(()),
745            // SAFETY: the other events are filtered in `variant_seed()`
746            _ => unreachable!("Only `Start` or `Text` events are possible here"),
747        }
748    }
749
750    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
751    where
752        T: DeserializeSeed<'de>,
753    {
754        if self.is_text {
755            match self.map.de.next()? {
756                DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
757                // SAFETY: the other events are filtered in `variant_seed()`
758                _ => unreachable!("Only `Text` events are possible here"),
759            }
760        } else {
761            seed.deserialize(MapValueDeserializer {
762                map: self.map,
763                // Because element name already was either mapped to a field name,
764                // or to a variant name, we should not treat it as variable
765                fixed_name: true,
766            })
767        }
768    }
769
770    fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
771    where
772        V: Visitor<'de>,
773    {
774        if self.is_text {
775            match self.map.de.next()? {
776                DeEvent::Text(e) => {
777                    SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor)
778                }
779                // SAFETY: the other events are filtered in `variant_seed()`
780                _ => unreachable!("Only `Text` events are possible here"),
781            }
782        } else {
783            MapValueDeserializer {
784                map: self.map,
785                // Because element name already was either mapped to a field name,
786                // or to a variant name, we should not treat it as variable
787                fixed_name: true,
788            }
789            .deserialize_tuple(len, visitor)
790        }
791    }
792
793    fn struct_variant<V>(
794        self,
795        fields: &'static [&'static str],
796        visitor: V,
797    ) -> Result<V::Value, Self::Error>
798    where
799        V: Visitor<'de>,
800    {
801        match self.map.de.next()? {
802            DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.map.de, e, fields)),
803            DeEvent::Text(e) => {
804                SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor)
805            }
806            // SAFETY: the other events are filtered in `variant_seed()`
807            _ => unreachable!("Only `Start` or `Text` events are possible here"),
808        }
809    }
810}
811
812////////////////////////////////////////////////////////////////////////////////////////////////////
813
814/// Check if tag `start` is included in the `fields` list. `decoder` is used to
815/// get a string representation of a tag.
816///
817/// Returns `true`, if `start` is not in the `fields` list and `false` otherwise.
818fn not_in(fields: &'static [&'static str], start: &BytesStart) -> Result<bool, DeError> {
819    let tag = start.decoder().decode(start.local_name().into_inner())?;
820
821    Ok(fields.iter().all(|&field| field != tag.as_ref()))
822}
823
/// A filter that determines which tags should form a sequence.
///
/// There are two types of sequences:
/// - a sequence where each element is represented by tags with the same name
/// - a sequence where each element can have a different tag
///
/// The first variant could represent a collection of structs, the second --
/// a collection of enum variants.
///
/// In the second case we don't know what tag name should be expected as a
/// sequence element, so we accept any element. Since sequences are flattened
/// into maps, we skip elements which have dedicated fields in a struct by using an
/// `Exclude` filter that filters out elements with names matching field names
/// from the struct.
///
/// # Lifetimes
///
/// `'de` represents the lifetime of the XML input; it is used when the filter
/// stores the dedicated tag name
#[derive(Debug)]
enum TagFilter<'de> {
    /// A `SeqAccess` interested only in tags with the specified name, used to
    /// deserialize an XML like this:
    ///
    /// ```xml
    /// <...>
    ///   <tag/>
    ///   <tag/>
    ///   <tag/>
    ///   ...
    /// </...>
    /// ```
    ///
    /// The tag name is stored inside (`b"tag"` for that example)
    Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag
    /// A `SeqAccess` interested in tags with any name, except the explicitly
    /// listed ones. Excluded tags are used as struct field names and therefore
    /// should not fall into the `$value` category.
    ///
    /// The `bool` tells whether the `$text` special field is present in the
    /// fields array. It is used to exclude text events when a `$text` field is
    /// defined together with a `$value` field, and `$value` accepts a sequence.
    Exclude(&'static [&'static str], bool),
}
868
869impl<'de> TagFilter<'de> {
870    fn is_suitable(&self, start: &BytesStart) -> Result<bool, DeError> {
871        match self {
872            Self::Include(n) => Ok(n.name() == start.name()),
873            Self::Exclude(fields, _) => not_in(fields, start),
874        }
875    }
876    const fn need_skip_text(&self) -> bool {
877        match self {
878            // If we look only for tags, we should skip any $text keys
879            Self::Include(_) => true,
880            // If we look fo any data, we should exclude $text keys if it in the list
881            Self::Exclude(_, has_text_field) => *has_text_field,
882        }
883    }
884}
885
886////////////////////////////////////////////////////////////////////////////////////////////////////
887
/// An accessor to the sequence elements that form a value of a struct field.
/// Technically, this sequence is flattened out into the structure and the
/// sequence elements are interleaved with other fields of the structure. Each
/// call to [`Self::next_element_seed`] consumes the next sub-tree or a
/// consecutive list of [`Text`] and [`CData`] events.
///
/// ```xml
/// <>
///   ...
///   <item>This is the one item</item>
///   This is <![CDATA[one another]]> item<!-- even when--> it is split by comments
///   <tag>...and that is the third!</tag>
///   ...
/// </>
/// ```
///
/// Depending on [`Self::filter`], only some of those possible constructs would
/// be an element.
///
/// [`Text`]: crate::events::Event::Text
/// [`CData`]: crate::events::Event::CData
struct MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Accessor to the map that created this accessor and to the deserializer
    /// for the sequence items.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Filter that determines whether a tag is a part of this sequence.
    ///
    /// When feature [`overlapped-lists`] is not activated, iteration will stop
    /// when a tag that does not pass this filter is found.
    ///
    /// When feature [`overlapped-lists`] is activated, all tags that do not pass
    /// this check will be skipped.
    ///
    /// [`overlapped-lists`]: ../../index.html#overlapped-lists
    filter: TagFilter<'de>,

    /// Checkpoint after which all skipped events should be returned. All events
    /// that were skipped before creating this checkpoint will still stay
    /// buffered and will not be returned
    #[cfg(feature = "overlapped-lists")]
    checkpoint: usize,
}
934
#[cfg(feature = "overlapped-lists")]
impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Hands the stored checkpoint back to the deserializer via `start_replay`
    /// when the accessor goes away, so that the events skipped after that
    /// checkpoint can be returned again.
    fn drop(&mut self) {
        let checkpoint = self.checkpoint;
        self.map.de.start_replay(checkpoint);
    }
}
945
946impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E>
947where
948    R: XmlRead<'de>,
949    E: EntityResolver,
950{
951    type Error = DeError;
952
953    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, DeError>
954    where
955        T: DeserializeSeed<'de>,
956    {
957        loop {
958            self.map.skip_whitespaces()?;
959            break match self.map.de.peek()? {
960                // If we see a tag that we not interested, skip it
961                #[cfg(feature = "overlapped-lists")]
962                DeEvent::Start(e) if !self.filter.is_suitable(e)? => {
963                    self.map.de.skip()?;
964                    continue;
965                }
966                // Skip any text events if sequence expects only specific tag names
967                #[cfg(feature = "overlapped-lists")]
968                DeEvent::Text(_) if self.filter.need_skip_text() => {
969                    self.map.de.skip()?;
970                    continue;
971                }
972                // Stop iteration when list elements ends
973                #[cfg(not(feature = "overlapped-lists"))]
974                DeEvent::Start(e) if !self.filter.is_suitable(e)? => Ok(None),
975                #[cfg(not(feature = "overlapped-lists"))]
976                DeEvent::Text(_) if self.filter.need_skip_text() => Ok(None),
977
978                // Stop iteration after reaching a closing tag
979                // The matching tag name is guaranteed by the reader
980                DeEvent::End(e) => {
981                    debug_assert_eq!(self.map.start.name(), e.name());
982                    Ok(None)
983                }
984                // We cannot get `Eof` legally, because we always inside of the
985                // opened tag `self.map.start`
986                DeEvent::Eof => {
987                    Err(Error::missed_end(self.map.start.name(), self.map.start.decoder()).into())
988                }
989
990                DeEvent::Text(_) => match self.map.de.next()? {
991                    DeEvent::Text(e) => seed.deserialize(TextDeserializer(e)).map(Some),
992                    // SAFETY: we just checked that the next event is Text
993                    _ => unreachable!(),
994                },
995                DeEvent::Start(_) => match self.map.de.next()? {
996                    DeEvent::Start(start) => seed
997                        .deserialize(ElementDeserializer {
998                            start,
999                            de: self.map.de,
1000                        })
1001                        .map(Some),
1002                    // SAFETY: we just checked that the next event is Start
1003                    _ => unreachable!(),
1004                },
1005            };
1006        }
1007    }
1008}
1009
1010////////////////////////////////////////////////////////////////////////////////////////////////////
1011
/// A deserializer for a single tag item of a mixed sequence of tags and text.
///
/// This deserializer is very similar to a [`MapValueDeserializer`] (when it
/// processes the [`DeEvent::Start`] event). The only difference is in the
/// [`deserialize_seq`] method. This deserializer will perform deserialization
/// from the textual content between start and end events, whereas the
/// [`MapValueDeserializer`] will iterate over tags / text within its parent tag.
///
/// This deserializer processes items as follows:
/// - numbers are parsed from the text content between tags using [`FromStr`]. So,
///   `<int>123</int>` can be deserialized into a `u32`;
/// - booleans are converted from the text content between tags according to the XML
///   [specification]:
///   - `"true"` and `"1"` are converted to `true`;
///   - `"false"` and `"0"` are converted to `false`;
/// - strings are returned as the text content between tags;
/// - characters are also returned as strings. If the string contains more than one
///   character or is empty, it is the responsibility of the type to return an error;
/// - `Option` is always deserialized as `Some` using the same deserializer,
///   including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consume the whole element subtree;
/// - newtype structs forward deserialization to the inner type using
///   this deserializer;
/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
///   (this is the difference): the text content between tags is passed to
///   [`SimpleTypeDeserializer`];
/// - structs and maps are deserialized using a new instance of [`ElementMapAccess`];
/// - enums:
///   - the variant name is deserialized using [`QNameDeserializer`] from the element name;
///   - the content is deserialized using the same deserializer:
///     - unit variants: consume the subtree and return `()`;
///     - newtype variants forward deserialization to the inner type using
///       this deserializer;
///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
///     - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_seq`]: #method.deserialize_seq
/// [`FromStr`]: std::str::FromStr
/// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct ElementDeserializer<'de, 'd, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// The opening tag of the element that is being deserialized. Its name is
    /// used to read the element up to its end and to determine an enum variant.
    start: BytesStart<'de>,
    /// The deserializer from which the content of the element is read.
    de: &'d mut Deserializer<'de, R, E>,
}
1061
1062impl<'de, 'd, R, E> ElementDeserializer<'de, 'd, R, E>
1063where
1064    R: XmlRead<'de>,
1065    E: EntityResolver,
1066{
1067    /// Returns a next string as concatenated content of consequent [`Text`] and
1068    /// [`CData`] events, used inside [`deserialize_primitives!()`].
1069    ///
1070    /// [`Text`]: crate::events::Event::Text
1071    /// [`CData`]: crate::events::Event::CData
1072    #[inline]
1073    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
1074        self.de.read_text(self.start.name())
1075    }
1076}
1077
1078impl<'de, 'd, R, E> de::Deserializer<'de> for ElementDeserializer<'de, 'd, R, E>
1079where
1080    R: XmlRead<'de>,
1081    E: EntityResolver,
1082{
1083    type Error = DeError;
1084
1085    deserialize_primitives!(mut);
1086
1087    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1088    where
1089        V: Visitor<'de>,
1090    {
1091        // Consume subtree
1092        self.de.read_to_end(self.start.name())?;
1093        visitor.visit_unit()
1094    }
1095
1096    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1097    where
1098        V: Visitor<'de>,
1099    {
1100        visitor.visit_some(self)
1101    }
1102
1103    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
1104    /// with this deserializer.
1105    fn deserialize_newtype_struct<V>(
1106        self,
1107        _name: &'static str,
1108        visitor: V,
1109    ) -> Result<V::Value, Self::Error>
1110    where
1111        V: Visitor<'de>,
1112    {
1113        visitor.visit_newtype_struct(self)
1114    }
1115
1116    /// This method deserializes a sequence inside of element that itself is a
1117    /// sequence element:
1118    ///
1119    /// ```xml
1120    /// <>
1121    ///   ...
1122    ///   <self>inner sequence</self>
1123    ///   <self>inner sequence</self>
1124    ///   <self>inner sequence</self>
1125    ///   ...
1126    /// </>
1127    /// ```
1128    fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
1129    where
1130        V: Visitor<'de>,
1131    {
1132        let text = self.read_string()?;
1133        SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
1134    }
1135
1136    fn deserialize_struct<V>(
1137        self,
1138        _name: &'static str,
1139        fields: &'static [&'static str],
1140        visitor: V,
1141    ) -> Result<V::Value, Self::Error>
1142    where
1143        V: Visitor<'de>,
1144    {
1145        visitor.visit_map(ElementMapAccess::new(self.de, self.start, fields))
1146    }
1147
1148    fn deserialize_enum<V>(
1149        self,
1150        _name: &'static str,
1151        _variants: &'static [&'static str],
1152        visitor: V,
1153    ) -> Result<V::Value, Self::Error>
1154    where
1155        V: Visitor<'de>,
1156    {
1157        visitor.visit_enum(self)
1158    }
1159
1160    #[inline]
1161    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1162    where
1163        V: Visitor<'de>,
1164    {
1165        self.deserialize_map(visitor)
1166    }
1167}
1168
1169impl<'de, 'd, R, E> de::EnumAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1170where
1171    R: XmlRead<'de>,
1172    E: EntityResolver,
1173{
1174    type Error = DeError;
1175    type Variant = Self;
1176
1177    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
1178    where
1179        V: DeserializeSeed<'de>,
1180    {
1181        let name = seed.deserialize(QNameDeserializer::from_elem(&self.start)?)?;
1182        Ok((name, self))
1183    }
1184}
1185
1186impl<'de, 'd, R, E> de::VariantAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1187where
1188    R: XmlRead<'de>,
1189    E: EntityResolver,
1190{
1191    type Error = DeError;
1192
1193    fn unit_variant(self) -> Result<(), Self::Error> {
1194        // Consume subtree
1195        self.de.read_to_end(self.start.name())
1196    }
1197
1198    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
1199    where
1200        T: DeserializeSeed<'de>,
1201    {
1202        seed.deserialize(self)
1203    }
1204
1205    #[inline]
1206    fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
1207    where
1208        V: Visitor<'de>,
1209    {
1210        self.deserialize_tuple(len, visitor)
1211    }
1212
1213    #[inline]
1214    fn struct_variant<V>(
1215        self,
1216        fields: &'static [&'static str],
1217        visitor: V,
1218    ) -> Result<V::Value, Self::Error>
1219    where
1220        V: Visitor<'de>,
1221    {
1222        self.deserialize_struct("", fields, visitor)
1223    }
1224}
1225
1226////////////////////////////////////////////////////////////////////////////////////////////////////
1227
/// Checks `not_in` for a tag without a prefix and for a tag with a prefix.
#[test]
fn test_not_in() {
    use pretty_assertions::assert_eq;

    // A tag without a namespace prefix
    let plain = BytesStart::new("tag");

    let absent = not_in(&[], &plain).unwrap();
    assert_eq!(absent, true);
    let absent = not_in(&["no", "such", "tags"], &plain).unwrap();
    assert_eq!(absent, true);
    let absent = not_in(&["some", "tag", "included"], &plain).unwrap();
    assert_eq!(absent, false);

    // A tag with a namespace prefix: only the local name takes part in the search
    let prefixed = BytesStart::new("ns1:tag");

    let absent = not_in(&["no", "such", "tags"], &prefixed).unwrap();
    assert_eq!(absent, true);
    let absent = not_in(&["some", "tag", "included"], &prefixed).unwrap();
    assert_eq!(absent, false);
    let absent = not_in(&["some", "namespace", "ns1:tag"], &prefixed).unwrap();
    assert_eq!(absent, true);
}
1244    assert_eq!(
1245        not_in(&["some", "namespace", "ns1:tag"], &tag_ns).unwrap(),
1246        true
1247    );
1248}