quick_xml/de/map.rs
1//! Serde `Deserializer` module
2
3use crate::{
4 de::key::QNameDeserializer,
5 de::resolver::EntityResolver,
6 de::simple_type::SimpleTypeDeserializer,
7 de::text::TextDeserializer,
8 de::{DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
9 errors::serialize::DeError,
10 errors::Error,
11 events::attributes::IterState,
12 events::BytesStart,
13 name::QName,
14};
15use serde::de::value::BorrowedStrDeserializer;
16use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
17use std::borrow::Cow;
18use std::ops::Range;
19
/// Defines a source that should be used to deserialize a value in the next call
/// to [`next_value_seed()`](MapAccess::next_value_seed)
#[derive(Debug, PartialEq)]
enum ValueSource {
    /// The source is not specified, because [`next_key_seed()`] has not been
    /// called yet. This is the initial state and the state after deserializing
    /// a value (after a call of [`next_value_seed()`]).
    ///
    /// An attempt to call [`next_value_seed()`] while the accessor is in this
    /// state returns a [`DeError::KeyNotRead`] error.
    ///
    /// [`next_key_seed()`]: MapAccess::next_key_seed
    /// [`next_value_seed()`]: MapAccess::next_value_seed
    Unknown,
    /// Next value should be deserialized from an attribute value; the value is
    /// located at the specified span.
    Attribute(Range<usize>),
    /// Value should be deserialized from the text content of the XML node, which
    /// is represented either by an ordinary text node, or by a CDATA node:
    ///
    /// ```xml
    /// <any-tag>
    ///     <key>text content</key>
    /// <!--     ^^^^^^^^^^^^ - this will be used to deserialize map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <key><![CDATA[cdata content]]></key>
    /// <!--              ^^^^^^^^^^^^^ - this will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    Text,
    /// Next value should be deserialized from an element with any name, except
    /// elements with a name matching one of the struct fields. The corresponding
    /// tag name will always be associated with a field with name [`VALUE_KEY`].
    ///
    /// That state is set when a call to [`peek()`] returns a [`Start`] event, whose
    /// [`name()`] is not listed in the [list of known fields] (which for a struct
    /// is a list of field names, and for a map is an empty list), _and_ the
    /// struct has a field with the special name [`VALUE_KEY`].
    ///
    /// That state is also set when text content is found and the struct has
    /// a [`VALUE_KEY`] field, but has no dedicated [`TEXT_KEY`] field. Otherwise
    /// the text content is handled in the [`Text`] state.
    ///
    /// When in this state, the next event, returned by [`next()`], will be a [`Start`],
    /// which represents both a key, and a value. The value will be deserialized from
    /// the whole element, and how that is done is determined by the value deserializer.
    /// The [`ElementMapAccess`] does not consume any events in that state.
    ///
    /// Because in that state any encountered `<tag>` is mapped to the [`VALUE_KEY`]
    /// field, it is possible to use the tag name as an enum discriminator, so `enum`s
    /// can be deserialized from such XMLs:
    ///
    /// ```xml
    /// <any-tag>
    ///     <variant1>...</variant1>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant1 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <variant2>...</variant2>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant2 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// both can be deserialized into
    ///
    /// ```ignore
    /// enum Enum {
    ///     variant1,
    ///     variant2,
    /// }
    /// struct AnyName {
    ///     #[serde(rename = "$value")]
    ///     field: Enum,
    /// }
    /// ```
    ///
    /// That is possible, because the value deserializer has access to the full content
    /// of a `<variant1>...</variant1>` or `<variant2>...</variant2>` node, including
    /// the tag name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Text`]: Self::Text
    /// [list of known fields]: ElementMapAccess::fields
    Content,
    /// Next value should be deserialized from an element with a dedicated name.
    /// If the deserialized type is a sequence, then that sequence will collect all
    /// elements with the same name until it is filled. If not all elements
    /// are consumed, the rest will be ignored.
    ///
    /// That state is set when a call to [`peek()`] returns a [`Start`] event, whose
    /// [`name()`] represents a field name. That name will be deserialized as a key.
    ///
    /// When in this state, the next event, returned by [`next()`], will be a [`Start`],
    /// which represents both a key, and a value. The value will be deserialized from
    /// the whole element, and how that is done is determined by the value deserializer.
    /// The [`ElementMapAccess`] does not consume any events in that state.
    ///
    /// The illustration below shows what data is used to deserialize the key and the value:
    /// ```xml
    /// <any-tag>
    ///     <key>...</key>
    /// <!-- ~~~           - this data will be used to deserialize a map key -->
    /// <!--^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// Although the value deserializer will have access to the full content of a `<key>`
    /// node (including the tag name), it will not get much benefit from that,
    /// because the tag name will always be fixed for a given map field (equal to the
    /// field name). So, if the field type is an `enum`, it cannot select its
    /// variant based on the tag name. If that is needed, then the [`Content`] variant
    /// of this enum should be used. Such usage is enabled by annotating a struct
    /// field as a "content" field, which is implemented by giving the field the special
    /// [`VALUE_KEY`] name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Content`]: Self::Content
    Nested,
}
148
149////////////////////////////////////////////////////////////////////////////////////////////////////
150
/// A deserializer that extracts map-like structures from an XML. This deserializer
/// represents one XML tag:
///
/// ```xml
/// <tag>...</tag>
/// ```
///
/// The name of this tag is stored in the [`Self::start`] property.
///
/// # Lifetimes
///
/// - `'de` lifetime represents a buffer, from which deserialized values can
///   borrow their data. Depending on the underlying reader, it can be an
///   internal buffer of the deserializer (i.e. the deserializer itself) or the
///   input (in that case it is possible to approach zero-copy deserialization).
///
/// - `'d` lifetime represents a parent deserializer, which could own the data
///   buffer.
pub(crate) struct ElementMapAccess<'de, 'd, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Tag -- owner of attributes
    start: BytesStart<'de>,
    /// Parent deserializer; the events nested in the tag are peeked and read
    /// through it.
    de: &'d mut Deserializer<'de, R, E>,
    /// State of the iterator over attributes. Contains the next position in the
    /// inner `start` slice, from which the next attribute should be parsed.
    iter: IterState,
    /// Current state of the accessor that determines what the next call to API
    /// methods should return.
    source: ValueSource,
    /// List of field names of the struct. It is empty for maps
    fields: &'static [&'static str],
    /// If `true`, then the deserialized struct has a field with a special name:
    /// [`VALUE_KEY`]. That field should be deserialized from the whole content
    /// of an XML node, including tag name:
    ///
    /// ```xml
    /// <tag>value for VALUE_KEY field</tag>
    /// ```
    has_value_field: bool,
    /// If `true`, then the deserialized struct has a field with a special name:
    /// [`TEXT_KEY`].
    has_text_field: bool,
}
197
198impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E>
199where
200 R: XmlRead<'de>,
201 E: EntityResolver,
202{
203 /// Create a new ElementMapAccess
204 pub fn new(
205 de: &'d mut Deserializer<'de, R, E>,
206 start: BytesStart<'de>,
207 fields: &'static [&'static str],
208 ) -> Self {
209 Self {
210 de,
211 iter: IterState::new(start.name().as_ref().len(), false),
212 start,
213 source: ValueSource::Unknown,
214 fields,
215 has_value_field: fields.contains(&VALUE_KEY),
216 has_text_field: fields.contains(&TEXT_KEY),
217 }
218 }
219
220 /// Determines if subtree started with the specified event shoould be skipped.
221 ///
222 /// Used to map elements with `xsi:nil` attribute set to true to `None` in optional contexts.
223 ///
224 /// We need to handle two attributes:
225 /// - on parent element: `<map xsi:nil="true"><foo/></map>`
226 /// - on this element: `<map><foo xsi:nil="true"/></map>`
227 ///
228 /// We check parent element too because `xsi:nil` affects only nested elements of the
229 /// tag where it is defined. We can map structure with fields mapped to attributes to
230 /// the `<map>` element and set to `None` all its optional elements.
231 fn should_skip_subtree(&self, start: &BytesStart) -> bool {
232 self.de.reader.reader.has_nil_attr(&self.start) || self.de.reader.reader.has_nil_attr(start)
233 }
234
235 /// Skips whitespaces when they are not preserved
236 #[inline]
237 fn skip_whitespaces(&mut self) -> Result<(), DeError> {
238 // TODO: respect the `xml:space` attribute and probably some deserialized type sign
239 self.de.skip_whitespaces()
240 }
241}
242
243impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E>
244where
245 R: XmlRead<'de>,
246 E: EntityResolver,
247{
248 type Error = DeError;
249
250 fn next_key_seed<K: DeserializeSeed<'de>>(
251 &mut self,
252 seed: K,
253 ) -> Result<Option<K::Value>, Self::Error> {
254 debug_assert_eq!(self.source, ValueSource::Unknown);
255
256 // FIXME: There error positions counted from the start of tag name - need global position
257 let slice = &self.start.buf;
258 let decoder = self.start.decoder();
259
260 if let Some(a) = self.iter.next(slice).transpose()? {
261 // try getting map from attributes (key= "value")
262 let (key, value) = a.into();
263 self.source = ValueSource::Attribute(value.unwrap_or_default());
264
265 // Attributes in mapping starts from @ prefix
266 // TODO: Customization point - may customize prefix
267 self.de.key_buf.clear();
268 self.de.key_buf.push('@');
269
270 let de =
271 QNameDeserializer::from_attr(QName(&slice[key]), decoder, &mut self.de.key_buf)?;
272 seed.deserialize(de).map(Some)
273 } else {
274 self.skip_whitespaces()?;
275 // try getting from events (<key>value</key>)
276 match self.de.peek()? {
277 // If we have dedicated "$text" field, it will not be passed to "$value" field
278 DeEvent::Text(_) if self.has_value_field && !self.has_text_field => {
279 self.source = ValueSource::Content;
280 // Deserialize `key` from special attribute name which means
281 // that value should be taken from the text content of the
282 // XML node
283 let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
284 seed.deserialize(de).map(Some)
285 }
286 DeEvent::Text(_) => {
287 self.source = ValueSource::Text;
288 // Deserialize `key` from special attribute name which means
289 // that value should be taken from the text content of the
290 // XML node
291 let de = BorrowedStrDeserializer::<DeError>::new(TEXT_KEY);
292 seed.deserialize(de).map(Some)
293 }
294 // Used to deserialize collections of enums, like:
295 // <root>
296 // <A/>
297 // <B/>
298 // <C/>
299 // </root>
300 //
301 // into
302 //
303 // enum Enum { A, B, С }
304 // struct Root {
305 // #[serde(rename = "$value")]
306 // items: Vec<Enum>,
307 // }
308 // TODO: This should be handled by #[serde(flatten)]
309 // See https://github.com/serde-rs/serde/issues/1905
310 DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e)? => {
311 self.source = ValueSource::Content;
312
313 let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
314 seed.deserialize(de).map(Some)
315 }
316 DeEvent::Start(e) => {
317 self.source = ValueSource::Nested;
318
319 let de = QNameDeserializer::from_elem(e)?;
320 seed.deserialize(de).map(Some)
321 }
322 // Stop iteration after reaching a closing tag
323 // The matching tag name is guaranteed by the reader if our
324 // deserializer implementation is correct
325 DeEvent::End(e) => {
326 debug_assert_eq!(self.start.name(), e.name());
327 // Consume End
328 self.de.next()?;
329 Ok(None)
330 }
331 // We cannot get `Eof` legally, because we always inside of the
332 // opened tag `self.start`
333 DeEvent::Eof => {
334 Err(Error::missed_end(self.start.name(), self.start.decoder()).into())
335 }
336 }
337 }
338 }
339
340 fn next_value_seed<K: DeserializeSeed<'de>>(
341 &mut self,
342 seed: K,
343 ) -> Result<K::Value, Self::Error> {
344 match std::mem::replace(&mut self.source, ValueSource::Unknown) {
345 ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
346 &self.start.buf,
347 value,
348 self.start.decoder(),
349 )),
350 // This arm processes the following XML shape:
351 // <any-tag>
352 // text value
353 // </any-tag>
354 // The whole map represented by an `<any-tag>` element, the map key
355 // is implicit and equals to the `TEXT_KEY` constant, and the value
356 // is a `Text` event (the value deserializer will see that event)
357 // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs
358 ValueSource::Text => match self.de.next()? {
359 DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
360 // SAFETY: We set `Text` only when we seen `Text`
361 _ => unreachable!(),
362 },
363 // This arm processes the following XML shape:
364 // <any-tag>
365 // <any>...</any>
366 // </any-tag>
367 // The whole map represented by an `<any-tag>` element, the map key
368 // is implicit and equals to the `VALUE_KEY` constant, and the value
369 // is a `Start` event (the value deserializer will see that event)
370 ValueSource::Content => seed.deserialize(MapValueDeserializer {
371 map: self,
372 fixed_name: false,
373 }),
374 // This arm processes the following XML shape:
375 // <any-tag>
376 // <tag>...</tag>
377 // </any-tag>
378 // The whole map represented by an `<any-tag>` element, the map key
379 // is a `tag`, and the value is a `Start` event (the value deserializer
380 // will see that event)
381 ValueSource::Nested => seed.deserialize(MapValueDeserializer {
382 map: self,
383 fixed_name: true,
384 }),
385 ValueSource::Unknown => Err(DeError::KeyNotRead),
386 }
387 }
388}
389
390////////////////////////////////////////////////////////////////////////////////////////////////////
391
/// A deserializer for a value of a map or a struct. That deserializer processes
/// events for primitive types and sequences slightly differently than
/// a [`Deserializer`].
///
/// This deserializer is used to deserialize two kinds of fields:
/// - usual fields with a dedicated name, such as `field_one` or `field_two`, in
///   that case field [`Self::fixed_name`] is `true`;
/// - the special `$value` field which represents any tag or textual content
///   in the XML which would be found in the document, in that case field
///   [`Self::fixed_name`] is `false`.
///
/// This deserializer can see two kinds of events at the start:
/// - [`DeEvent::Text`]
/// - [`DeEvent::Start`]
///
/// which represent two possible variants of items:
/// ```xml
/// <item>A tag item</item>
/// A text item
/// <yet another="tag item"/>
/// ```
///
/// This deserializer is very similar to an [`ElementDeserializer`]. The only difference
/// is in the `deserialize_seq` method. This deserializer will act as an iterator
/// over tags / text within its parent tag, whereas the [`ElementDeserializer`]
/// will represent sequences as an `xs:list`.
///
/// This deserializer processes items as follows:
/// - primitives (numbers, booleans, strings, characters) are deserialized either
///   from a text content, or unwrapped from one level of a tag. So, `123` and
///   `<int>123</int>` both can be deserialized into an `u32`;
/// - `Option`:
///   - empty text of [`DeEvent::Text`] is deserialized as `None`;
///   - everything else is deserialized as `Some` using the same deserializer,
///     including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consume the whole text or element subtree;
/// - newtype structs are deserialized by forwarding deserialization of the inner type
///   with the same deserializer;
/// - sequences, tuples and tuple structs are deserialized by iterating within the
///   parent tag and deserializing each tag or text content using [`ElementDeserializer`];
/// - structs and maps are deserialized using a new instance of [`ElementMapAccess`];
/// - enums:
///   - in case of a [`DeEvent::Text`] event the text content is deserialized as
///     a `$text` variant. Enum content is deserialized from the text using
///     [`SimpleTypeDeserializer`];
///   - in case of a [`DeEvent::Start`] event the tag name is deserialized as
///     an enum tag, and the content inside is deserialized as an enum content.
///     Depending on a variant kind deserialization is performed as:
///     - unit variants: consuming text content or a subtree;
///     - newtype variants: forward deserialization to the inner type using
///       this deserializer;
///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
///     - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct MapValueDeserializer<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this deserializer. Gives access to the
    /// context, such as the list of fields, that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Whether this deserializer was created for deserialization from an element
    /// with a fixed name, or elements with different names or even text are allowed.
    ///
    /// If this field is `true`, we process the `<tag>` element in the following XML shape:
    ///
    /// ```xml
    /// <any-tag>
    ///   <tag>...</tag>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is a `tag`,
    /// and the value starts with a `Start("tag")` (the value deserializer will
    /// see that event first) and extends to the matching `End("tag")` event.
    /// In order to deserialize primitives (such as `usize`) we need to allow
    /// looking inside one level of tags, so the
    ///
    /// ```xml
    /// <tag>42</tag>
    /// ```
    ///
    /// could be deserialized into `42usize` without problems, and at the same time
    ///
    /// ```xml
    /// <tag>
    ///   <key1/>
    ///   <key2/>
    ///   <!--...-->
    /// </tag>
    /// ```
    /// could be deserialized to a struct.
    ///
    /// If this field is `false`, we process one of the following XML shapes:
    ///
    /// ```xml
    /// <any-tag>
    ///   text value
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <![CDATA[cdata value]]>
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <any>...</any>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is
    /// implicit and equals to the [`VALUE_KEY`] constant, and the value is
    /// a [`Text`], or a [`Start`] event (the value deserializer will see one of
    /// those events). In the first two cases the value of this field does not matter
    /// (because we already see the textual event and there are no reasons to look
    /// "inside" something), but in the last case the primitives should raise
    /// a deserialization error, because that means that you are trying to deserialize
    /// the following struct:
    ///
    /// ```ignore
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   any_name: String,
    /// }
    /// ```
    /// which means that `any_name` should get the content of the `<any-tag>` element.
    ///
    /// Changing this can be valuable for <https://github.com/tafia/quick-xml/issues/383>,
    /// but those fields should be explicitly marked that they want to get any
    /// possible markup as a `String` and that mark is different from marking them
    /// as accepting "text content", which is what `$text` currently means.
    ///
    /// [`Text`]: DeEvent::Text
    /// [`Start`]: DeEvent::Start
    fixed_name: bool,
}
532
533impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E>
534where
535 R: XmlRead<'de>,
536 E: EntityResolver,
537{
538 /// Returns a next string as concatenated content of consequent [`Text`] and
539 /// [`CData`] events, used inside [`deserialize_primitives!()`].
540 ///
541 /// [`Text`]: crate::events::Event::Text
542 /// [`CData`]: crate::events::Event::CData
543 #[inline]
544 fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
545 // TODO: Read the whole content to fix https://github.com/tafia/quick-xml/issues/483
546 self.map.de.read_string_impl(self.fixed_name)
547 }
548}
549
550impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
551where
552 R: XmlRead<'de>,
553 E: EntityResolver,
554{
555 type Error = DeError;
556
557 deserialize_primitives!(mut);
558
559 #[inline]
560 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
561 where
562 V: Visitor<'de>,
563 {
564 self.map.de.deserialize_unit(visitor)
565 }
566
567 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
568 where
569 V: Visitor<'de>,
570 {
571 // We cannot use result of `peek()` directly because of borrow checker
572 let _ = self.map.de.peek()?;
573 match self.map.de.last_peeked() {
574 DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
575 DeEvent::Start(start) if self.map.should_skip_subtree(start) => {
576 self.map.de.skip_next_tree()?;
577 visitor.visit_none()
578 }
579 _ => visitor.visit_some(self),
580 }
581 }
582
583 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
584 /// with the same deserializer.
585 fn deserialize_newtype_struct<V>(
586 self,
587 _name: &'static str,
588 visitor: V,
589 ) -> Result<V::Value, Self::Error>
590 where
591 V: Visitor<'de>,
592 {
593 visitor.visit_newtype_struct(self)
594 }
595
596 /// Deserializes each `<tag>` in
597 /// ```xml
598 /// <any-tag>
599 /// <tag>...</tag>
600 /// <tag>...</tag>
601 /// <tag>...</tag>
602 /// </any-tag>
603 /// ```
604 /// as a sequence item, where `<any-tag>` represents a Map in a [`Self::map`],
605 /// and a `<tag>` is a sequential field of that map.
606 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
607 where
608 V: Visitor<'de>,
609 {
610 let filter = if self.fixed_name {
611 match self.map.de.peek()? {
612 // Clone is cheap if event borrows from the input
613 DeEvent::Start(e) => TagFilter::Include(e.clone()),
614 // SAFETY: we use that deserializer with `fixed_name == true`
615 // only from the `ElementMapAccess::next_value_seed` and only when we
616 // peeked `Start` event
617 _ => unreachable!(),
618 }
619 } else {
620 TagFilter::Exclude(self.map.fields, self.map.has_text_field)
621 };
622 visitor.visit_seq(MapValueSeqAccess {
623 #[cfg(feature = "overlapped-lists")]
624 checkpoint: self.map.de.skip_checkpoint(),
625
626 map: self.map,
627 filter,
628 })
629 }
630
631 #[inline]
632 fn deserialize_struct<V>(
633 self,
634 name: &'static str,
635 fields: &'static [&'static str],
636 visitor: V,
637 ) -> Result<V::Value, Self::Error>
638 where
639 V: Visitor<'de>,
640 {
641 self.map.de.deserialize_struct(name, fields, visitor)
642 }
643
644 fn deserialize_enum<V>(
645 self,
646 _name: &'static str,
647 _variants: &'static [&'static str],
648 visitor: V,
649 ) -> Result<V::Value, Self::Error>
650 where
651 V: Visitor<'de>,
652 {
653 if self.fixed_name {
654 match self.map.de.next()? {
655 // Handles <field>UnitEnumVariant</field>
656 DeEvent::Start(e) => {
657 // skip <field>, read text after it and ensure that it is ended by </field>
658 let text = self.map.de.read_text(e.name())?;
659 if text.is_empty() {
660 // Map empty text (<field/>) to a special `$text` variant
661 visitor.visit_enum(SimpleTypeDeserializer::from_text(TEXT_KEY.into()))
662 } else {
663 visitor.visit_enum(SimpleTypeDeserializer::from_text(text))
664 }
665 }
666 // SAFETY: we use that deserializer with `fixed_name == true`
667 // only from the `MapAccess::next_value_seed` and only when we
668 // peeked `Start` event
669 _ => unreachable!(),
670 }
671 } else {
672 visitor.visit_enum(self)
673 }
674 }
675
676 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
677 where
678 V: Visitor<'de>,
679 {
680 match self.map.de.peek()? {
681 DeEvent::Text(_) => self.deserialize_str(visitor),
682 _ => self.deserialize_map(visitor),
683 }
684 }
685}
686
687impl<'de, 'd, 'm, R, E> de::EnumAccess<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
688where
689 R: XmlRead<'de>,
690 E: EntityResolver,
691{
692 type Error = DeError;
693 type Variant = MapValueVariantAccess<'de, 'd, 'm, R, E>;
694
695 fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
696 where
697 V: DeserializeSeed<'de>,
698 {
699 let (name, is_text) = match self.map.de.peek()? {
700 DeEvent::Start(e) => (seed.deserialize(QNameDeserializer::from_elem(e)?)?, false),
701 DeEvent::Text(_) => (
702 seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
703 true,
704 ),
705 // SAFETY: we use that deserializer only when we peeked `Start` or `Text` event
706 _ => unreachable!(),
707 };
708 Ok((
709 name,
710 MapValueVariantAccess {
711 map: self.map,
712 is_text,
713 },
714 ))
715 }
716}
717
/// An accessor to the content of an enum variant. It is created by
/// [`MapValueDeserializer`] after the variant name was determined from
/// a peeked tag name or text event.
struct MapValueVariantAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this enum accessor. Gives access to the
    /// context, such as the list of fields, that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// `true` if the variant should be deserialized from a textual content
    /// and `false` if from a tag
    is_text: bool,
}
730
731impl<'de, 'd, 'm, R, E> de::VariantAccess<'de> for MapValueVariantAccess<'de, 'd, 'm, R, E>
732where
733 R: XmlRead<'de>,
734 E: EntityResolver,
735{
736 type Error = DeError;
737
738 fn unit_variant(self) -> Result<(), Self::Error> {
739 match self.map.de.next()? {
740 // Consume subtree
741 DeEvent::Start(e) => self.map.de.read_to_end(e.name()),
742 // Does not needed to deserialize using SimpleTypeDeserializer, because
743 // it returns `()` when `deserialize_unit()` is requested
744 DeEvent::Text(_) => Ok(()),
745 // SAFETY: the other events are filtered in `variant_seed()`
746 _ => unreachable!("Only `Start` or `Text` events are possible here"),
747 }
748 }
749
750 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
751 where
752 T: DeserializeSeed<'de>,
753 {
754 if self.is_text {
755 match self.map.de.next()? {
756 DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
757 // SAFETY: the other events are filtered in `variant_seed()`
758 _ => unreachable!("Only `Text` events are possible here"),
759 }
760 } else {
761 seed.deserialize(MapValueDeserializer {
762 map: self.map,
763 // Because element name already was either mapped to a field name,
764 // or to a variant name, we should not treat it as variable
765 fixed_name: true,
766 })
767 }
768 }
769
770 fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
771 where
772 V: Visitor<'de>,
773 {
774 if self.is_text {
775 match self.map.de.next()? {
776 DeEvent::Text(e) => {
777 SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor)
778 }
779 // SAFETY: the other events are filtered in `variant_seed()`
780 _ => unreachable!("Only `Text` events are possible here"),
781 }
782 } else {
783 MapValueDeserializer {
784 map: self.map,
785 // Because element name already was either mapped to a field name,
786 // or to a variant name, we should not treat it as variable
787 fixed_name: true,
788 }
789 .deserialize_tuple(len, visitor)
790 }
791 }
792
793 fn struct_variant<V>(
794 self,
795 fields: &'static [&'static str],
796 visitor: V,
797 ) -> Result<V::Value, Self::Error>
798 where
799 V: Visitor<'de>,
800 {
801 match self.map.de.next()? {
802 DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.map.de, e, fields)),
803 DeEvent::Text(e) => {
804 SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor)
805 }
806 // SAFETY: the other events are filtered in `variant_seed()`
807 _ => unreachable!("Only `Start` or `Text` events are possible here"),
808 }
809 }
810}
811
812////////////////////////////////////////////////////////////////////////////////////////////////////
813
814/// Check if tag `start` is included in the `fields` list. `decoder` is used to
815/// get a string representation of a tag.
816///
817/// Returns `true`, if `start` is not in the `fields` list and `false` otherwise.
818fn not_in(fields: &'static [&'static str], start: &BytesStart) -> Result<bool, DeError> {
819 let tag = start.decoder().decode(start.local_name().into_inner())?;
820
821 Ok(fields.iter().all(|&field| field != tag.as_ref()))
822}
823
/// A filter that determines what tags should form a sequence.
///
/// There are two types of sequences:
/// - a sequence where each element is represented by tags with the same name
/// - a sequence where each element can have a different tag
///
/// The first variant could represent a collection of structs, the second --
/// a collection of enum variants.
///
/// In the second case we don't know what tag name should be expected as a
/// sequence element, so we accept any element. Since the sequences are flattened
/// into maps, we skip elements which have dedicated fields in a struct by using an
/// `Exclude` filter that filters out elements with names matching field names
/// from the struct.
///
/// # Lifetimes
///
/// `'de` represents a lifetime of the XML input, when the filter stores the
/// dedicated tag name
#[derive(Debug)]
enum TagFilter<'de> {
    /// A `SeqAccess` interested only in tags with the specified name to deserialize
    /// an XML like this:
    ///
    /// ```xml
    /// <...>
    ///   <tag/>
    ///   <tag/>
    ///   <tag/>
    ///   ...
    /// </...>
    /// ```
    ///
    /// The tag name is stored inside (`b"tag"` for that example)
    Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag
    /// A `SeqAccess` interested in tags with any name, except explicitly listed.
    /// Excluded tags are used as struct field names and therefore should not
    /// fall into a `$value` category.
    ///
    /// The `bool` tells whether the fields array has a `$text` special field.
    /// It is used to exclude text events when a `$text` field is defined together
    /// with a `$value` field and `$value` accepts a sequence.
    Exclude(&'static [&'static str], bool),
}
868
869impl<'de> TagFilter<'de> {
870 fn is_suitable(&self, start: &BytesStart) -> Result<bool, DeError> {
871 match self {
872 Self::Include(n) => Ok(n.name() == start.name()),
873 Self::Exclude(fields, _) => not_in(fields, start),
874 }
875 }
876 const fn need_skip_text(&self) -> bool {
877 match self {
878 // If we look only for tags, we should skip any $text keys
879 Self::Include(_) => true,
880 // If we look fo any data, we should exclude $text keys if it in the list
881 Self::Exclude(_, has_text_field) => *has_text_field,
882 }
883 }
884}
885
886////////////////////////////////////////////////////////////////////////////////////////////////////
887
/// An accessor to sequence elements forming a value for a struct field.
/// Technically, this sequence is flattened out into the structure, and sequence
/// elements overlap with other fields of the structure. Each call to
/// [`Self::next_element_seed`] consumes the next sub-tree or a consecutive list
/// of [`Text`] and [`CData`] events.
///
/// ```xml
/// <>
///   ...
///   <item>The is the one item</item>
///   This is <![CDATA[one another]]> item<!-- even when--> it is split by comments
///   <tag>...and that is the third!</tag>
///   ...
/// </>
/// ```
///
/// Depending on [`Self::filter`], only some of those possible constructs will
/// be treated as an element.
///
/// [`Text`]: crate::events::Event::Text
/// [`CData`]: crate::events::Event::CData
struct MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Accessor to the map that created this accessor, which also gives access
    /// to the deserializer used for the sequence items.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Filter that determines whether a tag is a part of this sequence.
    ///
    /// When feature [`overlapped-lists`] is not activated, iteration will stop
    /// when a tag that does not pass this filter is found.
    ///
    /// When feature [`overlapped-lists`] is activated, all tags that do not pass
    /// this check will be skipped.
    ///
    /// [`overlapped-lists`]: ../../index.html#overlapped-lists
    filter: TagFilter<'de>,

    /// Checkpoint after which all skipped events should be returned. All events
    /// that were skipped before creating this checkpoint will still stay buffered
    /// and will not be returned.
    #[cfg(feature = "overlapped-lists")]
    checkpoint: usize,
}
934
#[cfg(feature = "overlapped-lists")]
impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// When the sequence accessor goes away, asks the underlying deserializer
    /// to start replaying events from the checkpoint stored in this accessor.
    fn drop(&mut self) {
        let checkpoint = self.checkpoint;
        self.map.de.start_replay(checkpoint);
    }
}
945
impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    type Error = DeError;

    /// Produces the next element of the flattened sequence.
    ///
    /// Whitespace is skipped first, then the upcoming event is peeked and
    /// handled as follows:
    ///
    /// - a start tag rejected by the filter, or a text event that the filter
    ///   asks to skip, is skipped when the `overlapped-lists` feature is enabled;
    ///   without that feature it ends the sequence (`Ok(None)`);
    /// - an end tag ends the sequence (`Ok(None)`);
    /// - a text event is consumed and deserialized with [`TextDeserializer`];
    /// - a start tag is consumed and deserialized with [`ElementDeserializer`].
    ///
    /// # Errors
    ///
    /// Returns a "missed end" error built from the name of the enclosing
    /// element if the end of input is reached, and propagates any error of the
    /// underlying reader, of the filter or of `seed`.
    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, DeError>
    where
        T: DeserializeSeed<'de>,
    {
        loop {
            self.map.skip_whitespaces()?;
            // The guarded arms must stay before the unguarded `Text` / `Start`
            // arms below, otherwise the filter would never be consulted
            break match self.map.de.peek()? {
                // If we see a tag that we are not interested in, skip it
                #[cfg(feature = "overlapped-lists")]
                DeEvent::Start(e) if !self.filter.is_suitable(e)? => {
                    self.map.de.skip()?;
                    continue;
                }
                // Skip any text events if the sequence expects only specific tag names
                #[cfg(feature = "overlapped-lists")]
                DeEvent::Text(_) if self.filter.need_skip_text() => {
                    self.map.de.skip()?;
                    continue;
                }
                // Stop iteration when the list elements end
                #[cfg(not(feature = "overlapped-lists"))]
                DeEvent::Start(e) if !self.filter.is_suitable(e)? => Ok(None),
                #[cfg(not(feature = "overlapped-lists"))]
                DeEvent::Text(_) if self.filter.need_skip_text() => Ok(None),

                // Stop iteration after reaching a closing tag
                // The matching tag name is guaranteed by the reader
                DeEvent::End(e) => {
                    debug_assert_eq!(self.map.start.name(), e.name());
                    Ok(None)
                }
                // We cannot get `Eof` legally, because we are always inside of
                // the opened tag `self.map.start`
                DeEvent::Eof => {
                    Err(Error::missed_end(self.map.start.name(), self.map.start.decoder()).into())
                }

                // The peeked event is an acceptable text item: consume it now
                DeEvent::Text(_) => match self.map.de.next()? {
                    DeEvent::Text(e) => seed.deserialize(TextDeserializer(e)).map(Some),
                    // INVARIANT: we just checked that the next event is Text
                    _ => unreachable!(),
                },
                // The peeked event is an acceptable element: consume its start
                // tag and let the element deserializer read the rest
                DeEvent::Start(_) => match self.map.de.next()? {
                    DeEvent::Start(start) => seed
                        .deserialize(ElementDeserializer {
                            start,
                            de: self.map.de,
                        })
                        .map(Some),
                    // INVARIANT: we just checked that the next event is Start
                    _ => unreachable!(),
                },
            };
        }
    }
}
1009
1010////////////////////////////////////////////////////////////////////////////////////////////////////
1011
/// A deserializer for a single tag item of a mixed sequence of tags and text.
///
/// This deserializer is very similar to a [`MapValueDeserializer`] (when it
/// processes the [`DeEvent::Start`] event). The only difference is in the
/// [`deserialize_seq`] method. This deserializer will perform deserialization
/// from the textual content between start and end events, whereas the
/// [`MapValueDeserializer`] will iterate over tags / text within its parent tag.
///
/// This deserializer processes items as follows:
/// - numbers are parsed from a text content between tags using [`FromStr`]. So,
///   `<int>123</int>` can be deserialized into an `u32`;
/// - booleans are converted from a text content between tags according to the XML
///   [specification]:
///   - `"true"` and `"1"` are converted to `true`;
///   - `"false"` and `"0"` are converted to `false`;
/// - strings are returned as a text content between tags;
/// - characters are also returned as strings. If the string contains more than
///   one character or is empty, it is the responsibility of a type to return an error;
/// - `Option` is always deserialized as `Some` using the same deserializer,
///   including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consume the whole element subtree;
/// - newtype structs forward deserialization to the inner type using
///   [`SimpleTypeDeserializer`];
/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
///   (this is the difference): text content between tags is passed to
///   [`SimpleTypeDeserializer`];
/// - structs and maps are deserialized using a new instance of [`ElementMapAccess`];
/// - enums:
///   - the variant name is deserialized using [`QNameDeserializer`] from the element name;
///   - the content is deserialized using the same deserializer:
///     - unit variants: consume a subtree and return `()`;
///     - newtype variants forward deserialization to the inner type using
///       this deserializer;
///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
///     - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_seq`]: #method.deserialize_seq
/// [`FromStr`]: std::str::FromStr
/// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct ElementDeserializer<'de, 'd, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Start tag of the element that is deserialized; its name is used to
    /// find the end of the element and as the enum variant name.
    start: BytesStart<'de>,
    /// Deserializer from which the content of the element is read.
    de: &'d mut Deserializer<'de, R, E>,
}
1061
1062impl<'de, 'd, R, E> ElementDeserializer<'de, 'd, R, E>
1063where
1064 R: XmlRead<'de>,
1065 E: EntityResolver,
1066{
1067 /// Returns a next string as concatenated content of consequent [`Text`] and
1068 /// [`CData`] events, used inside [`deserialize_primitives!()`].
1069 ///
1070 /// [`Text`]: crate::events::Event::Text
1071 /// [`CData`]: crate::events::Event::CData
1072 #[inline]
1073 fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
1074 self.de.read_text(self.start.name())
1075 }
1076}
1077
1078impl<'de, 'd, R, E> de::Deserializer<'de> for ElementDeserializer<'de, 'd, R, E>
1079where
1080 R: XmlRead<'de>,
1081 E: EntityResolver,
1082{
1083 type Error = DeError;
1084
1085 deserialize_primitives!(mut);
1086
1087 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1088 where
1089 V: Visitor<'de>,
1090 {
1091 // Consume subtree
1092 self.de.read_to_end(self.start.name())?;
1093 visitor.visit_unit()
1094 }
1095
1096 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1097 where
1098 V: Visitor<'de>,
1099 {
1100 visitor.visit_some(self)
1101 }
1102
1103 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
1104 /// with this deserializer.
1105 fn deserialize_newtype_struct<V>(
1106 self,
1107 _name: &'static str,
1108 visitor: V,
1109 ) -> Result<V::Value, Self::Error>
1110 where
1111 V: Visitor<'de>,
1112 {
1113 visitor.visit_newtype_struct(self)
1114 }
1115
1116 /// This method deserializes a sequence inside of element that itself is a
1117 /// sequence element:
1118 ///
1119 /// ```xml
1120 /// <>
1121 /// ...
1122 /// <self>inner sequence</self>
1123 /// <self>inner sequence</self>
1124 /// <self>inner sequence</self>
1125 /// ...
1126 /// </>
1127 /// ```
1128 fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
1129 where
1130 V: Visitor<'de>,
1131 {
1132 let text = self.read_string()?;
1133 SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
1134 }
1135
1136 fn deserialize_struct<V>(
1137 self,
1138 _name: &'static str,
1139 fields: &'static [&'static str],
1140 visitor: V,
1141 ) -> Result<V::Value, Self::Error>
1142 where
1143 V: Visitor<'de>,
1144 {
1145 visitor.visit_map(ElementMapAccess::new(self.de, self.start, fields))
1146 }
1147
1148 fn deserialize_enum<V>(
1149 self,
1150 _name: &'static str,
1151 _variants: &'static [&'static str],
1152 visitor: V,
1153 ) -> Result<V::Value, Self::Error>
1154 where
1155 V: Visitor<'de>,
1156 {
1157 visitor.visit_enum(self)
1158 }
1159
1160 #[inline]
1161 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1162 where
1163 V: Visitor<'de>,
1164 {
1165 self.deserialize_map(visitor)
1166 }
1167}
1168
1169impl<'de, 'd, R, E> de::EnumAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1170where
1171 R: XmlRead<'de>,
1172 E: EntityResolver,
1173{
1174 type Error = DeError;
1175 type Variant = Self;
1176
1177 fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
1178 where
1179 V: DeserializeSeed<'de>,
1180 {
1181 let name = seed.deserialize(QNameDeserializer::from_elem(&self.start)?)?;
1182 Ok((name, self))
1183 }
1184}
1185
1186impl<'de, 'd, R, E> de::VariantAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1187where
1188 R: XmlRead<'de>,
1189 E: EntityResolver,
1190{
1191 type Error = DeError;
1192
1193 fn unit_variant(self) -> Result<(), Self::Error> {
1194 // Consume subtree
1195 self.de.read_to_end(self.start.name())
1196 }
1197
1198 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
1199 where
1200 T: DeserializeSeed<'de>,
1201 {
1202 seed.deserialize(self)
1203 }
1204
1205 #[inline]
1206 fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
1207 where
1208 V: Visitor<'de>,
1209 {
1210 self.deserialize_tuple(len, visitor)
1211 }
1212
1213 #[inline]
1214 fn struct_variant<V>(
1215 self,
1216 fields: &'static [&'static str],
1217 visitor: V,
1218 ) -> Result<V::Value, Self::Error>
1219 where
1220 V: Visitor<'de>,
1221 {
1222 self.deserialize_struct("", fields, visitor)
1223 }
1224}
1225
1226////////////////////////////////////////////////////////////////////////////////////////////////////
1227
/// Checks `not_in` against tags with and without a namespace prefix.
#[test]
fn test_not_in() {
    use pretty_assertions::assert_eq;

    let plain = BytesStart::new("tag");
    let prefixed = BytesStart::new("ns1:tag");

    // (list of field names, tag to check, expected result of `not_in`)
    let cases: [(&'static [&'static str], &BytesStart, bool); 6] = [
        (&[], &plain, true),
        (&["no", "such", "tags"], &plain, true),
        (&["some", "tag", "included"], &plain, false),
        (&["no", "such", "tags"], &prefixed, true),
        (&["some", "tag", "included"], &prefixed, false),
        (&["some", "namespace", "ns1:tag"], &prefixed, true),
    ];

    for &(fields, tag, expected) in &cases {
        assert_eq!(not_in(fields, tag).unwrap(), expected);
    }
}