Skip to main content

bpaf/
arg.rs

1use std::ffi::{OsStr, OsString};
2
/// A single command line item after preprocessing
///
/// The [`OsString`] stored in `Short` and `Long` is the original command line item,
/// it is kept around to be used in error messages
#[derive(Debug, Clone, Eq, PartialEq)]
// NOTE(review): presumably required because `ArgWord` repeats the name of the enum - confirm
#[allow(clippy::enum_variant_names)]
pub(crate) enum Arg {
    /// Short flag: `-f`
    ///
    /// The `bool` indicates if the item that follows is also a part of this `Short`
    // NOTE(review): the original comment was cut off after "(created", the rest of the
    // explanation is unknown - confirm the intended wording
    Short(char, bool, OsString),

    /// Long flag: `--flag`
    ///
    /// The `bool` tells if the item looked like `--key=val` or not
    Long(String, bool, OsString),

    /// The "val" part of `--key=val`, `-k=val` or `-kval`
    ArgWord(OsString),

    /// A separate word: can be a command, a positional item or a separate argument to a flag
    ///
    /// Can start with `-` or `--`, doesn't have to be valid utf8
    ///
    /// Example: `hello`
    Word(OsString),

    /// A separate word that goes after `--`, strictly positional
    ///
    /// Can start with `-` or `--`, doesn't have to be valid utf8
    PosWord(OsString),
}
33
34impl Arg {
35    pub(crate) fn os_str(&self) -> &OsStr {
36        match self {
37            Arg::Short(_, _, s)
38            | Arg::Long(_, _, s)
39            | Arg::ArgWord(s)
40            | Arg::Word(s)
41            | Arg::PosWord(s) => s.as_ref(),
42        }
43    }
44
45    pub(crate) fn match_short(&self, val: char) -> bool {
46        match self {
47            Arg::Short(s, _, _) => *s == val,
48            Arg::ArgWord(_) | Arg::Long(_, _, _) | Arg::Word(_) | Arg::PosWord(_) => false,
49        }
50    }
51
52    pub(crate) fn match_long(&self, val: &str) -> bool {
53        match self {
54            Arg::Long(s, _, _) => *s == val,
55            Arg::Short(_, _, _) | Arg::ArgWord(_) | Arg::Word(_) | Arg::PosWord(_) => false,
56        }
57    }
58}
59
// short flag disambiguations:
//
// Short flags | short arg
// No          | No        | no problem
// Yes         | No        | use flag
// No          | Yes       | use arg
// Yes         | Yes       | ask user?
//
// -a  - just a regular short flag: "-a"
// -abc - assuming there are short flags a, b and c: "-a -b -c", assuming utf8 values AND there's no argument -a
// -abc - assuming there are no short flags -a -b -c (only argument -a): "-a bc"
// -abc - assuming both short a b c AND there's argument -a - need to disambiguate on a context level
//
// 1. parse argument into an ambiguous representation that can store both short flags and argument
// 2. collect short flags/args when entering the subparser
// 3. when reaching an ambiguous item ... (TODO: the original note breaks off at this point)
//
77
78impl std::fmt::Display for Arg {
79    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
80        match self {
81            Arg::Short(s, _, _) => write!(f, "-{}", s),
82            Arg::Long(l, _, _) => write!(f, "--{}", l),
83            Arg::ArgWord(w) | Arg::Word(w) | Arg::PosWord(w) => {
84                write!(f, "{}", w.to_string_lossy())
85            }
86        }
87    }
88}
89
/// Kind of a named command line item as detected by [`split_os_argument`]
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum ArgType {
    /// Item starts with a single dash: `-f`
    Short,
    /// Item starts with two dashes: `--flag`
    Long,
}
95
96/// split [`OsString`] into argument specific bits
97///
98/// takes a possibly non-utf8 string looking like "--name=value" and splits it into bits:
99/// "--" - type, "name" - name, must be representable as utf8, "=" - optional, "value" - flag
100///
101/// dashes and equals sign are low codepoint values and - can look for them literally in a string.
102/// This probably means not supporting dashes with diacritics, but that's okay
103///
104/// name must be valid utf8 after conversion and must not include `=`
105///
106/// argument is optional and can be non valid utf8.
107///
108/// The idea is to split the [`OsString`] into opaque parts by looking only at the parts simple parts
109/// and let stdlib to handle the decoding of those parts.
110///
111/// performance wise this (at least on unix) works some small number percentage slower than the
112/// previous version
113///
114///
115/// Notation -fbar is ambigous and could mean either `-f -b -a -r` or `-f=bar`, resolve it into
116/// [`Arg::Ambiguity`] and let subparser disambiguate it later depending on available short flag and
117/// arguments
118pub(crate) fn split_os_argument(input: &std::ffi::OsStr) -> Option<(ArgType, String, Option<Arg>)> {
119    #[cfg(any(unix, windows))]
120    {
121        // OsString are sequences of smaller smaller elements - bytes in unix and
122        // possibly invalid utf16 items on windows
123        #[cfg(unix)]
124        type Elt = u8;
125        #[cfg(windows)]
126        type Elt = u16;
127
128        // reuse allocation on unix, don't reuse allocations on windows
129        // either case - pack a vector of elements back into OsString
130        fn os_from_vec(vec: Vec<Elt>) -> OsString {
131            #[cfg(unix)]
132            {
133                <OsString as std::os::unix::ffi::OsStringExt>::from_vec(vec)
134            }
135            #[cfg(windows)]
136            {
137                <OsString as std::os::windows::ffi::OsStringExt>::from_wide(&vec)
138            }
139        }
140
141        // try to decode elements into a String
142        fn str_from_vec(vec: Vec<Elt>) -> Option<String> {
143            Some(os_from_vec(vec).to_str()?.to_owned())
144        }
145
146        // but in either case dashes and equals are just literal values just with different width
147        const DASH: Elt = b'-' as Elt;
148        const EQUALS: Elt = b'=' as Elt;
149
150        // preallocate something to store the name. oversized but avoids extra allocations/copying
151        let mut name = Vec::with_capacity(input.len());
152
153        let mut items;
154        #[cfg(unix)]
155        {
156            items = std::os::unix::ffi::OsStrExt::as_bytes(input)
157                .iter()
158                .copied();
159        }
160        #[cfg(windows)]
161        {
162            items = std::os::windows::ffi::OsStrExt::encode_wide(input);
163        }
164
165        // first item must be dash, otherwise it's positional or a flag value
166        if items.next()? != DASH {
167            return None;
168        }
169
170        // second item may or may not be, but should be present
171        let ty;
172        match items.next()? {
173            DASH => ty = ArgType::Long,
174            val => {
175                ty = ArgType::Short;
176                name.push(val);
177            }
178        }
179
180        // keep collecting until = or the end of the input
181        loop {
182            match items.next() {
183                Some(EQUALS) => {
184                    if ty == ArgType::Short && name.len() > 1 {
185                        let mut body = name.drain(1..).collect::<Vec<_>>();
186                        body.push(EQUALS);
187                        body.extend(items);
188                        name.truncate(1);
189                        let os = Arg::ArgWord(os_from_vec(body));
190                        return Some((ty, str_from_vec(name)?, Some(os)));
191                    }
192                    break;
193                }
194                Some(val) => name.push(val),
195                None => {
196                    if name.is_empty() {
197                        return None;
198                    }
199                    return Some((ty, str_from_vec(name)?, None));
200                }
201            }
202        }
203
204        let name = str_from_vec(name)?;
205        let word = {
206            let os = os_from_vec(items.collect());
207            Arg::ArgWord(os)
208        };
209        Some((ty, name, Some(word)))
210    }
211    #[cfg(not(any(unix, windows)))]
212    {
213        split_os_argument_fallback(input)
214    }
215}
216
/// Similar to [`split_os_argument`] but only works for utf8 values, used as a fallback function
/// on non windows/unix OSes
///
/// Returns `None` if input is not valid utf8, is empty, does not start with a dash or is
/// exactly `-` or `--`. Otherwise returns the type of the item, its name and, if `=` was
/// present, the value wrapped into [`Arg::ArgWord`].
///
/// For a short item with several characters in front of `=`, such as `-ab=c`, only the first
/// character is the name and the rest, `=` included, becomes the value: `a` and `b=c`. The
/// split is made on the boundary of the first character so a multi byte name such as the one
/// in `-ñ=val` is handled instead of causing a panic.
#[cfg(any(all(not(windows), not(unix)), test))]
pub(crate) fn split_os_argument_fallback(
    input: &std::ffi::OsStr,
) -> Option<(ArgType, String, Option<Arg>)> {
    // fallback supports only valid utf8 os strings, matches old behavior
    let string = input.to_str()?;

    let mut chars = string.chars();
    let mut name = String::with_capacity(string.len());

    // first character must be dash, otherwise it's positional or a flag value
    if chars.next()? != '-' {
        return None;
    }

    // second character may or may not be a dash, but it should be present
    let ty = match chars.next()? {
        '-' => ArgType::Long,
        val => {
            name.push(val);
            ArgType::Short
        }
    };

    // collect the argument's name up to '=' or until the end
    // if it's a flag
    loop {
        match chars.next() {
            Some('=') => {
                if ty == ArgType::Short {
                    // `-ab=c`: name is the first character only. `String::len` counts bytes,
                    // so compare against and split at the byte width of that character
                    let width = name.chars().next().map_or(0, char::len_utf8);
                    if name.len() > width {
                        let mut body = name.split_off(width);
                        body.push('=');
                        body.extend(chars);
                        let os = Arg::ArgWord(OsString::from(body));
                        return Some((ty, name, Some(os)));
                    }
                }
                break;
            }

            Some(val) => name.push(val),
            None => {
                // `--` on its own is not a named item
                if name.is_empty() {
                    return None;
                }
                return Some((ty, name, None));
            }
        }
    }

    // got here by hitting `=`: whatever is left in `chars` is the value
    let value = OsString::from(chars.collect::<String>());
    Some((ty, name, Some(Arg::ArgWord(value))))
}