storage/webstorage/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5mod engines;
6
7use std::borrow::ToOwned;
8use std::collections::BTreeMap;
9use std::collections::hash_map::Entry;
10use std::path::PathBuf;
11use std::sync::Arc;
12use std::thread;
13
14use base::generic_channel::{self, GenericReceiver, GenericSender};
15use base::id::WebViewId;
16use base::threadpool::ThreadPool;
17use base::{read_json_from_file, write_json_to_file};
18use log::warn;
19use malloc_size_of::MallocSizeOf;
20use malloc_size_of_derive::MallocSizeOf;
21use net_traits::pub_domains::registered_domain_name;
22use profile_traits::mem::{
23    ProcessReports, ProfilerChan as MemProfilerChan, Report, ReportKind, perform_memory_report,
24};
25use profile_traits::path;
26use rustc_hash::FxHashMap;
27use serde::{Deserialize, Serialize};
28use servo_config::pref;
29use servo_url::{ImmutableOrigin, ServoUrl};
30use storage_traits::webstorage_thread::{OriginDescriptor, WebStorageThreadMsg, WebStorageType};
31use uuid::Uuid;
32
33use crate::webstorage::engines::WebStorageEngine;
34use crate::webstorage::engines::sqlite::SqliteEngine;
35
/// Upper bound, in bytes, on the tracked size (keys plus values) of a single
/// storage area: 5 MiB.
const QUOTA_SIZE_LIMIT: usize = 5 << 20;
37
/// Constructor interface for handles to the web storage thread.
pub trait WebStorageThreadFactory {
    /// Builds `Self` from an optional configuration directory and a memory
    /// profiler channel.
    fn new(config_dir: Option<PathBuf>, mem_profiler_chan: MemProfilerChan) -> Self;
}
41
42impl WebStorageThreadFactory for GenericSender<WebStorageThreadMsg> {
43    /// Create a storage thread
44    fn new(
45        config_dir: Option<PathBuf>,
46        mem_profiler_chan: MemProfilerChan,
47    ) -> GenericSender<WebStorageThreadMsg> {
48        let (chan, port) = generic_channel::channel().unwrap();
49        let chan2 = chan.clone();
50        thread::Builder::new()
51            .name("WebStorageManager".to_owned())
52            .spawn(move || {
53                mem_profiler_chan.run_with_memory_reporting(
54                    || WebStorageManager::new(port, config_dir).start(),
55                    String::from("storage-reporter"),
56                    chan2,
57                    WebStorageThreadMsg::CollectMemoryReport,
58                );
59            })
60            .expect("Thread spawning failed");
61        chan
62    }
63}
64
/// The set of origins that have an origin descriptor, serializable with serde.
#[derive(Deserialize, Serialize)]
pub struct StorageOrigins {
    // TODO: Consider grouping by eTLD+1
    // TODO: Consider ImmutableOrigin instead of String for tracking origins
    /// Maps an origin's ASCII serialization to its descriptor.
    origin_descriptors: FxHashMap<String, OriginDescriptor>,
}
71
72impl StorageOrigins {
73    fn new() -> Self {
74        StorageOrigins {
75            origin_descriptors: FxHashMap::default(),
76        }
77    }
78
79    /// Ensures that an origin descriptor exists for the given origin.
80    ///
81    /// Returns `true` if a new origin descriptor was created, or `false` if
82    /// one already existed.
83    fn ensure_origin_descriptor(&mut self, origin: &ImmutableOrigin) -> bool {
84        let origin = origin.ascii_serialization();
85        match self.origin_descriptors.entry(origin.clone()) {
86            Entry::Occupied(_) => false,
87            Entry::Vacant(entry) => {
88                entry.insert(OriginDescriptor::new(origin));
89                true
90            },
91        }
92    }
93
94    fn origin_descriptors(&self) -> Vec<OriginDescriptor> {
95        self.origin_descriptors.values().cloned().collect()
96    }
97
98    fn take_origins_for_sites(&mut self, sites: &[String]) -> Vec<ImmutableOrigin> {
99        // TODO: This can use `extract_if` once MSVR is bumbed (>=1.88)
100
101        let mut result = Vec::new();
102
103        self.origin_descriptors.retain(|_, descriptor| {
104            let url =
105                ServoUrl::parse(&descriptor.name).expect("Should always be able to parse origins.");
106
107            let Some(domain) = registered_domain_name(&url) else {
108                warn!("Failed to get a registered domain name for: {url}");
109                return true;
110            };
111            let domain = domain.to_string();
112
113            if sites.contains(&domain) {
114                result.push(url.origin());
115                false
116            } else {
117                true
118            }
119        });
120
121        result
122    }
123}
124
/// Key/value data for one storage area together with its tracked size.
#[derive(Clone, Default, MallocSizeOf)]
pub struct OriginEntry {
    /// The stored items, ordered by key.
    tree: BTreeMap<String, String>,
    /// Sum of the byte lengths of all keys and values currently in `tree`,
    /// maintained by `insert`, `remove` and `clear`.
    size: usize,
}
130
131impl OriginEntry {
132    pub fn inner(&self) -> &BTreeMap<String, String> {
133        &self.tree
134    }
135
136    pub fn insert(&mut self, key: String, value: String) -> Option<String> {
137        let old_value = self.tree.insert(key.clone(), value.clone());
138        let size_change = match &old_value {
139            Some(old) => value.len() as isize - old.len() as isize,
140            None => (key.len() + value.len()) as isize,
141        };
142        self.size = (self.size as isize + size_change) as usize;
143        old_value
144    }
145
146    pub fn remove(&mut self, key: &str) -> Option<String> {
147        let old_value = self.tree.remove(key);
148        if let Some(old) = &old_value {
149            self.size -= key.len() + old.len();
150        }
151        old_value
152    }
153
154    pub fn clear(&mut self) {
155        self.tree.clear();
156        self.size = 0;
157    }
158
159    pub fn size(&self) -> usize {
160        self.size
161    }
162}
163
/// Pairs a storage engine with the in-memory data of one origin.
struct WebStorageEnvironment<E: WebStorageEngine> {
    /// Backend used to load, save and apply individual changes.
    engine: E,
    /// In-memory data, initialised from `engine.load()` on construction.
    data: OriginEntry,
}
168
169impl<E: WebStorageEngine> MallocSizeOf for WebStorageEnvironment<E> {
170    fn size_of(&self, ops: &mut malloc_size_of::MallocSizeOfOps) -> usize {
171        self.data.size_of(ops)
172    }
173}
174
175impl<E: WebStorageEngine> WebStorageEnvironment<E> {
176    fn new(engine: E) -> Self {
177        WebStorageEnvironment {
178            data: engine.load().unwrap_or_default(),
179            engine,
180        }
181    }
182
183    fn clear(&mut self) {
184        self.data.clear();
185        let _ = self.engine.clear();
186    }
187
188    fn delete(&mut self, key: &str) {
189        let _ = self.engine.delete(key);
190    }
191
192    fn set(&mut self, key: &str, value: &str) {
193        let _ = self.engine.set(key, value);
194    }
195}
196
impl<E: WebStorageEngine> Drop for WebStorageEnvironment<E> {
    /// Passes the in-memory data to the engine's `save` when the environment
    /// is dropped.
    fn drop(&mut self) {
        self.engine.save(&self.data);
    }
}
202
/// State owned by the web storage thread.
struct WebStorageManager {
    /// Receiving end of the channel on which requests arrive.
    port: GenericReceiver<WebStorageThreadMsg>,
    /// Origins that have session storage; starts out empty.
    session_storage_origins: StorageOrigins,
    /// Origins that have local storage; read from and written to
    /// `localstorage.json` when a config directory is set.
    local_storage_origins: StorageOrigins,
    /// Session storage data, per webview and then per origin.
    session_data: FxHashMap<WebViewId, FxHashMap<ImmutableOrigin, OriginEntry>>,
    /// Directory used for on-disk state, if any.
    config_dir: Option<PathBuf>,
    /// Thread pool handed to every `SqliteEngine` that is created.
    thread_pool: Arc<ThreadPool>,
    /// Local storage environments, created on first access to an origin.
    environments: FxHashMap<ImmutableOrigin, WebStorageEnvironment<SqliteEngine>>,
}
212
213impl WebStorageManager {
214    fn new(
215        port: GenericReceiver<WebStorageThreadMsg>,
216        config_dir: Option<PathBuf>,
217    ) -> WebStorageManager {
218        let mut local_storage_origins = StorageOrigins::new();
219        if let Some(ref config_dir) = config_dir {
220            read_json_from_file(&mut local_storage_origins, config_dir, "localstorage.json");
221        }
222        // Uses an estimate of the system cpus to process Webstorage transactions
223        // See https://doc.rust-lang.org/stable/std/thread/fn.available_parallelism.html
224        // If no information can be obtained about the system, uses 4 threads as a default
225        let thread_count = thread::available_parallelism()
226            .map(|i| i.get())
227            .unwrap_or(pref!(threadpools_fallback_worker_num) as usize)
228            .min(pref!(threadpools_webstorage_workers_max).max(1) as usize);
229        WebStorageManager {
230            port,
231            session_storage_origins: StorageOrigins::new(),
232            local_storage_origins,
233            session_data: FxHashMap::default(),
234            config_dir,
235            thread_pool: Arc::new(ThreadPool::new(thread_count, "WebStorage".to_string())),
236            environments: FxHashMap::default(),
237        }
238    }
239}
240
241impl WebStorageManager {
242    fn start(&mut self) {
243        loop {
244            match self.port.recv().unwrap() {
245                WebStorageThreadMsg::Length(sender, storage_type, webview_id, url) => {
246                    self.length(sender, storage_type, webview_id, url)
247                },
248                WebStorageThreadMsg::Key(sender, storage_type, webview_id, url, index) => {
249                    self.key(sender, storage_type, webview_id, url, index)
250                },
251                WebStorageThreadMsg::Keys(sender, storage_type, webview_id, url) => {
252                    self.keys(sender, storage_type, webview_id, url)
253                },
254                WebStorageThreadMsg::SetItem(
255                    sender,
256                    storage_type,
257                    webview_id,
258                    url,
259                    name,
260                    value,
261                ) => {
262                    self.set_item(sender, storage_type, webview_id, url, name, value);
263                },
264                WebStorageThreadMsg::GetItem(sender, storage_type, webview_id, url, name) => {
265                    self.request_item(sender, storage_type, webview_id, url, name)
266                },
267                WebStorageThreadMsg::RemoveItem(sender, storage_type, webview_id, url, name) => {
268                    self.remove_item(sender, storage_type, webview_id, url, name);
269                },
270                WebStorageThreadMsg::Clear(sender, storage_type, webview_id, url) => {
271                    self.clear(sender, storage_type, webview_id, url);
272                },
273                WebStorageThreadMsg::Clone {
274                    sender,
275                    src: src_webview_id,
276                    dest: dest_webview_id,
277                } => {
278                    self.clone(src_webview_id, dest_webview_id);
279                    let _ = sender.send(());
280                },
281                WebStorageThreadMsg::ListOrigins(sender, storage_type) => {
282                    let _ = sender.send(self.origin_descriptors(storage_type));
283                },
284                WebStorageThreadMsg::ClearDataForSites(sender, storage_type, sites) => {
285                    self.clear_data_for_sites(storage_type, &sites);
286                    let _ = sender.send(());
287                },
288                WebStorageThreadMsg::CollectMemoryReport(sender) => {
289                    let reports = self.collect_memory_reports();
290                    sender.send(ProcessReports::new(reports));
291                },
292                WebStorageThreadMsg::Exit(sender) => {
293                    // Nothing to do since we save localstorage set eagerly.
294                    let _ = sender.send(());
295                    break;
296                },
297            }
298        }
299    }
300
301    fn collect_memory_reports(&self) -> Vec<Report> {
302        let mut reports = vec![];
303        perform_memory_report(|ops| {
304            reports.push(Report {
305                path: path!["storage", "local"],
306                kind: ReportKind::ExplicitJemallocHeapSize,
307                size: self.environments.size_of(ops),
308            });
309
310            reports.push(Report {
311                path: path!["storage", "session"],
312                kind: ReportKind::ExplicitJemallocHeapSize,
313                size: self.session_data.size_of(ops),
314            });
315        });
316        reports
317    }
318
319    fn save_local_storage_origins(&self) {
320        if let Some(ref config_dir) = self.config_dir {
321            write_json_to_file(&self.local_storage_origins, config_dir, "localstorage.json");
322        }
323    }
324
325    fn get_origin_location(&self, origin: &ImmutableOrigin) -> Option<PathBuf> {
326        match &self.config_dir {
327            Some(config_dir) => {
328                const NAMESPACE_SERVO_WEBSTORAGE: &uuid::Uuid = &Uuid::from_bytes([
329                    0x37, 0x9e, 0x56, 0xb0, 0x1a, 0x76, 0x44, 0xc5, 0xa4, 0xdb, 0xe2, 0x18, 0xc5,
330                    0xc8, 0xa3, 0x5d,
331                ]);
332                let origin_uuid = Uuid::new_v5(
333                    NAMESPACE_SERVO_WEBSTORAGE,
334                    origin.ascii_serialization().as_bytes(),
335                );
336                Some(config_dir.join("webstorage").join(origin_uuid.to_string()))
337            },
338            None => None,
339        }
340    }
341
342    fn add_new_environment(&mut self, origin: &ImmutableOrigin) {
343        let origin_location = self.get_origin_location(origin);
344
345        let engine = SqliteEngine::new(&origin_location, self.thread_pool.clone()).unwrap();
346        let environment = WebStorageEnvironment::new(engine);
347        self.environments.insert(origin.clone(), environment);
348    }
349
350    fn get_environment(
351        &mut self,
352        origin: &ImmutableOrigin,
353    ) -> &WebStorageEnvironment<SqliteEngine> {
354        if self.environments.contains_key(origin) {
355            return self.environments.get(origin).unwrap();
356        }
357
358        self.add_new_environment(origin);
359
360        self.environments.get(origin).unwrap()
361    }
362
363    fn get_environment_mut(
364        &mut self,
365        origin: &ImmutableOrigin,
366    ) -> &mut WebStorageEnvironment<SqliteEngine> {
367        if self.environments.contains_key(origin) {
368            return self.environments.get_mut(origin).unwrap();
369        }
370
371        self.add_new_environment(origin);
372
373        self.environments.get_mut(origin).unwrap()
374    }
375
376    fn select_data(
377        &mut self,
378        storage_type: WebStorageType,
379        webview_id: WebViewId,
380        origin: ImmutableOrigin,
381    ) -> Option<&OriginEntry> {
382        match storage_type {
383            WebStorageType::Session => self
384                .session_data
385                .get(&webview_id)
386                .and_then(|origin_map| origin_map.get(&origin)),
387            WebStorageType::Local => {
388                // FIXME: Selecting data for read only operations should not
389                // create a new origin descriptor. However, this currently
390                // needs to happen because get_environment always creates an
391                // environment, even for read only operations.
392                if self.local_storage_origins.ensure_origin_descriptor(&origin) {
393                    self.save_local_storage_origins();
394                }
395                Some(&self.get_environment(&origin).data)
396            },
397        }
398    }
399
400    fn select_data_mut(
401        &mut self,
402        storage_type: WebStorageType,
403        webview_id: WebViewId,
404        origin: ImmutableOrigin,
405    ) -> Option<&mut OriginEntry> {
406        match storage_type {
407            WebStorageType::Session => self
408                .session_data
409                .get_mut(&webview_id)
410                .and_then(|origin_map| origin_map.get_mut(&origin)),
411            WebStorageType::Local => {
412                // FIXME: Selecting data for read only operations should not
413                // create a new origin descriptor. However, this currently
414                // needs to happen because get_environment always creates an
415                // environment, even for read only operations.
416                if self.local_storage_origins.ensure_origin_descriptor(&origin) {
417                    self.save_local_storage_origins();
418                }
419                Some(&mut self.get_environment_mut(&origin).data)
420            },
421        }
422    }
423
424    fn ensure_data_mut(
425        &mut self,
426        storage_type: WebStorageType,
427        webview_id: WebViewId,
428        origin: ImmutableOrigin,
429    ) -> &mut OriginEntry {
430        match storage_type {
431            WebStorageType::Session => {
432                self.session_storage_origins
433                    .ensure_origin_descriptor(&origin);
434                self.session_data
435                    .entry(webview_id)
436                    .or_default()
437                    .entry(origin)
438                    .or_default()
439            },
440            WebStorageType::Local => {
441                if self.local_storage_origins.ensure_origin_descriptor(&origin) {
442                    self.save_local_storage_origins();
443                }
444                &mut self.get_environment_mut(&origin).data
445            },
446        }
447    }
448
449    fn length(
450        &mut self,
451        sender: GenericSender<usize>,
452        storage_type: WebStorageType,
453        webview_id: WebViewId,
454        url: ServoUrl,
455    ) {
456        let data = self.select_data(storage_type, webview_id, url.origin());
457        sender
458            .send(data.map_or(0, |entry| entry.inner().len()))
459            .unwrap();
460    }
461
462    fn key(
463        &mut self,
464        sender: GenericSender<Option<String>>,
465        storage_type: WebStorageType,
466        webview_id: WebViewId,
467        url: ServoUrl,
468        index: u32,
469    ) {
470        let data = self.select_data(storage_type, webview_id, url.origin());
471        let key = data
472            .and_then(|entry| entry.inner().keys().nth(index as usize))
473            .cloned();
474        sender.send(key).unwrap();
475    }
476
477    fn keys(
478        &mut self,
479        sender: GenericSender<Vec<String>>,
480        storage_type: WebStorageType,
481        webview_id: WebViewId,
482        url: ServoUrl,
483    ) {
484        let data = self.select_data(storage_type, webview_id, url.origin());
485        let keys = data.map_or(vec![], |entry| entry.inner().keys().cloned().collect());
486
487        sender.send(keys).unwrap();
488    }
489
490    /// Sends Ok(changed, Some(old_value)) in case there was a previous
491    /// value with the same key name but with different value name
492    /// otherwise sends Err(()) to indicate that the operation would result in
493    /// exceeding the quota limit
494    fn set_item(
495        &mut self,
496        sender: GenericSender<Result<(bool, Option<String>), ()>>,
497        storage_type: WebStorageType,
498        webview_id: WebViewId,
499        url: ServoUrl,
500        name: String,
501        value: String,
502    ) {
503        let entry = self.ensure_data_mut(storage_type, webview_id, url.origin());
504        let total_size = entry.size();
505
506        let mut new_total_size = total_size + value.len();
507        if let Some(old_value) = entry.inner().get(&name) {
508            new_total_size -= old_value.len();
509        } else {
510            new_total_size += name.len();
511        }
512
513        let message = if new_total_size > QUOTA_SIZE_LIMIT {
514            Err(())
515        } else {
516            let result =
517                entry
518                    .insert(name.clone(), value.clone())
519                    .map_or(Ok((true, None)), |old| {
520                        if old == value {
521                            Ok((false, None))
522                        } else {
523                            Ok((true, Some(old)))
524                        }
525                    });
526            if storage_type == WebStorageType::Local {
527                let env = self.get_environment_mut(&url.origin());
528                env.set(&name, &value);
529            }
530            result
531        };
532        sender.send(message).unwrap();
533    }
534
535    fn request_item(
536        &mut self,
537        sender: GenericSender<Option<String>>,
538        storage_type: WebStorageType,
539        webview_id: WebViewId,
540        url: ServoUrl,
541        name: String,
542    ) {
543        let data = self.select_data(storage_type, webview_id, url.origin());
544        sender
545            .send(data.and_then(|entry| entry.inner().get(&name)).cloned())
546            .unwrap();
547    }
548
549    /// Sends Some(old_value) in case there was a previous value with the key name, otherwise sends None
550    fn remove_item(
551        &mut self,
552        sender: GenericSender<Option<String>>,
553        storage_type: WebStorageType,
554        webview_id: WebViewId,
555        url: ServoUrl,
556        name: String,
557    ) {
558        let data = self.select_data_mut(storage_type, webview_id, url.origin());
559        let old_value = data.and_then(|entry| entry.remove(&name));
560        sender.send(old_value).unwrap();
561        if storage_type == WebStorageType::Local {
562            let env = self.get_environment_mut(&url.origin());
563            env.delete(&name);
564        }
565    }
566
567    fn clear(
568        &mut self,
569        sender: GenericSender<bool>,
570        storage_type: WebStorageType,
571        webview_id: WebViewId,
572        url: ServoUrl,
573    ) {
574        let data = self.select_data_mut(storage_type, webview_id, url.origin());
575        sender
576            .send(data.is_some_and(|entry| {
577                if !entry.inner().is_empty() {
578                    entry.clear();
579                    true
580                } else {
581                    false
582                }
583            }))
584            .unwrap();
585        if storage_type == WebStorageType::Local {
586            let env = self.get_environment_mut(&url.origin());
587            env.clear();
588        }
589    }
590
591    fn clone(&mut self, src_webview_id: WebViewId, dest_webview_id: WebViewId) {
592        let Some(src_origin_entries) = self.session_data.get(&src_webview_id) else {
593            return;
594        };
595
596        let dest_origin_entries = src_origin_entries.clone();
597        self.session_data
598            .insert(dest_webview_id, dest_origin_entries);
599    }
600
601    fn origin_descriptors(&mut self, storage_type: WebStorageType) -> Vec<OriginDescriptor> {
602        match storage_type {
603            WebStorageType::Session => self.session_storage_origins.origin_descriptors(),
604            WebStorageType::Local => self.local_storage_origins.origin_descriptors(),
605        }
606    }
607
608    fn clear_data_for_sites(&mut self, storage_type: WebStorageType, sites: &[String]) {
609        match storage_type {
610            WebStorageType::Session => {
611                let origins = self.session_storage_origins.take_origins_for_sites(sites);
612
613                self.session_data.retain(|_, origins_map| {
614                    for origin in &origins {
615                        origins_map.remove(origin);
616                    }
617                    !origins_map.is_empty()
618                });
619            },
620            WebStorageType::Local => {
621                let origins = self.local_storage_origins.take_origins_for_sites(sites);
622
623                if self.config_dir.is_some() {
624                    for origin in origins {
625                        self.environments.remove(&origin);
626
627                        let origin_location = self
628                            .get_origin_location(&origin)
629                            .expect("Should always be able to get origin location.");
630
631                        if let Err(error) = std::fs::remove_dir_all(&origin_location) {
632                            warn!("Failed to delete origin location: {:?}", error);
633                            self.local_storage_origins.ensure_origin_descriptor(&origin);
634                        }
635                    }
636
637                    self.save_local_storage_origins();
638                } else {
639                    for origin in origins {
640                        self.environments.remove(&origin);
641                    }
642                }
643            },
644        }
645    }
646}