Skip to main content

storage/webstorage/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5mod engines;
6
7use std::borrow::ToOwned;
8use std::collections::BTreeMap;
9use std::collections::hash_map::Entry;
10use std::path::PathBuf;
11use std::sync::Arc;
12use std::thread;
13
14use log::warn;
15use malloc_size_of::MallocSizeOf;
16use malloc_size_of_derive::MallocSizeOf;
17use net_traits::pub_domains::registered_domain_name;
18use profile_traits::mem::{
19    ProcessReports, ProfilerChan as MemProfilerChan, Report, ReportKind, perform_memory_report,
20};
21use profile_traits::path;
22use rustc_hash::FxHashMap;
23use serde::{Deserialize, Serialize};
24use servo_base::generic_channel::{self, GenericReceiver, GenericSender};
25use servo_base::id::WebViewId;
26use servo_base::threadpool::ThreadPool;
27use servo_base::{read_json_from_file, write_json_to_file};
28use servo_url::{ImmutableOrigin, ServoUrl};
29use storage_traits::webstorage_thread::{OriginDescriptor, WebStorageThreadMsg, WebStorageType};
30use uuid::Uuid;
31
32use crate::webstorage::engines::WebStorageEngine;
33use crate::webstorage::engines::sqlite::SqliteEngine;
34
35const QUOTA_SIZE_LIMIT: usize = 5 * 1024 * 1024;
36
/// Constructor abstraction for a handle to the web storage thread.
pub trait WebStorageThreadFactory {
    /// Builds the handle.
    ///
    /// * `config_dir` - optional directory used for persisted storage data.
    /// * `mem_profiler_chan` - channel to the memory profiler.
    /// * `reporter_name` - name under which memory reports are registered.
    fn new(
        config_dir: Option<PathBuf>,
        mem_profiler_chan: MemProfilerChan,
        reporter_name: String,
    ) -> Self;
}
44
45impl WebStorageThreadFactory for GenericSender<WebStorageThreadMsg> {
46    /// Create a storage thread
47    fn new(
48        config_dir: Option<PathBuf>,
49        mem_profiler_chan: MemProfilerChan,
50        reporter_name: String,
51    ) -> GenericSender<WebStorageThreadMsg> {
52        let (chan, port) = generic_channel::channel().unwrap();
53        let chan2 = chan.clone();
54        thread::Builder::new()
55            .name("WebStorageManager".to_owned())
56            .spawn(move || {
57                mem_profiler_chan.run_with_memory_reporting(
58                    || WebStorageManager::new(port, config_dir).start(),
59                    reporter_name,
60                    chan2,
61                    WebStorageThreadMsg::CollectMemoryReport,
62                );
63            })
64            .expect("Thread spawning failed");
65        chan
66    }
67}
68
/// The set of origins known to have used a given kind of web storage.
///
/// Serializable so that it can be written to and read back from disk.
#[derive(Deserialize, MallocSizeOf, Serialize)]
pub struct StorageOrigins {
    // TODO: Consider grouping by eTLD+1
    // TODO: Consider ImmutableOrigin instead of String for tracking origins
    /// Descriptors keyed by the ASCII serialization of their origin.
    origin_descriptors: FxHashMap<String, OriginDescriptor>,
}
75
76impl StorageOrigins {
77    fn new() -> Self {
78        StorageOrigins {
79            origin_descriptors: FxHashMap::default(),
80        }
81    }
82
83    /// Ensures that an origin descriptor exists for the given origin.
84    ///
85    /// Returns `true` if a new origin descriptor was created, or `false` if
86    /// one already existed.
87    fn ensure_origin_descriptor(&mut self, origin: &ImmutableOrigin) -> bool {
88        let origin = origin.ascii_serialization();
89        match self.origin_descriptors.entry(origin.clone()) {
90            Entry::Occupied(_) => false,
91            Entry::Vacant(entry) => {
92                entry.insert(OriginDescriptor::new(origin));
93                true
94            },
95        }
96    }
97
98    fn origin_descriptors(&self) -> Vec<OriginDescriptor> {
99        self.origin_descriptors.values().cloned().collect()
100    }
101
102    fn take_origins_for_sites(&mut self, sites: &[String]) -> Vec<ImmutableOrigin> {
103        // TODO: This can use `extract_if` once MSVR is bumbed (>=1.88)
104
105        let mut result = Vec::new();
106
107        self.origin_descriptors.retain(|_, descriptor| {
108            let url =
109                ServoUrl::parse(&descriptor.name).expect("Should always be able to parse origins.");
110
111            let Some(domain) = registered_domain_name(&url) else {
112                warn!("Failed to get a registered domain name for: {url}");
113                return true;
114            };
115            let domain = domain.to_string();
116
117            if sites.contains(&domain) {
118                result.push(url.origin());
119                false
120            } else {
121                true
122            }
123        });
124
125        result
126    }
127}
128
/// The key/value pairs stored for a single origin, together with a running
/// total of the bytes they occupy.
#[derive(Clone, Default, MallocSizeOf)]
pub struct OriginEntry {
    /// Stored items, ordered by key.
    tree: BTreeMap<String, String>,
    /// Sum of the byte lengths of all keys and values in `tree`.
    size: usize,
}
134
135impl OriginEntry {
136    pub fn inner(&self) -> &BTreeMap<String, String> {
137        &self.tree
138    }
139
140    pub fn insert(&mut self, key: String, value: String) -> Option<String> {
141        let old_value = self.tree.insert(key.clone(), value.clone());
142        let size_change = match &old_value {
143            Some(old) => value.len() as isize - old.len() as isize,
144            None => (key.len() + value.len()) as isize,
145        };
146        self.size = (self.size as isize + size_change) as usize;
147        old_value
148    }
149
150    pub fn remove(&mut self, key: &str) -> Option<String> {
151        let old_value = self.tree.remove(key);
152        if let Some(old) = &old_value {
153            self.size -= key.len() + old.len();
154        }
155        old_value
156    }
157
158    pub fn clear(&mut self) {
159        self.tree.clear();
160        self.size = 0;
161    }
162
163    pub fn size(&self) -> usize {
164        self.size
165    }
166}
167
/// Pairs a storage engine with the in-memory copy of the data it holds.
struct WebStorageEnvironment<E: WebStorageEngine> {
    /// Backing store the data is loaded from and written to.
    engine: E,
    /// In-memory key/value data, populated from `engine` on construction.
    data: OriginEntry,
}
172
impl<E: WebStorageEngine> MallocSizeOf for WebStorageEnvironment<E> {
    /// Reports the heap size of the in-memory data only; the engine is not
    /// measured.
    fn size_of(&self, ops: &mut malloc_size_of::MallocSizeOfOps) -> usize {
        self.data.size_of(ops)
    }
}
178
179impl<E: WebStorageEngine> WebStorageEnvironment<E> {
180    fn new(engine: E) -> Self {
181        WebStorageEnvironment {
182            data: engine.load().unwrap_or_default(),
183            engine,
184        }
185    }
186
187    fn clear(&mut self) {
188        self.data.clear();
189        let _ = self.engine.clear();
190    }
191
192    fn delete(&mut self, key: &str) {
193        let _ = self.engine.delete(key);
194    }
195
196    fn set(&mut self, key: &str, value: &str) {
197        let _ = self.engine.set(key, value);
198    }
199}
200
impl<E: WebStorageEngine> Drop for WebStorageEnvironment<E> {
    /// Hands the in-memory data to the engine's `save` when the environment
    /// goes away.
    fn drop(&mut self) {
        self.engine.save(&self.data);
    }
}
206
/// State owned by the web storage thread.
struct WebStorageManager {
    /// Receiving end of the channel on which requests arrive.
    port: GenericReceiver<WebStorageThreadMsg>,
    /// Origins that have used session storage.
    session_storage_origins: StorageOrigins,
    /// Origins that have used local storage; read from and written to
    /// `localstorage.json` in `config_dir` when one is configured.
    local_storage_origins: StorageOrigins,
    /// Session storage data, per webview and then per origin.
    session_data: FxHashMap<WebViewId, FxHashMap<ImmutableOrigin, OriginEntry>>,
    /// Directory under which persisted data lives, if any.
    config_dir: Option<PathBuf>,
    /// Thread pool handed to each storage engine on creation.
    thread_pool: Arc<ThreadPool>,
    /// Local storage environments, created on first access to an origin.
    environments: FxHashMap<ImmutableOrigin, WebStorageEnvironment<SqliteEngine>>,
}
216
217impl WebStorageManager {
218    fn new(
219        port: GenericReceiver<WebStorageThreadMsg>,
220        config_dir: Option<PathBuf>,
221    ) -> WebStorageManager {
222        let mut local_storage_origins = StorageOrigins::new();
223        if let Some(ref config_dir) = config_dir {
224            read_json_from_file(&mut local_storage_origins, config_dir, "localstorage.json");
225        }
226        WebStorageManager {
227            port,
228            session_storage_origins: StorageOrigins::new(),
229            local_storage_origins,
230            session_data: FxHashMap::default(),
231            config_dir,
232            thread_pool: ThreadPool::global(),
233            environments: FxHashMap::default(),
234        }
235    }
236}
237
238impl WebStorageManager {
    /// Runs the message loop: receives requests from `port` and dispatches
    /// each one to its handler until an `Exit` message arrives.
    ///
    /// # Panics
    ///
    /// Panics if receiving from `port` fails.
    fn start(&mut self) {
        loop {
            match self.port.recv().unwrap() {
                WebStorageThreadMsg::Length(sender, storage_type, webview_id, url) => {
                    self.length(sender, storage_type, webview_id, url)
                },
                WebStorageThreadMsg::Key(sender, storage_type, webview_id, url, index) => {
                    self.key(sender, storage_type, webview_id, url, index)
                },
                WebStorageThreadMsg::Keys(sender, storage_type, webview_id, url) => {
                    self.keys(sender, storage_type, webview_id, url)
                },
                WebStorageThreadMsg::SetItem(
                    sender,
                    storage_type,
                    webview_id,
                    url,
                    name,
                    value,
                ) => {
                    self.set_item(sender, storage_type, webview_id, url, name, value);
                },
                WebStorageThreadMsg::GetItem(sender, storage_type, webview_id, url, name) => {
                    self.request_item(sender, storage_type, webview_id, url, name)
                },
                WebStorageThreadMsg::RemoveItem(sender, storage_type, webview_id, url, name) => {
                    self.remove_item(sender, storage_type, webview_id, url, name);
                },
                WebStorageThreadMsg::Clear(sender, storage_type, webview_id, url) => {
                    self.clear(sender, storage_type, webview_id, url);
                },
                WebStorageThreadMsg::Clone {
                    sender,
                    src: src_webview_id,
                    dest: dest_webview_id,
                } => {
                    self.clone(src_webview_id, dest_webview_id);
                    // Acknowledge completion; a vanished requester is not an error.
                    let _ = sender.send(());
                },
                WebStorageThreadMsg::ListOrigins(sender, storage_type) => {
                    let _ = sender.send(self.origin_descriptors(storage_type));
                },
                WebStorageThreadMsg::ClearDataForSites(sender, storage_type, sites) => {
                    self.clear_data_for_sites(storage_type, &sites);
                    let _ = sender.send(());
                },
                WebStorageThreadMsg::CollectMemoryReport(sender) => {
                    let reports = self.collect_memory_reports();
                    sender.send(ProcessReports::new(reports));
                },
                WebStorageThreadMsg::Exit(sender) => {
                    // Nothing to flush here: local storage changes are
                    // written out eagerly as they happen.
                    let _ = sender.send(());
                    break;
                },
            }
        }
    }
297
298    fn collect_memory_reports(&self) -> Vec<Report> {
299        let mut reports = vec![];
300        perform_memory_report(|ops| {
301            reports.push(Report {
302                path: path!["storage", "local"],
303                kind: ReportKind::ExplicitJemallocHeapSize,
304                size: self.environments.size_of(ops) + self.local_storage_origins.size_of(ops),
305            });
306
307            reports.push(Report {
308                path: path!["storage", "session"],
309                kind: ReportKind::ExplicitJemallocHeapSize,
310                size: self.session_data.size_of(ops) + self.session_storage_origins.size_of(ops),
311            });
312        });
313        reports
314    }
315
316    fn save_local_storage_origins(&self) {
317        if let Some(ref config_dir) = self.config_dir {
318            write_json_to_file(&self.local_storage_origins, config_dir, "localstorage.json");
319        }
320    }
321
322    fn get_origin_location(&self, origin: &ImmutableOrigin) -> Option<PathBuf> {
323        match &self.config_dir {
324            Some(config_dir) => {
325                const NAMESPACE_SERVO_WEBSTORAGE: &uuid::Uuid = &Uuid::from_bytes([
326                    0x37, 0x9e, 0x56, 0xb0, 0x1a, 0x76, 0x44, 0xc5, 0xa4, 0xdb, 0xe2, 0x18, 0xc5,
327                    0xc8, 0xa3, 0x5d,
328                ]);
329                let origin_uuid = Uuid::new_v5(
330                    NAMESPACE_SERVO_WEBSTORAGE,
331                    origin.ascii_serialization().as_bytes(),
332                );
333                Some(config_dir.join("webstorage").join(origin_uuid.to_string()))
334            },
335            None => None,
336        }
337    }
338
339    fn add_new_environment(&mut self, origin: &ImmutableOrigin) -> Result<(), rusqlite::Error> {
340        let origin_location = self.get_origin_location(origin);
341
342        let engine = SqliteEngine::new(&origin_location, self.thread_pool.clone())?;
343        let environment = WebStorageEnvironment::new(engine);
344        self.environments.insert(origin.clone(), environment);
345        Ok(())
346    }
347
348    fn get_environment(
349        &mut self,
350        origin: &ImmutableOrigin,
351    ) -> Result<&WebStorageEnvironment<SqliteEngine>, rusqlite::Error> {
352        if self.environments.contains_key(origin) {
353            return Ok(self
354                .environments
355                .get(origin)
356                .expect("environment should exist after contains_key check"));
357        }
358
359        self.add_new_environment(origin)?;
360
361        Ok(self
362            .environments
363            .get(origin)
364            .expect("environment should exist after add_new_environment"))
365    }
366
367    fn get_environment_mut(
368        &mut self,
369        origin: &ImmutableOrigin,
370    ) -> Result<&mut WebStorageEnvironment<SqliteEngine>, rusqlite::Error> {
371        if self.environments.contains_key(origin) {
372            return Ok(self
373                .environments
374                .get_mut(origin)
375                .expect("environment should exist after contains_key check"));
376        }
377
378        self.add_new_environment(origin)?;
379
380        Ok(self
381            .environments
382            .get_mut(origin)
383            .expect("environment should exist after add_new_environment"))
384    }
385
386    fn select_data(
387        &mut self,
388        storage_type: WebStorageType,
389        webview_id: WebViewId,
390        origin: ImmutableOrigin,
391    ) -> Option<&OriginEntry> {
392        match storage_type {
393            WebStorageType::Session => self
394                .session_data
395                .get(&webview_id)
396                .and_then(|origin_map| origin_map.get(&origin)),
397            WebStorageType::Local => {
398                // FIXME: Selecting data for read only operations should not
399                // create a new origin descriptor. However, this currently
400                // needs to happen because get_environment always creates an
401                // environment, even for read only operations.
402                if self.local_storage_origins.ensure_origin_descriptor(&origin) {
403                    self.save_local_storage_origins();
404                }
405                match self.get_environment(&origin) {
406                    Ok(env) => Some(&env.data),
407                    Err(e) => {
408                        warn!("Failed to get storage environment: {:?}", e);
409                        None
410                    },
411                }
412            },
413        }
414    }
415
416    fn select_data_mut(
417        &mut self,
418        storage_type: WebStorageType,
419        webview_id: WebViewId,
420        origin: ImmutableOrigin,
421    ) -> Option<&mut OriginEntry> {
422        match storage_type {
423            WebStorageType::Session => self
424                .session_data
425                .get_mut(&webview_id)
426                .and_then(|origin_map| origin_map.get_mut(&origin)),
427            WebStorageType::Local => {
428                // FIXME: Selecting data for read only operations should not
429                // create a new origin descriptor. However, this currently
430                // needs to happen because get_environment always creates an
431                // environment, even for read only operations.
432                if self.local_storage_origins.ensure_origin_descriptor(&origin) {
433                    self.save_local_storage_origins();
434                }
435                match self.get_environment_mut(&origin) {
436                    Ok(env) => Some(&mut env.data),
437                    Err(e) => {
438                        warn!("Failed to get storage environment: {:?}", e);
439                        None
440                    },
441                }
442            },
443        }
444    }
445
446    fn ensure_data_mut(
447        &mut self,
448        storage_type: WebStorageType,
449        webview_id: WebViewId,
450        origin: ImmutableOrigin,
451    ) -> Option<&mut OriginEntry> {
452        match storage_type {
453            WebStorageType::Session => {
454                self.session_storage_origins
455                    .ensure_origin_descriptor(&origin);
456                Some(
457                    self.session_data
458                        .entry(webview_id)
459                        .or_default()
460                        .entry(origin)
461                        .or_default(),
462                )
463            },
464            WebStorageType::Local => {
465                if self.local_storage_origins.ensure_origin_descriptor(&origin) {
466                    self.save_local_storage_origins();
467                }
468                match self.get_environment_mut(&origin) {
469                    Ok(env) => Some(&mut env.data),
470                    Err(e) => {
471                        warn!("Failed to get storage environment: {:?}", e);
472                        None
473                    },
474                }
475            },
476        }
477    }
478
479    fn length(
480        &mut self,
481        sender: GenericSender<usize>,
482        storage_type: WebStorageType,
483        webview_id: WebViewId,
484        origin: ImmutableOrigin,
485    ) {
486        let data = self.select_data(storage_type, webview_id, origin);
487        sender
488            .send(data.map_or(0, |entry| entry.inner().len()))
489            .unwrap();
490    }
491
492    fn key(
493        &mut self,
494        sender: GenericSender<Option<String>>,
495        storage_type: WebStorageType,
496        webview_id: WebViewId,
497        origin: ImmutableOrigin,
498        index: u32,
499    ) {
500        let data = self.select_data(storage_type, webview_id, origin);
501        let key = data
502            .and_then(|entry| entry.inner().keys().nth(index as usize))
503            .cloned();
504        sender.send(key).unwrap();
505    }
506
507    fn keys(
508        &mut self,
509        sender: GenericSender<Vec<String>>,
510        storage_type: WebStorageType,
511        webview_id: WebViewId,
512        origin: ImmutableOrigin,
513    ) {
514        let data = self.select_data(storage_type, webview_id, origin);
515        let keys = data.map_or(vec![], |entry| entry.inner().keys().cloned().collect());
516
517        sender.send(keys).unwrap();
518    }
519
520    /// Sends Ok(changed, Some(old_value)) in case there was a previous
521    /// value with the same key name but with different value name
522    /// otherwise sends Err(()) to indicate that the operation would result in
523    /// exceeding the quota limit
524    fn set_item(
525        &mut self,
526        sender: GenericSender<Result<(bool, Option<String>), ()>>,
527        storage_type: WebStorageType,
528        webview_id: WebViewId,
529        origin: ImmutableOrigin,
530        name: String,
531        value: String,
532    ) {
533        let Some(entry) = self.ensure_data_mut(storage_type, webview_id, origin.clone()) else {
534            sender.send(Err(())).unwrap();
535            return;
536        };
537        let total_size = entry.size();
538
539        let mut new_total_size = total_size + value.len();
540        if let Some(old_value) = entry.inner().get(&name) {
541            new_total_size -= old_value.len();
542        } else {
543            new_total_size += name.len();
544        }
545
546        let message = if new_total_size > QUOTA_SIZE_LIMIT {
547            Err(())
548        } else {
549            let result =
550                entry
551                    .insert(name.clone(), value.clone())
552                    .map_or(Ok((true, None)), |old| {
553                        if old == value {
554                            Ok((false, None))
555                        } else {
556                            Ok((true, Some(old)))
557                        }
558                    });
559            if storage_type == WebStorageType::Local &&
560                let Ok(env) = self.get_environment_mut(&origin)
561            {
562                env.set(&name, &value);
563            }
564            result
565        };
566        sender.send(message).unwrap();
567    }
568
569    fn request_item(
570        &mut self,
571        sender: GenericSender<Option<String>>,
572        storage_type: WebStorageType,
573        webview_id: WebViewId,
574        origin: ImmutableOrigin,
575        name: String,
576    ) {
577        let data = self.select_data(storage_type, webview_id, origin);
578        sender
579            .send(data.and_then(|entry| entry.inner().get(&name)).cloned())
580            .unwrap();
581    }
582
583    /// Sends Some(old_value) in case there was a previous value with the key name, otherwise sends None
584    fn remove_item(
585        &mut self,
586        sender: GenericSender<Option<String>>,
587        storage_type: WebStorageType,
588        webview_id: WebViewId,
589        origin: ImmutableOrigin,
590        name: String,
591    ) {
592        let data = self.select_data_mut(storage_type, webview_id, origin.clone());
593        let old_value = data.and_then(|entry| entry.remove(&name));
594        sender.send(old_value).unwrap();
595        if storage_type == WebStorageType::Local &&
596            let Ok(env) = self.get_environment_mut(&origin)
597        {
598            env.delete(&name);
599        }
600    }
601
602    fn clear(
603        &mut self,
604        sender: GenericSender<bool>,
605        storage_type: WebStorageType,
606        webview_id: WebViewId,
607        origin: ImmutableOrigin,
608    ) {
609        let data = self.select_data_mut(storage_type, webview_id, origin.clone());
610        sender
611            .send(data.is_some_and(|entry| {
612                if !entry.inner().is_empty() {
613                    entry.clear();
614                    true
615                } else {
616                    false
617                }
618            }))
619            .unwrap();
620        if storage_type == WebStorageType::Local &&
621            let Ok(env) = self.get_environment_mut(&origin)
622        {
623            env.clear();
624        }
625    }
626
627    fn clone(&mut self, src_webview_id: WebViewId, dest_webview_id: WebViewId) {
628        let Some(src_origin_entries) = self.session_data.get(&src_webview_id) else {
629            return;
630        };
631
632        let dest_origin_entries = src_origin_entries.clone();
633        self.session_data
634            .insert(dest_webview_id, dest_origin_entries);
635    }
636
637    fn origin_descriptors(&mut self, storage_type: WebStorageType) -> Vec<OriginDescriptor> {
638        match storage_type {
639            WebStorageType::Session => self.session_storage_origins.origin_descriptors(),
640            WebStorageType::Local => self.local_storage_origins.origin_descriptors(),
641        }
642    }
643
644    fn clear_data_for_sites(&mut self, storage_type: WebStorageType, sites: &[String]) {
645        match storage_type {
646            WebStorageType::Session => {
647                let origins = self.session_storage_origins.take_origins_for_sites(sites);
648
649                self.session_data.retain(|_, origins_map| {
650                    for origin in &origins {
651                        origins_map.remove(origin);
652                    }
653                    !origins_map.is_empty()
654                });
655            },
656            WebStorageType::Local => {
657                let origins = self.local_storage_origins.take_origins_for_sites(sites);
658
659                if self.config_dir.is_some() {
660                    for origin in origins {
661                        self.environments.remove(&origin);
662
663                        let origin_location = self
664                            .get_origin_location(&origin)
665                            .expect("Should always be able to get origin location.");
666
667                        if let Err(error) = std::fs::remove_dir_all(&origin_location) {
668                            warn!("Failed to delete origin location: {:?}", error);
669                            self.local_storage_origins.ensure_origin_descriptor(&origin);
670                        }
671                    }
672
673                    self.save_local_storage_origins();
674                } else {
675                    for origin in origins {
676                        self.environments.remove(&origin);
677                    }
678                }
679            },
680        }
681    }
682}