matrix_sdk_indexeddb/crypto_store/
indexeddb_serializer.rs

1// Copyright 2023 The Matrix.org Foundation C.I.C.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use base64::{
18    alphabet,
19    engine::{general_purpose, GeneralPurpose},
20    Engine,
21};
22use gloo_utils::format::JsValueSerdeExt;
23use matrix_sdk_crypto::CryptoStoreError;
24use matrix_sdk_store_encryption::{EncryptedValueBase64, StoreCipher};
25use serde::{de::DeserializeOwned, Deserialize, Serialize};
26use wasm_bindgen::JsValue;
27use web_sys::IdbKeyRange;
28use zeroize::Zeroizing;
29
30use crate::{safe_encode::SafeEncode, IndexeddbCryptoStoreError};
31
/// Shorthand for [`std::result::Result`] whose error type defaults to
/// [`IndexeddbCryptoStoreError`] when none is given.
type Result<A, E = IndexeddbCryptoStoreError> = std::result::Result<A, E>;
33
/// Base64 engine built from the standard alphabet and the `NO_PAD`
/// configuration. Used to encode and decode values when no cipher is set.
const BASE64: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, general_purpose::NO_PAD);
35
/// Handles the functionality of serializing and encrypting data for the
/// indexeddb store.
pub struct IndexeddbSerializer {
    /// Cipher used to hash keys and to encrypt/decrypt values. When `None`,
    /// keys and values are encoded without any encryption.
    store_cipher: Option<Arc<StoreCipher>>,
}
41
/// A value as produced by [`IndexeddbSerializer::maybe_encrypt_value`]: either
/// an encrypted blob, or plain base64 text.
///
/// The enum is `untagged`, so each variant is (de)serialized as its inner
/// value, with no wrapper naming the variant.
#[derive(Debug, Deserialize, Serialize)]
#[serde(untagged)]
pub enum MaybeEncrypted {
    /// The JSON bytes of the value, encrypted with the store cipher.
    Encrypted(EncryptedValueBase64),
    /// The JSON bytes of the value, encoded as unpadded base64.
    Unencrypted(String),
}
48
49impl IndexeddbSerializer {
50    pub fn new(store_cipher: Option<Arc<StoreCipher>>) -> Self {
51        Self { store_cipher }
52    }
53
54    /// Hash the given key securely for the given tablename, using the store
55    /// cipher.
56    ///
57    /// First calls [`SafeEncode::as_encoded_string`]
58    /// on the `key` to encode it into a formatted string.
59    ///
60    /// Then, if a cipher is configured, hashes the formatted key and returns
61    /// the hash encoded as unpadded base64.
62    ///
63    /// If no cipher is configured, just returns the formatted key.
64    ///
65    /// This is faster than [`Self::serialize_value`] and reliably gives the
66    /// same output for the same input, making it suitable for index keys.
67    pub fn encode_key<T>(&self, table_name: &str, key: T) -> JsValue
68    where
69        T: SafeEncode,
70    {
71        self.encode_key_as_string(table_name, key).into()
72    }
73
74    /// Hash the given key securely for the given tablename, using the store
75    /// cipher.
76    ///
77    /// The same as [`Self::encode_key`], but stops short of converting the
78    /// resulting base64 string into a JsValue
79    pub fn encode_key_as_string<T>(&self, table_name: &str, key: T) -> String
80    where
81        T: SafeEncode,
82    {
83        match &self.store_cipher {
84            Some(cipher) => key.as_secure_string(table_name, cipher),
85            None => key.as_encoded_string(),
86        }
87    }
88
89    pub fn encode_to_range<T>(
90        &self,
91        table_name: &str,
92        key: T,
93    ) -> Result<IdbKeyRange, IndexeddbCryptoStoreError>
94    where
95        T: SafeEncode,
96    {
97        match &self.store_cipher {
98            Some(cipher) => key.encode_to_range_secure(table_name, cipher),
99            None => key.encode_to_range(),
100        }
101        .map_err(|e| IndexeddbCryptoStoreError::DomException {
102            code: 0,
103            name: "IdbKeyRangeMakeError".to_owned(),
104            message: e,
105        })
106    }
107
108    /// Encode the value for storage as a value in indexeddb.
109    ///
110    /// A thin wrapper around [`IndexeddbSerializer::maybe_encrypt_value`]:
111    /// encrypts the given object, and then turns the [`MaybeEncrypted`]
112    /// result into a JS object for storage in indexeddb.
113    pub fn serialize_value(
114        &self,
115        value: &impl Serialize,
116    ) -> Result<JsValue, IndexeddbCryptoStoreError> {
117        let serialized = self.maybe_encrypt_value(value)?;
118        Ok(serde_wasm_bindgen::to_value(&serialized)?)
119    }
120
121    /// Encode the value for storage as a value in indexeddb.
122    ///
123    /// Returns a byte vector which is either the JSON serialisation of the
124    /// value, or an encrypted version thereof.
125    ///
126    /// Avoid using this in new code. Prefer
127    /// [`IndexeddbSerializer::serialize_value`] or
128    /// [`IndexeddbSerializer::maybe_encrypt_value`].
129    pub fn serialize_value_as_bytes(
130        &self,
131        value: &impl Serialize,
132    ) -> Result<Vec<u8>, CryptoStoreError> {
133        match &self.store_cipher {
134            Some(cipher) => cipher.encrypt_value(value).map_err(CryptoStoreError::backend),
135            None => serde_json::to_vec(value).map_err(CryptoStoreError::backend),
136        }
137    }
138
139    /// Encode an object for storage as a value in indexeddb.
140    ///
141    /// First serializes the object as JSON bytes.
142    ///
143    /// Then, if a cipher is set, encrypts the JSON with a nonce into binary
144    /// blobs, and base64-encodes the blobs.
145    ///
146    /// If no cipher is set, just base64-encodes the JSON bytes.
147    ///
148    /// Finally, returns an object encapsulating the result.
149    pub fn maybe_encrypt_value<T: Serialize>(
150        &self,
151        value: T,
152    ) -> Result<MaybeEncrypted, CryptoStoreError> {
153        // First serialize the object as JSON.
154        let serialized = serde_json::to_vec(&value).map_err(CryptoStoreError::backend)?;
155
156        // Then either encrypt the JSON, or just base64-encode it.
157        Ok(match &self.store_cipher {
158            Some(cipher) => MaybeEncrypted::Encrypted(
159                cipher.encrypt_value_base64_data(serialized).map_err(CryptoStoreError::backend)?,
160            ),
161            None => MaybeEncrypted::Unencrypted(BASE64.encode(serialized)),
162        })
163    }
164
165    /// Decode a value that was previously encoded with
166    /// [`Self::serialize_value`].
167    pub fn deserialize_value<T: DeserializeOwned>(
168        &self,
169        value: JsValue,
170    ) -> Result<T, IndexeddbCryptoStoreError> {
171        // Objects which are serialized nowadays should be represented as a
172        // `MaybeEncrypted`. However, `serialize_value` previously used a
173        // different format, so we need to handle that in case we have old data.
174        //
175        // If we can convert the JsValue into a `MaybeEncrypted`, then it's probably one
176        // of those.
177        //
178        // - `MaybeEncrypted::Encrypted` becomes a JS object with properties {`version`,
179        //   `nonce`, `ciphertext`}.
180        //
181        // - `MaybeEncrypted::Unencrypted` becomes a JS string containing base64 text.
182        //
183        // Otherwise, it probably uses our old serialization format:
184        //
185        // - Encrypted values were: serialized to an array of JSON bytes; encrypted to
186        //   an array of u8 bytes; stored in a Rust object; serialized (again) into an
187        //   array of JSON bytes. Net result is a JS array.
188        //
189        // - Unencrypted values were serialized to JSON, then deserialized into a
190        //   javascript object/string/array/bool.
191        //
192        // Note that there are several potential ambiguities here:
193        //
194        // - A JS string could either be a legacy unencrypted value, or a
195        //   `MaybeEncrypted::Unencrypted`. However, the only thing that actually got
196        //   stored as a string under the legacy system was `backup_key_v1`, and that is
197        //   special-cased not to use this path — so if we can convert it into a
198        //   `MaybeEncrypted::Unencrypted`, then we assume it is one.
199        //
200        // - A JS array could be either a legacy encrypted value or a legacy unencrypted
201        //   value. We can tell the difference by whether we have a `cipher`.
202        //
203        // - A JS object could be either a legacy unencrypted value or a
204        //   `MaybeEncrypted::Encrypted`. We assume that no legacy JS objects have the
205        //   properties to be successfully decoded into a `MaybeEncrypted::Encrypted`.
206
207        // First check if it looks like a `MaybeEncrypted`, of either type.
208        if let Ok(maybe_encrypted) = serde_wasm_bindgen::from_value(value.clone()) {
209            return Ok(self.maybe_decrypt_value(maybe_encrypted)?);
210        }
211
212        // Otherwise, fall back to the legacy deserializer.
213        self.deserialize_legacy_value(value)
214    }
215
216    /// Decode a value that was encoded with an old version of
217    /// `serialize_value`.
218    ///
219    /// This should only be used on values from an old database which are known
220    /// to be serialized with the old format.
221    pub fn deserialize_legacy_value<T: DeserializeOwned>(
222        &self,
223        value: JsValue,
224    ) -> Result<T, IndexeddbCryptoStoreError> {
225        match &self.store_cipher {
226            Some(cipher) => {
227                if !value.is_array() {
228                    return Err(IndexeddbCryptoStoreError::CryptoStoreError(
229                        CryptoStoreError::UnpicklingError,
230                    ));
231                }
232
233                // Looks like legacy encrypted format.
234                //
235                // `value` is a JS-side array containing the byte values. Turn it into a
236                // rust-side Vec<u8>, then decrypt.
237                let value: Vec<u8> = serde_wasm_bindgen::from_value(value)?;
238                Ok(cipher.decrypt_value(&value).map_err(CryptoStoreError::backend)?)
239            }
240
241            None => {
242                // Legacy unencrypted format could be just about anything; just try
243                // JSON-serializing the value, then deserializing it into the
244                // desired type.
245                //
246                // Note that the stored data was actually encoded by JSON-serializing it, and
247                // then deserializing the JSON into Javascript objects — so, for
248                // example, `HashMap`s are converted into Javascript Objects
249                // (whose keys are always strings) rather than Maps (whose keys
250                // can be other things). `serde_wasm_bindgen::from_value` will complain about
251                // such things. The correct thing to do is to go *back* to JSON
252                // and then deserialize into Rust again, which is what `JsValue::into_serde`
253                // does.
254                Ok(value.into_serde()?)
255            }
256        }
257    }
258
259    /// Decode a value that was previously encoded with
260    /// [`Self::serialize_value_as_bytes`]
261    pub fn deserialize_value_from_bytes<T: DeserializeOwned>(
262        &self,
263        value: &[u8],
264    ) -> Result<T, CryptoStoreError> {
265        if let Some(cipher) = &self.store_cipher {
266            cipher.decrypt_value(value).map_err(CryptoStoreError::backend)
267        } else {
268            serde_json::from_slice(value).map_err(CryptoStoreError::backend)
269        }
270    }
271
272    /// Decode a value that was previously encoded with
273    /// [`Self::maybe_encrypt_value`]
274    pub fn maybe_decrypt_value<T: DeserializeOwned>(
275        &self,
276        value: MaybeEncrypted,
277    ) -> Result<T, CryptoStoreError> {
278        // First extract the plaintext JSON, either by decrypting or un-base64-ing.
279        let plaintext = Zeroizing::new(match (&self.store_cipher, value) {
280            (Some(cipher), MaybeEncrypted::Encrypted(enc)) => {
281                cipher.decrypt_value_base64_data(enc).map_err(CryptoStoreError::backend)?
282            }
283            (None, MaybeEncrypted::Unencrypted(unc)) => {
284                BASE64.decode(unc).map_err(CryptoStoreError::backend)?
285            }
286
287            _ => return Err(CryptoStoreError::UnpicklingError),
288        });
289
290        // Then deserialize the JSON.
291        Ok(serde_json::from_slice(&plaintext)?)
292    }
293}
294
295#[cfg(all(test, target_arch = "wasm32"))]
296mod tests {
297    use std::{collections::BTreeMap, sync::Arc};
298
299    use gloo_utils::format::JsValueSerdeExt;
300    use matrix_sdk_store_encryption::StoreCipher;
301    use matrix_sdk_test::async_test;
302    use serde::{Deserialize, Serialize};
303    use serde_json::json;
304    use wasm_bindgen::JsValue;
305
306    use super::IndexeddbSerializer;
307
308    wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
309
310    /// Test that `serialize_value`/`deserialize_value` will round-trip, when a
311    /// cipher is in use.
312    #[async_test]
313    async fn test_serialize_deserialize_with_cipher() {
314        let serializer = IndexeddbSerializer::new(Some(Arc::new(StoreCipher::new().unwrap())));
315
316        let obj = make_test_object();
317        let serialized = serializer.serialize_value(&obj).expect("could not serialize");
318        let deserialized: TestStruct =
319            serializer.deserialize_value(serialized).expect("could not deserialize");
320
321        assert_eq!(obj, deserialized);
322    }
323
324    /// Test that `serialize_value`/`deserialize_value` will round-trip, when no
325    /// cipher is in use.
326    #[async_test]
327    async fn test_serialize_deserialize_no_cipher() {
328        let serializer = IndexeddbSerializer::new(None);
329        let obj = make_test_object();
330        let serialized = serializer.serialize_value(&obj).expect("could not serialize");
331        let deserialized: TestStruct =
332            serializer.deserialize_value(serialized).expect("could not deserialize");
333
334        assert_eq!(obj, deserialized);
335    }
336
337    /// Test that `deserialize_value` can decode a value that was encoded with
338    /// an old implementation of `serialize_value`, when a cipher is in use.
339    #[async_test]
340    async fn test_deserialize_old_serialized_value_with_cipher() {
341        let cipher = test_cipher();
342        let obj = make_test_object();
343
344        // Follow the old format for encoding:
345        //  1. Encode as JSON, in a Vec<u8> of bytes
346        //  2. Encrypt
347        //  3. JSON-encode to another Vec<u8>
348        //  4. Turn the Vec into a Javascript array of numbers.
349        let data = serde_json::to_vec(&obj).unwrap();
350        let data = cipher.encrypt_value_data(data).unwrap();
351        let data = serde_json::to_vec(&data).unwrap();
352        let serialized = JsValue::from_serde(&data).unwrap();
353
354        // Now, try deserializing with `deserialize_value`, and check we get the right
355        // thing.
356        let serializer = IndexeddbSerializer::new(Some(Arc::new(cipher)));
357        let deserialized: TestStruct =
358            serializer.deserialize_value(serialized).expect("could not deserialize");
359
360        assert_eq!(obj, deserialized);
361    }
362
363    /// Test that `deserialize_value` can decode a value that was encoded with
364    /// an old implementation of `serialize_value`, when no cipher is in use.
365    #[async_test]
366    async fn test_deserialize_old_serialized_value_no_cipher() {
367        // An example of an object which was serialized using the old-format
368        // `serialize_value`.
369        let json = json!({ "id":0, "name": "test", "map": { "0": "test" }});
370        let serialized = js_sys::JSON::parse(&json.to_string()).unwrap();
371
372        let serializer = IndexeddbSerializer::new(None);
373        let deserialized: TestStruct =
374            serializer.deserialize_value(serialized).expect("could not deserialize");
375
376        assert_eq!(make_test_object(), deserialized);
377    }
378
379    /// Test that `deserialize_value` can decode an array value that was encoded
380    /// with an old implementation of `serialize_value`, when no cipher is
381    /// in use.
382    #[async_test]
383    async fn test_deserialize_old_serialized_array_no_cipher() {
384        let json = json!([1, 2, 3, 4]);
385        let serialized = js_sys::JSON::parse(&json.to_string()).unwrap();
386
387        let serializer = IndexeddbSerializer::new(None);
388        let deserialized: Vec<u8> =
389            serializer.deserialize_value(serialized).expect("could not deserialize");
390
391        assert_eq!(vec![1, 2, 3, 4], deserialized);
392    }
393
394    /// Test that `deserialize_value` can decode a value encoded with
395    /// `maybe_encrypt_value`, when a cipher is in use.
396    #[async_test]
397    async fn test_maybe_encrypt_deserialize_with_cipher() {
398        let serializer = IndexeddbSerializer::new(Some(Arc::new(StoreCipher::new().unwrap())));
399
400        let obj = make_test_object();
401        let serialized = serializer.maybe_encrypt_value(&obj).expect("could not serialize");
402        let serialized = serde_wasm_bindgen::to_value(&serialized).unwrap();
403
404        let deserialized: TestStruct =
405            serializer.deserialize_value(serialized).expect("could not deserialize");
406
407        assert_eq!(obj, deserialized);
408    }
409
410    /// Test that `deserialize_value` can decode a value encoded with
411    /// `maybe_encrypt_value`, when no cipher is in use.
412    #[async_test]
413    async fn test_maybe_encrypt_deserialize_no_cipher() {
414        let serializer = IndexeddbSerializer::new(None);
415        let obj = make_test_object();
416        let serialized = serializer.maybe_encrypt_value(&obj).expect("could not serialize");
417        let serialized = serde_wasm_bindgen::to_value(&serialized).unwrap();
418        let deserialized: TestStruct =
419            serializer.deserialize_value(serialized).expect("could not deserialize");
420
421        assert_eq!(obj, deserialized);
422    }
423
424    /// Test that `maybe_encrypt_value`/`maybe_decrypt_value` will round-trip,
425    /// when a cipher is in use.
426    #[async_test]
427    async fn test_maybe_encrypt_decrypt_with_cipher() {
428        let serializer = IndexeddbSerializer::new(Some(Arc::new(StoreCipher::new().unwrap())));
429
430        let obj = make_test_object();
431        let serialized = serializer.maybe_encrypt_value(&obj).expect("could not serialize");
432        let deserialized: TestStruct =
433            serializer.maybe_decrypt_value(serialized).expect("could not deserialize");
434
435        assert_eq!(obj, deserialized);
436    }
437
438    /// Test that `maybe_encrypt_value`/`maybe_decrypt_value` will round-trip,
439    /// when no cipher is in use.
440    #[async_test]
441    async fn test_maybe_encrypt_decrypt_no_cipher() {
442        let serializer = IndexeddbSerializer::new(None);
443
444        let obj = make_test_object();
445        let serialized = serializer.maybe_encrypt_value(&obj).expect("could not serialize");
446        let deserialized: TestStruct =
447            serializer.maybe_decrypt_value(serialized).expect("could not deserialize");
448
449        assert_eq!(obj, deserialized);
450    }
451
    /// Sample payload used by the round-trip and legacy-format tests in this
    /// module.
    #[derive(Serialize, Deserialize, PartialEq, Debug)]
    struct TestStruct {
        id: u32,
        name: String,

        // A map, whose keys are not strings. This is an edge-case we previously got wrong. Maps
        // are represented differently in JSON from Javascript objects, and that particularly
        // matters when their keys are not strings.
        map: BTreeMap<u8, String>,
    }
462
463    fn make_test_object() -> TestStruct {
464        TestStruct { id: 0, name: "test".to_owned(), map: BTreeMap::from([(0, "test".to_owned())]) }
465    }
466
467    /// Build a [`StoreCipher`] using a hardcoded key.
468    fn test_cipher() -> StoreCipher {
469        StoreCipher::import_with_key(
470            &[0u8; 32],
471            &[
472                130, 168, 107, 100, 102, 95, 105, 110, 102, 111, 164, 78, 111, 110, 101, 175, 99,
473                105, 112, 104, 101, 114, 116, 101, 120, 116, 95, 105, 110, 102, 111, 129, 176, 67,
474                104, 97, 67, 104, 97, 50, 48, 80, 111, 108, 121, 49, 51, 48, 53, 130, 165, 110,
475                111, 110, 99, 101, 220, 0, 24, 13, 204, 160, 204, 133, 204, 180, 204, 224, 204,
476                158, 95, 14, 94, 204, 133, 110, 3, 204, 225, 204, 174, 54, 204, 144, 204, 205, 204,
477                190, 204, 155, 74, 118, 81, 87, 204, 156, 170, 99, 105, 112, 104, 101, 114, 116,
478                101, 120, 116, 220, 0, 80, 204, 226, 204, 205, 58, 101, 88, 204, 141, 204, 218, 2,
479                112, 204, 252, 48, 204, 169, 204, 233, 58, 4, 60, 96, 66, 22, 204, 192, 4, 4, 63,
480                109, 204, 157, 204, 166, 17, 55, 85, 102, 89, 204, 145, 110, 204, 250, 39, 18, 19,
481                204, 191, 204, 156, 71, 204, 142, 75, 204, 251, 204, 218, 204, 130, 204, 132, 204,
482                240, 86, 204, 141, 77, 64, 204, 132, 204, 241, 204, 177, 12, 204, 224, 102, 106, 4,
483                204, 141, 89, 101, 30, 45, 38, 105, 104, 204, 156, 96, 204, 203, 204, 224, 34, 125,
484                204, 157, 204, 160, 38, 204, 158, 204, 155, 16, 204, 150,
485            ],
486        )
487        .unwrap()
488    }
489}