From f1882302695c4c92c705091c41b5be364d5868a1 Mon Sep 17 00:00:00 2001 From: Torsten Grote Date: Mon, 16 Sep 2024 11:24:05 -0300 Subject: [PATCH] =?UTF-8?q?Add=20padding=20to=20all=20blobs=20using=20the?= =?UTF-8?q?=20Padm=C3=A9=20algorithm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../seedvault/transport/SnapshotManager.kt | 20 +++++- .../seedvault/transport/backup/BlobCreator.kt | 27 +++++++- .../seedvault/transport/backup/Padding.kt | 28 +++++++++ .../seedvault/transport/restore/Loader.kt | 48 +++++++++++++- .../transport/backup/BlobCreatorTest.kt | 62 +++++++++++++++++++ .../seedvault/transport/backup/PaddingTest.kt | 32 ++++++++++ doc/README.md | 30 ++++++--- 7 files changed, 229 insertions(+), 18 deletions(-) create mode 100644 app/src/main/java/com/stevesoltys/seedvault/transport/backup/Padding.kt create mode 100644 app/src/test/java/com/stevesoltys/seedvault/transport/backup/PaddingTest.kt diff --git a/app/src/main/java/com/stevesoltys/seedvault/transport/SnapshotManager.kt b/app/src/main/java/com/stevesoltys/seedvault/transport/SnapshotManager.kt index 9ee20534..5a3cd002 100644 --- a/app/src/main/java/com/stevesoltys/seedvault/transport/SnapshotManager.kt +++ b/app/src/main/java/com/stevesoltys/seedvault/transport/SnapshotManager.kt @@ -17,6 +17,7 @@ import org.calyxos.seedvault.core.toHexString import java.io.ByteArrayOutputStream import java.io.File import java.io.IOException +import java.nio.ByteBuffer /** * Manages interactions with snapshots, such as loading, saving and removing them. 
@@ -66,14 +67,27 @@ internal class SnapshotManager( */ @Throws(IOException::class) suspend fun saveSnapshot(snapshot: Snapshot) { + // compress payload and get size + val payloadStream = ByteArrayOutputStream() + ZstdOutputStream(payloadStream).use { zstdOutputStream -> + snapshot.writeTo(zstdOutputStream) + } + val payloadSize = payloadStream.size() + val payloadSizeBytes = ByteBuffer.allocate(4).putInt(payloadSize).array() + + // encrypt compressed payload and assemble entire blob val byteStream = ByteArrayOutputStream() byteStream.write(VERSION.toInt()) crypto.newEncryptingStream(byteStream, crypto.getAdForVersion()).use { cryptoStream -> - ZstdOutputStream(cryptoStream).use { zstdOutputStream -> - snapshot.writeTo(zstdOutputStream) - } + cryptoStream.write(payloadSizeBytes) + cryptoStream.write(payloadStream.toByteArray()) + // not adding any padding here, because it doesn't matter for snapshots } + payloadStream.reset() val bytes = byteStream.toByteArray() + byteStream.reset() + + // compute hash and save blob val sha256 = crypto.sha256(bytes).toHexString() val snapshotHandle = AppBackupFileType.Snapshot(crypto.repoId, sha256) backendManager.backend.save(snapshotHandle).use { outputStream -> diff --git a/app/src/main/java/com/stevesoltys/seedvault/transport/backup/BlobCreator.kt b/app/src/main/java/com/stevesoltys/seedvault/transport/backup/BlobCreator.kt index 90e40503..0bc0a431 100644 --- a/app/src/main/java/com/stevesoltys/seedvault/transport/backup/BlobCreator.kt +++ b/app/src/main/java/com/stevesoltys/seedvault/transport/backup/BlobCreator.kt @@ -13,12 +13,14 @@ import com.stevesoltys.seedvault.crypto.Crypto import com.stevesoltys.seedvault.header.VERSION import com.stevesoltys.seedvault.proto.Snapshot.Blob import com.stevesoltys.seedvault.proto.SnapshotKt.blob +import com.stevesoltys.seedvault.transport.backup.Padding.getPadTo import okio.Buffer import okio.buffer import okio.sink import org.calyxos.seedvault.chunker.Chunk import 
org.calyxos.seedvault.core.backends.AppBackupFileType import java.io.IOException +import java.nio.ByteBuffer /** * Creates and uploads new blobs to the current backend. @@ -28,6 +30,7 @@ internal class BlobCreator( private val backendManager: BackendManager, ) { + private val payloadBuffer = Buffer() private val buffer = Buffer() /** @@ -36,14 +39,31 @@ internal class BlobCreator( @WorkerThread @Throws(IOException::class) suspend fun createNewBlob(chunk: Chunk): Blob { + // ensure buffers are cleared + payloadBuffer.clear() buffer.clear() + + // compress payload and get size + ZstdOutputStream(payloadBuffer.outputStream()).use { zstdOutputStream -> + zstdOutputStream.write(chunk.data) + } + val payloadSize = payloadBuffer.size.toInt() + val payloadSizeBytes = ByteBuffer.allocate(4).putInt(payloadSize).array() + val paddingSize = getPadTo(payloadSize) - payloadSize + + // encrypt compressed payload and assemble entire blob val bufferStream = buffer.outputStream() bufferStream.write(VERSION.toInt()) crypto.newEncryptingStream(bufferStream, crypto.getAdForVersion()).use { cryptoStream -> - ZstdOutputStream(cryptoStream).use { zstdOutputStream -> - zstdOutputStream.write(chunk.data) - } + cryptoStream.write(payloadSizeBytes) + payloadBuffer.writeTo(cryptoStream) + // add padding + // we could just write 0s, but because of defense in depth, we use random bytes + cryptoStream.write(crypto.getRandomBytes(paddingSize)) } + payloadBuffer.clear() + + // compute hash and save blob val sha256ByteString = buffer.sha256() val handle = AppBackupFileType.Blob(crypto.repoId, sha256ByteString.hex()) // TODO for later: implement a backend wrapper that handles retries for transient errors @@ -54,6 +74,7 @@ internal class BlobCreator( outputBuffer.flush() length } + buffer.clear() return blob { id = ByteString.copyFrom(sha256ByteString.asByteBuffer()) length = size.toInt() diff --git a/app/src/main/java/com/stevesoltys/seedvault/transport/backup/Padding.kt 
b/app/src/main/java/com/stevesoltys/seedvault/transport/backup/Padding.kt
new file mode 100644
index 00000000..a868c24e
--- /dev/null
+++ b/app/src/main/java/com/stevesoltys/seedvault/transport/backup/Padding.kt
@@ -0,0 +1,37 @@
+/*
+ * SPDX-FileCopyrightText: 2024 The Calyx Institute
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package com.stevesoltys.seedvault.transport.backup
+
+object Padding {
+
+    /**
+     * Pads the given [size] using the [Padmé algorithm](https://lbarman.ch/blog/padme/).
+     *
+     * The computation uses integer arithmetic only, so it does not suffer from
+     * floating point rounding. Sizes below 2 are returned unchanged.
+     *
+     * @param size unpadded object length, must not be negative
+     * @return the padded object length, never smaller than [size]
+     * @throws IllegalArgumentException if [size] is negative
+     * or if the padded length does not fit into an [Int]
+     */
+    fun getPadTo(size: Int): Int {
+        require(size >= 0) { "Size must not be negative: $size" }
+        // log2 is undefined for 0 and sizes 0 and 1 have no low bits that could be rounded up
+        if (size < 2) return size
+        // e = floor(log2(size)): index of the highest set bit
+        val e = 31 - Integer.numberOfLeadingZeros(size)
+        // s = floor(log2(e)) + 1: number of bits needed to represent e
+        val s = 32 - Integer.numberOfLeadingZeros(e)
+        // the lowest (e - s) bits of the result must be zero
+        val bitMask = (1L shl (e - s)) - 1
+        // computed as Long, so rounding up cannot silently overflow
+        val padded = (size + bitMask) and bitMask.inv()
+        require(padded <= Int.MAX_VALUE) { "Padded size of $size does not fit into an Int" }
+        return padded.toInt()
+    }
+
+}
diff --git a/app/src/main/java/com/stevesoltys/seedvault/transport/restore/Loader.kt b/app/src/main/java/com/stevesoltys/seedvault/transport/restore/Loader.kt
index c165070a..0c295d9f 100644
--- a/app/src/main/java/com/stevesoltys/seedvault/transport/restore/Loader.kt
+++ b/app/src/main/java/com/stevesoltys/seedvault/transport/restore/Loader.kt
@@ -17,8 +17,11 @@ import org.calyxos.seedvault.core.backends.AppBackupFileType
 import org.calyxos.seedvault.core.toHexString
 import java.io.ByteArrayInputStream
 import java.io.File
+import java.io.FilterInputStream
+import java.io.IOException
 import java.io.InputStream
 import java.io.SequenceInputStream
+import java.nio.ByteBuffer
 import java.security.GeneralSecurityException
 import java.util.Enumeration
 
@@ -97,8 +100,49 @@ internal class Loader(
         val ad = crypto.getAdForVersion(version)
         // skip first version byte when creating cipherText stream
         val byteStream = ByteArrayInputStream(cipherText, 1, cipherText.size - 1)
-        // decrypt and decompress cipherText stream and parse snapshot
-        return ZstdInputStream(crypto.newDecryptingStream(byteStream,
ad))
+        // decrypt, de-pad and decompress cipherText stream
+        val decryptingStream = crypto.newDecryptingStream(byteStream, ad)
+        val paddedStream = PaddedInputStream(decryptingStream)
+        return ZstdInputStream(paddedStream)
     }
 
 }
+
+/** Exposes only the size-prefixed payload of [inputStream] and hides the padding after it. */
+private class PaddedInputStream(inputStream: InputStream) : FilterInputStream(inputStream) {
+
+    val size: Int
+    var bytesRead: Int = 0
+
+    init {
+        val sizeBytes = ByteArray(4)
+        var offset = 0
+        // a single read() may return fewer bytes than requested, so loop until all 4 are there
+        while (offset < 4) {
+            val n = inputStream.read(sizeBytes, offset, 4 - offset)
+            if (n == -1) break
+            offset += n
+        }
+        if (offset != 4) {
+            throw IOException("Could not read padding size: ${sizeBytes.toHexString()}")
+        }
+        size = ByteBuffer.wrap(sizeBytes).getInt()
+        if (size < 0) throw IOException("Invalid payload size: $size")
+    }
+
+    override fun read(): Int {
+        if (bytesRead >= size) return -1
+        // super.read() returns the byte value (0-255), not a count: advance by exactly one
+        return super.read().also { if (it != -1) bytesRead++ }
+    }
+
+    override fun read(b: ByteArray, off: Int, len: Int): Int {
+        if (bytesRead >= size) return -1
+        // never hand out more than what is left of the payload, the rest is padding
+        val read = super.read(b, off, minOf(len, size - bytesRead))
+        if (read > 0) bytesRead += read
+        return read
+    }
+
+    override fun available(): Int = size - bytesRead
+}
diff --git a/app/src/test/java/com/stevesoltys/seedvault/transport/backup/BlobCreatorTest.kt b/app/src/test/java/com/stevesoltys/seedvault/transport/backup/BlobCreatorTest.kt
index 0b11a7a5..9aea051e 100644
--- a/app/src/test/java/com/stevesoltys/seedvault/transport/backup/BlobCreatorTest.kt
+++ b/app/src/test/java/com/stevesoltys/seedvault/transport/backup/BlobCreatorTest.kt
@@ -6,7 +6,9 @@
 package com.stevesoltys.seedvault.transport.backup
 
 import com.stevesoltys.seedvault.backend.BackendManager
+import com.stevesoltys.seedvault.getRandomByteArray
 import com.stevesoltys.seedvault.transport.TransportTest
+import com.stevesoltys.seedvault.transport.restore.Loader
 import io.mockk.coEvery
 import io.mockk.every
 import io.mockk.mockk
@@ -16,10 +18,13 @@ import org.calyxos.seedvault.chunker.Chunk
 import
org.calyxos.seedvault.core.backends.AppBackupFileType
 import org.calyxos.seedvault.core.backends.Backend
 import org.calyxos.seedvault.core.toHexString
+import org.junit.jupiter.api.Assertions.assertArrayEquals
 import org.junit.jupiter.api.Assertions.assertEquals
 import org.junit.jupiter.api.Assertions.assertNotEquals
 import org.junit.jupiter.api.Test
+import java.io.ByteArrayInputStream
 import java.io.ByteArrayOutputStream
+import java.io.InputStream
 import java.io.OutputStream
 import java.security.MessageDigest
 import kotlin.random.Random
@@ -32,6 +37,7 @@ internal class BlobCreatorTest : TransportTest() {
 
     private val ad = Random.nextBytes(1)
     private val passThroughOutputStream = slot<OutputStream>()
+    private val passThroughInputStream = slot<InputStream>()
     private val blobHandle = slot<AppBackupFileType.Blob>()
 
     @Test
@@ -42,11 +48,15 @@ internal class BlobCreatorTest : TransportTest() {
         val chunk2 = Chunk(0L, data2.size, data2, "doesn't matter here")
         val outputStream1 = ByteArrayOutputStream()
         val outputStream2 = ByteArrayOutputStream()
+        val paddingNum = slot<Int>()
 
         every { crypto.getAdForVersion() } returns ad
         every { crypto.newEncryptingStream(capture(passThroughOutputStream), ad) } answers {
             passThroughOutputStream.captured // not really encrypting here
         }
+        every { crypto.getRandomBytes(capture(paddingNum)) } answers {
+            getRandomByteArray(paddingNum.captured)
+        }
         every { crypto.repoId } returns repoId
         every { backendManager.backend } returns backend
 
@@ -79,4 +89,56 @@ internal class BlobCreatorTest : TransportTest() {
         assertEquals(outputStream2.size(), blob2.length)
         assertEquals(data2.size, blob2.uncompressedLength)
     }
+
+    @Test
+    fun `create and load blob`() = runBlocking {
+        val data = getRandomByteArray(Random.nextInt(8 * 1024 * 1024))
+        val chunk = Chunk(0L, data.size, data, "doesn't matter here")
+        val outputStream = ByteArrayOutputStream()
+        // captures the number of padding bytes requested from crypto
+        val paddingNum = slot<Int>()
+
+        every { crypto.getAdForVersion() } returns ad
+        every { crypto.newEncryptingStream(capture(passThroughOutputStream), ad) } answers {
+
passThroughOutputStream.captured // not really encrypting here
+        }
+        every { crypto.getRandomBytes(capture(paddingNum)) } answers {
+            getRandomByteArray(paddingNum.captured)
+        }
+        every { crypto.repoId } returns repoId
+        every { backendManager.backend } returns backend
+
+        // create blob
+        coEvery { backend.save(capture(blobHandle)) } returns outputStream
+        val blob = blobCreator.createNewBlob(chunk)
+        // check that file content hash matches snapshot hash
+        val messageDigest = MessageDigest.getInstance("SHA-256")
+        val hash = messageDigest.digest(outputStream.toByteArray()).toHexString()
+        assertEquals(hash, blobHandle.captured.name)
+
+        // check blob metadata
+        assertEquals(hash, blob.id.hexFromProto())
+        assertEquals(outputStream.size(), blob.length)
+        assertEquals(data.size, blob.uncompressedLength)
+
+        // prepare blob loading
+        val blobHandle = AppBackupFileType.Blob(repoId, hash)
+        coEvery {
+            backend.load(blobHandle)
+        } returns ByteArrayInputStream(outputStream.toByteArray())
+        every {
+            crypto.sha256(outputStream.toByteArray())
+        } returns messageDigest.digest(outputStream.toByteArray()) // same hash came out
+        every {
+            crypto.newDecryptingStream(capture(passThroughInputStream), ad)
+        } answers {
+            passThroughInputStream.captured // not really decrypting here
+        }
+
+        // load blob
+        val loader = Loader(crypto, backendManager) // need a real loader
+        loader.loadFile(blobHandle).use { inputStream ->
+            // data came back out
+            assertArrayEquals(data, inputStream.readAllBytes())
+        }
+    }
 }
diff --git a/app/src/test/java/com/stevesoltys/seedvault/transport/backup/PaddingTest.kt b/app/src/test/java/com/stevesoltys/seedvault/transport/backup/PaddingTest.kt
new file mode 100644
index 00000000..644e4d6f
--- /dev/null
+++ b/app/src/test/java/com/stevesoltys/seedvault/transport/backup/PaddingTest.kt
@@ -0,0 +1,39 @@
+/*
+ * SPDX-FileCopyrightText: 2024 The Calyx Institute
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package com.stevesoltys.seedvault.transport.backup
+
+import com.stevesoltys.seedvault.transport.backup.Padding.getPadTo
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Test
+
+class PaddingTest {
+
+    /**
+     * Checks [getPadTo] against known pairs of unpadded and padded sizes,
+     * including sizes at and right after a power of two.
+     */
+    @Test
+    fun `getPadTo returns expected padded sizes`() {
+        val expectedBySize = mapOf(
+            49 to 52,
+            50 to 52,
+            60 to 60,
+            4000 to 4096,
+            8000 to 8192,
+            12000 to 12288,
+            60000 to 61440,
+            634000 to 638976,
+            1250000 to 1277952,
+            8260000 to 8388608,
+            8380000 to 8388608,
+            8388608 to 8388608,
+            8388609 to 8650752,
+        )
+        expectedBySize.forEach { (size, expected) ->
+            assertEquals(expected, getPadTo(size), "padded size for $size")
+        }
+    }
+}
diff --git a/doc/README.md b/doc/README.md
index 4d3305b1..8ffb9cd8 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -105,15 +105,23 @@ followed by an encrypted and authenticated payload (see also [Cryptography](#cry
 The version (currently `0x02`) is used to be able to modify aspects of the design in the future
 and to provide backwards compatibility.
 
-Blob payloads include the raw bytes of the compressed chunks
-and snapshot payloads their compressed protobuf encoding.
+The first four bytes of the decrypted payload encode the compressed plaintext size
+as a signed 32-bit integer.
+So the maximum chunk size is 2147483647 bytes.
+This size specifies where the compressed plaintext ends and the (to be discarded) padding starts.
+
+Blob payloads include the raw bytes of the compressed chunks and always get padded.
+Snapshot payloads include their compressed protobuf encoding and do not get padded.
 Compression is using the [zstd](http://www.zstd.net/) algorithm in its default configuration.
 
```console -┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ -┃ Version ┃ encrypted tink payload ┃ -┃ 0x02 ┃ (with 40 bytes header) ┃ -┗━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━┛ +┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ ┃ encrypted tink payload (with 40 bytes header) ┃ +┃ version ┃ ┏━ plaintext ━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┓ ┃ +┃ 1 byte ┃ ┃ size uint32 ┃ compressed ┃ padding ┃ ┃ +┃ (0x02) ┃ ┃ 4 bytes ┃ plaintext ┃ (optional) ┃ ┃ +┃ ┃ ┗━━━━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━━┛ ┃ +┗━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ ``` The structure of the encrypted tink payload is explored further @@ -301,7 +309,8 @@ All types of files written to the repository have the following format: ```console ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ version ┃ tink payload (with 40 bytes header) ┃ + ┃ ┃ tink payload (with 40 bytes header) ┃ + ┃ version ┃ ciphertext structure: ┃ ┃ byte ┃ ┏━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ ┃ ┃ ┃ ┃ header length ┃ salt ┃ nonce prefix ┃ encrypted segments ┃ ┃ ┃ (0x02) ┃ ┗━━━━━━━━━━━━━━━┻━━━━━━┻━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ ┃ @@ -335,10 +344,11 @@ they know how we chunk larger files, but they should be unable to retrieve our m Since a random gear table computed like this may not be sufficient for attackers able to control (part of the) plaintext, e.g. sending a file in a messaging app, and due to the presence of lots of data consisting of only a single chunk, -we apply additional random padding to all chunks. +we apply padding according to the [Padmé algorithm](https://lbarman.ch/blog/padme/) +([PETS 2019 paper [PDF]](https://www.petsymposium.org/2019/files/papers/issue4/popets-2019-0056.pdf)) +to all chunks. The plaintext gets padded with random bytes after compression and before encryption. - -**TODO** determine the details of the padding. +We could also pad with 0 bytes, but for defense in depth random bytes are used instead. ## Operations