Add padding to all blobs using the Padmé algorithm

This commit is contained in:
Torsten Grote 2024-09-16 11:24:05 -03:00
parent 52f528dbf0
commit f188230269
No known key found for this signature in database
GPG key ID: 3E5F77D92CF891FF
7 changed files with 229 additions and 18 deletions

View file

@ -17,6 +17,7 @@ import org.calyxos.seedvault.core.toHexString
import java.io.ByteArrayOutputStream import java.io.ByteArrayOutputStream
import java.io.File import java.io.File
import java.io.IOException import java.io.IOException
import java.nio.ByteBuffer
/** /**
* Manages interactions with snapshots, such as loading, saving and removing them. * Manages interactions with snapshots, such as loading, saving and removing them.
@ -66,14 +67,27 @@ internal class SnapshotManager(
*/ */
@Throws(IOException::class) @Throws(IOException::class)
suspend fun saveSnapshot(snapshot: Snapshot) { suspend fun saveSnapshot(snapshot: Snapshot) {
// compress payload and get size
val payloadStream = ByteArrayOutputStream()
ZstdOutputStream(payloadStream).use { zstdOutputStream ->
snapshot.writeTo(zstdOutputStream)
}
val payloadSize = payloadStream.size()
val payloadSizeBytes = ByteBuffer.allocate(4).putInt(payloadSize).array()
// encrypt compressed payload and assemble entire blob
val byteStream = ByteArrayOutputStream() val byteStream = ByteArrayOutputStream()
byteStream.write(VERSION.toInt()) byteStream.write(VERSION.toInt())
crypto.newEncryptingStream(byteStream, crypto.getAdForVersion()).use { cryptoStream -> crypto.newEncryptingStream(byteStream, crypto.getAdForVersion()).use { cryptoStream ->
ZstdOutputStream(cryptoStream).use { zstdOutputStream -> cryptoStream.write(payloadSizeBytes)
snapshot.writeTo(zstdOutputStream) cryptoStream.write(payloadStream.toByteArray())
} // not adding any padding here, because it doesn't matter for snapshots
} }
payloadStream.reset()
val bytes = byteStream.toByteArray() val bytes = byteStream.toByteArray()
byteStream.reset()
// compute hash and save blob
val sha256 = crypto.sha256(bytes).toHexString() val sha256 = crypto.sha256(bytes).toHexString()
val snapshotHandle = AppBackupFileType.Snapshot(crypto.repoId, sha256) val snapshotHandle = AppBackupFileType.Snapshot(crypto.repoId, sha256)
backendManager.backend.save(snapshotHandle).use { outputStream -> backendManager.backend.save(snapshotHandle).use { outputStream ->

View file

@ -13,12 +13,14 @@ import com.stevesoltys.seedvault.crypto.Crypto
import com.stevesoltys.seedvault.header.VERSION import com.stevesoltys.seedvault.header.VERSION
import com.stevesoltys.seedvault.proto.Snapshot.Blob import com.stevesoltys.seedvault.proto.Snapshot.Blob
import com.stevesoltys.seedvault.proto.SnapshotKt.blob import com.stevesoltys.seedvault.proto.SnapshotKt.blob
import com.stevesoltys.seedvault.transport.backup.Padding.getPadTo
import okio.Buffer import okio.Buffer
import okio.buffer import okio.buffer
import okio.sink import okio.sink
import org.calyxos.seedvault.chunker.Chunk import org.calyxos.seedvault.chunker.Chunk
import org.calyxos.seedvault.core.backends.AppBackupFileType import org.calyxos.seedvault.core.backends.AppBackupFileType
import java.io.IOException import java.io.IOException
import java.nio.ByteBuffer
/** /**
* Creates and uploads new blobs to the current backend. * Creates and uploads new blobs to the current backend.
@ -28,6 +30,7 @@ internal class BlobCreator(
private val backendManager: BackendManager, private val backendManager: BackendManager,
) { ) {
private val payloadBuffer = Buffer()
private val buffer = Buffer() private val buffer = Buffer()
/** /**
@ -36,14 +39,31 @@ internal class BlobCreator(
@WorkerThread @WorkerThread
@Throws(IOException::class) @Throws(IOException::class)
suspend fun createNewBlob(chunk: Chunk): Blob { suspend fun createNewBlob(chunk: Chunk): Blob {
// ensure buffers are cleared
payloadBuffer.clear()
buffer.clear() buffer.clear()
// compress payload and get size
ZstdOutputStream(payloadBuffer.outputStream()).use { zstdOutputStream ->
zstdOutputStream.write(chunk.data)
}
val payloadSize = payloadBuffer.size.toInt()
val payloadSizeBytes = ByteBuffer.allocate(4).putInt(payloadSize).array()
val paddingSize = getPadTo(payloadSize) - payloadSize
// encrypt compressed payload and assemble entire blob
val bufferStream = buffer.outputStream() val bufferStream = buffer.outputStream()
bufferStream.write(VERSION.toInt()) bufferStream.write(VERSION.toInt())
crypto.newEncryptingStream(bufferStream, crypto.getAdForVersion()).use { cryptoStream -> crypto.newEncryptingStream(bufferStream, crypto.getAdForVersion()).use { cryptoStream ->
ZstdOutputStream(cryptoStream).use { zstdOutputStream -> cryptoStream.write(payloadSizeBytes)
zstdOutputStream.write(chunk.data) payloadBuffer.writeTo(cryptoStream)
} // add padding
// we could just write 0s, but because of defense in depth, we use random bytes
cryptoStream.write(crypto.getRandomBytes(paddingSize))
} }
payloadBuffer.clear()
// compute hash and save blob
val sha256ByteString = buffer.sha256() val sha256ByteString = buffer.sha256()
val handle = AppBackupFileType.Blob(crypto.repoId, sha256ByteString.hex()) val handle = AppBackupFileType.Blob(crypto.repoId, sha256ByteString.hex())
// TODO for later: implement a backend wrapper that handles retries for transient errors // TODO for later: implement a backend wrapper that handles retries for transient errors
@ -54,6 +74,7 @@ internal class BlobCreator(
outputBuffer.flush() outputBuffer.flush()
length length
} }
buffer.clear()
return blob { return blob {
id = ByteString.copyFrom(sha256ByteString.asByteBuffer()) id = ByteString.copyFrom(sha256ByteString.asByteBuffer())
length = size.toInt() length = size.toInt()

View file

@ -0,0 +1,28 @@
/*
* SPDX-FileCopyrightText: 2024 The Calyx Institute
* SPDX-License-Identifier: Apache-2.0
*/
package com.stevesoltys.seedvault.transport.backup
import kotlin.math.floor
import kotlin.math.log2
import kotlin.math.pow
object Padding {
    /**
     * Pads the given [size] using the [Padmé algorithm](https://lbarman.ch/blog/padme/).
     *
     * With `e = floor(log2(size))` and `s = floor(log2(e)) + 1`, the lowest `e - s` bits
     * of [size] are rounded up to zero, so the padded length only reveals the most
     * significant bits of the real length.
     *
     * The computation uses integer bit operations only, so the result is exact for the
     * whole [Int] range. Sizes below 2 have no low bits to round and are returned unchanged.
     *
     * @param size unpadded object length
     * @return the padded object length, never smaller than [size]
     * @throws IllegalArgumentException if the padded length does not fit into an [Int]
     */
    fun getPadTo(size: Int): Int {
        // For size 1 the formula needs log2(0); evaluated with floating point this used to
        // produce Int.MIN_VALUE. There is nothing to round for such tiny sizes anyway.
        if (size < 2) return size
        // e = floor(log2(size)): index of the highest set bit of size
        val e = Int.SIZE_BITS - 1 - size.countLeadingZeroBits()
        // s = floor(log2(e)) + 1: number of bits needed to represent e
        val s = Int.SIZE_BITS - e.countLeadingZeroBits()
        val lastBits = e - s
        // do the rounding in Long, so sizes close to Int.MAX_VALUE cannot wrap around silently
        val bitMask = (1L shl lastBits) - 1
        val padded = (size + bitMask) and bitMask.inv()
        require(padded <= Int.MAX_VALUE) { "Padded size for $size does not fit into an Int" }
        return padded.toInt()
    }
}

View file

@ -17,8 +17,11 @@ import org.calyxos.seedvault.core.backends.AppBackupFileType
import org.calyxos.seedvault.core.toHexString import org.calyxos.seedvault.core.toHexString
import java.io.ByteArrayInputStream import java.io.ByteArrayInputStream
import java.io.File import java.io.File
import java.io.FilterInputStream
import java.io.IOException
import java.io.InputStream import java.io.InputStream
import java.io.SequenceInputStream import java.io.SequenceInputStream
import java.nio.ByteBuffer
import java.security.GeneralSecurityException import java.security.GeneralSecurityException
import java.util.Enumeration import java.util.Enumeration
@ -97,8 +100,49 @@ internal class Loader(
val ad = crypto.getAdForVersion(version) val ad = crypto.getAdForVersion(version)
// skip first version byte when creating cipherText stream // skip first version byte when creating cipherText stream
val byteStream = ByteArrayInputStream(cipherText, 1, cipherText.size - 1) val byteStream = ByteArrayInputStream(cipherText, 1, cipherText.size - 1)
// decrypt and decompress cipherText stream and parse snapshot // decrypt, de-pad and decompress cipherText stream
return ZstdInputStream(crypto.newDecryptingStream(byteStream, ad)) val decryptingStream = crypto.newDecryptingStream(byteStream, ad)
val paddedStream = PaddedInputStream(decryptingStream)
return ZstdInputStream(paddedStream)
} }
} }
/**
 * Exposes only the payload of a stream that consists of a 4-byte size prefix,
 * followed by that many payload bytes, followed by padding to be discarded.
 *
 * The size prefix is consumed from [inputStream] on construction and decoded in
 * [ByteBuffer]'s default (big-endian) byte order. Afterwards, this stream reports
 * end-of-stream once [size] payload bytes have been read or skipped,
 * so the padding is never handed to the consumer.
 *
 * @throws IOException if the size prefix can not be read completely or is negative.
 */
private class PaddedInputStream(inputStream: InputStream) : FilterInputStream(inputStream) {

    /** Number of payload bytes as announced by the size prefix. */
    val size: Int

    /** Number of payload bytes that were already read or skipped. */
    var bytesRead: Int = 0

    init {
        val sizeBytes = ByteArray(4)
        // A single read() may return fewer bytes than requested without being at the end,
        // so keep reading until the prefix is complete or the stream really ends.
        var filled = 0
        while (filled < sizeBytes.size) {
            val count = inputStream.read(sizeBytes, filled, sizeBytes.size - filled)
            if (count == -1) break
            filled += count
        }
        if (filled != sizeBytes.size) {
            throw IOException("Could not read padding size: ${sizeBytes.toHexString()}")
        }
        size = ByteBuffer.wrap(sizeBytes).getInt()
        // the prefix is a signed integer, a negative value can only come from broken data
        if (size < 0) throw IOException("Invalid payload size: $size")
    }

    override fun read(): Int {
        if (bytesRead >= size) return -1
        val byte = super.read()
        // super.read() returns the byte's value (0-255), not a count: advance by exactly one
        if (byte != -1) bytesRead++
        return byte
    }

    override fun read(b: ByteArray, off: Int, len: Int): Int {
        val remaining = size - bytesRead
        if (remaining <= 0) return -1
        // never request more than the payload has left, so we don't read into the padding
        val count = super.read(b, off, minOf(len, remaining))
        if (count > 0) bytesRead += count
        return count
    }

    override fun skip(n: Long): Long {
        val remaining = size - bytesRead
        if (n <= 0 || remaining <= 0) return 0
        // skipped bytes count as consumed payload and must not reach into the padding either
        val skipped = super.skip(minOf(n, remaining.toLong()))
        if (skipped > 0) bytesRead += skipped.toInt()
        return skipped
    }

    override fun available(): Int {
        return size - bytesRead
    }
}

View file

@ -6,7 +6,9 @@
package com.stevesoltys.seedvault.transport.backup package com.stevesoltys.seedvault.transport.backup
import com.stevesoltys.seedvault.backend.BackendManager import com.stevesoltys.seedvault.backend.BackendManager
import com.stevesoltys.seedvault.getRandomByteArray
import com.stevesoltys.seedvault.transport.TransportTest import com.stevesoltys.seedvault.transport.TransportTest
import com.stevesoltys.seedvault.transport.restore.Loader
import io.mockk.coEvery import io.mockk.coEvery
import io.mockk.every import io.mockk.every
import io.mockk.mockk import io.mockk.mockk
@ -16,10 +18,13 @@ import org.calyxos.seedvault.chunker.Chunk
import org.calyxos.seedvault.core.backends.AppBackupFileType import org.calyxos.seedvault.core.backends.AppBackupFileType
import org.calyxos.seedvault.core.backends.Backend import org.calyxos.seedvault.core.backends.Backend
import org.calyxos.seedvault.core.toHexString import org.calyxos.seedvault.core.toHexString
import org.junit.jupiter.api.Assertions.assertArrayEquals
import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNotEquals import org.junit.jupiter.api.Assertions.assertNotEquals
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream import java.io.ByteArrayOutputStream
import java.io.InputStream
import java.io.OutputStream import java.io.OutputStream
import java.security.MessageDigest import java.security.MessageDigest
import kotlin.random.Random import kotlin.random.Random
@ -32,6 +37,7 @@ internal class BlobCreatorTest : TransportTest() {
private val ad = Random.nextBytes(1) private val ad = Random.nextBytes(1)
private val passThroughOutputStream = slot<OutputStream>() private val passThroughOutputStream = slot<OutputStream>()
private val passThroughInputStream = slot<InputStream>()
private val blobHandle = slot<AppBackupFileType.Blob>() private val blobHandle = slot<AppBackupFileType.Blob>()
@Test @Test
@ -42,11 +48,15 @@ internal class BlobCreatorTest : TransportTest() {
val chunk2 = Chunk(0L, data2.size, data2, "doesn't matter here") val chunk2 = Chunk(0L, data2.size, data2, "doesn't matter here")
val outputStream1 = ByteArrayOutputStream() val outputStream1 = ByteArrayOutputStream()
val outputStream2 = ByteArrayOutputStream() val outputStream2 = ByteArrayOutputStream()
val paddingNum = slot<Int>()
every { crypto.getAdForVersion() } returns ad every { crypto.getAdForVersion() } returns ad
every { crypto.newEncryptingStream(capture(passThroughOutputStream), ad) } answers { every { crypto.newEncryptingStream(capture(passThroughOutputStream), ad) } answers {
passThroughOutputStream.captured // not really encrypting here passThroughOutputStream.captured // not really encrypting here
} }
every { crypto.getRandomBytes(capture(paddingNum)) } answers {
getRandomByteArray(paddingNum.captured)
}
every { crypto.repoId } returns repoId every { crypto.repoId } returns repoId
every { backendManager.backend } returns backend every { backendManager.backend } returns backend
@ -79,4 +89,56 @@ internal class BlobCreatorTest : TransportTest() {
assertEquals(outputStream2.size(), blob2.length) assertEquals(outputStream2.size(), blob2.length)
assertEquals(data2.size, blob2.uncompressedLength) assertEquals(data2.size, blob2.uncompressedLength)
} }
/**
 * Round-trip test: creates a blob from random chunk data and loads it again
 * with a real [Loader], expecting the original data to come back out.
 *
 * The crypto streams are stubbed to pass data through unchanged,
 * so no real encryption or decryption takes place here.
 */
@Test
fun `create and load blob`() = runBlocking {
// random content with a random length below 8 MiB
val data = getRandomByteArray(Random.nextInt(8 * 1024 * 1024))
val chunk = Chunk(0L, data.size, data, "doesn't matter here")
// collects the bytes that get saved to the (mocked) backend
val outputStream = ByteArrayOutputStream()
// captures the number of random bytes that gets requested
val paddingNum = slot<Int>()
every { crypto.getAdForVersion() } returns ad
every { crypto.newEncryptingStream(capture(passThroughOutputStream), ad) } answers {
passThroughOutputStream.captured // not really encrypting here
}
// hand out as many random bytes as were requested
every { crypto.getRandomBytes(capture(paddingNum)) } answers {
getRandomByteArray(paddingNum.captured)
}
every { crypto.repoId } returns repoId
every { backendManager.backend } returns backend
// create blob
coEvery { backend.save(capture(blobHandle)) } returns outputStream
val blob = blobCreator.createNewBlob(chunk)
// check that the hash of the saved bytes matches the name of the blob handle
val messageDigest = MessageDigest.getInstance("SHA-256")
val hash = messageDigest.digest(outputStream.toByteArray()).toHexString()
assertEquals(hash, blobHandle.captured.name)
// check blob metadata
assertEquals(hash, blob.id.hexFromProto())
assertEquals(outputStream.size(), blob.length)
assertEquals(data.size, blob.uncompressedLength)
// prepare blob loading
// note: this local handle shadows the class-level `blobHandle` slot from here on
val blobHandle = AppBackupFileType.Blob(repoId, hash)
// the backend returns exactly the bytes that were saved above
coEvery {
backend.load(blobHandle)
} returns ByteArrayInputStream(outputStream.toByteArray())
every {
crypto.sha256(outputStream.toByteArray())
} returns messageDigest.digest(outputStream.toByteArray()) // same hash came out
every {
crypto.newDecryptingStream(capture(passThroughInputStream), ad)
} answers {
passThroughInputStream.captured // not really decrypting here
}
// load blob
val loader = Loader(crypto, backendManager) // need a real loader
loader.loadFile(blobHandle).use { inputStream ->
// data came back out
assertArrayEquals(data, inputStream.readAllBytes())
}
}
} }

View file

@ -0,0 +1,32 @@
/*
* SPDX-FileCopyrightText: 2024 The Calyx Institute
* SPDX-License-Identifier: Apache-2.0
*/
package com.stevesoltys.seedvault.transport.backup
import com.stevesoltys.seedvault.transport.backup.Padding.getPadTo
import kotlinx.coroutines.runBlocking
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
class PaddingTest {
    /**
     * Checks [getPadTo] against a table of known `unpadded to padded` sizes.
     */
    @Test
    fun test() = runBlocking {
        val knownSizes = listOf(
            49 to 52,
            50 to 52,
            60 to 60,
            4000 to 4096,
            8000 to 8192,
            12000 to 12288,
            60000 to 61440,
            634000 to 638976,
            1250000 to 1277952,
            8260000 to 8388608,
            8380000 to 8388608,
            8388608 to 8388608,
            8388609 to 8650752,
        )
        knownSizes.forEach { (unpadded, padded) ->
            assertEquals(padded, getPadTo(unpadded))
        }
    }
}

View file

@ -105,15 +105,23 @@ followed by an encrypted and authenticated payload (see also [Cryptography](#cry
The version (currently `0x02`) is used to be able to modify aspects of the design in the future The version (currently `0x02`) is used to be able to modify aspects of the design in the future
and to provide backwards compatibility. and to provide backwards compatibility.
Blob payloads include the raw bytes of the compressed chunks The first four bytes of the decrypted payload encode the compressed plaintext size
and snapshot payloads their compressed protobuf encoding. as a signed 32-bit integer.
So the maximum chunk size is 2147483647 bytes.
This size specifies where the compressed plaintext ends and the (to be discarded) padding starts.
Blob payloads include the raw bytes of the compressed chunks and always get padded.
Snapshot payloads include their compressed protobuf encoding and do not get padded.
Compression is using the [zstd](http://www.zstd.net/) algorithm in its default configuration. Compression is using the [zstd](http://www.zstd.net/) algorithm in its default configuration.
```console ```console
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Version ┃ encrypted tink payload ┃ ┃ ┃ encrypted tink payload (with 40 bytes header) ┃
┃ 0x02 ┃ (with 40 bytes header) ┃ ┃ version ┃ ┏━ plaintext ━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┓ ┃
┗━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━┛ ┃ 1 byte ┃ ┃ size int32 ┃ compressed ┃ padding ┃ ┃
┃ (0x02) ┃ ┃ 4 bytes ┃ plaintext ┃ (optional) ┃ ┃
┃ ┃ ┗━━━━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━━┛ ┃
┗━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
``` ```
The structure of the encrypted tink payload is explored further The structure of the encrypted tink payload is explored further
@ -301,7 +309,8 @@ All types of files written to the repository have the following format:
```console ```console
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ version ┃ tink payload (with 40 bytes header) ┃ ┃ ┃ tink payload (with 40 bytes header) ┃
┃ version ┃ ciphertext structure: ┃
┃ byte ┃ ┏━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ ┃ ┃ byte ┃ ┏━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ ┃
┃ ┃ ┃ header length ┃ salt ┃ nonce prefix ┃ encrypted segments ┃ ┃ ┃ ┃ ┃ header length ┃ salt ┃ nonce prefix ┃ encrypted segments ┃ ┃
┃ (0x02) ┃ ┗━━━━━━━━━━━━━━━┻━━━━━━┻━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ ┃ ┃ (0x02) ┃ ┗━━━━━━━━━━━━━━━┻━━━━━━┻━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ ┃
@ -335,10 +344,11 @@ they know how we chunk larger files, but they should be unable to retrieve our m
Since a random gear table computed like this may not be sufficient for attackers Since a random gear table computed like this may not be sufficient for attackers
able to control (part of the) plaintext, e.g. sending a file in a messaging app, able to control (part of the) plaintext, e.g. sending a file in a messaging app,
and due to the presence of lots of data consisting of only a single chunk, and due to the presence of lots of data consisting of only a single chunk,
we apply additional random padding to all chunks. we apply padding according to the [Padmé algorithm](https://lbarman.ch/blog/padme/)
([PETS 2019 paper [PDF]](https://www.petsymposium.org/2019/files/papers/issue4/popets-2019-0056.pdf))
to all chunks.
The plaintext gets padded with random bytes after compression and before encryption. The plaintext gets padded with random bytes after compression and before encryption.
We could also pad with 0 bytes, but for defense in depth random bytes are used instead.
**TODO** determine the details of the padding.
## Operations ## Operations