Add padding to all blobs using the Padmé algorithm

This commit is contained in:
Torsten Grote 2024-09-16 11:24:05 -03:00
parent 52f528dbf0
commit f188230269
No known key found for this signature in database
GPG key ID: 3E5F77D92CF891FF
7 changed files with 229 additions and 18 deletions

View file

@ -17,6 +17,7 @@ import org.calyxos.seedvault.core.toHexString
import java.io.ByteArrayOutputStream
import java.io.File
import java.io.IOException
import java.nio.ByteBuffer
/**
* Manages interactions with snapshots, such as loading, saving and removing them.
@ -66,14 +67,27 @@ internal class SnapshotManager(
*/
@Throws(IOException::class)
suspend fun saveSnapshot(snapshot: Snapshot) {
// compress payload and get size
val payloadStream = ByteArrayOutputStream()
ZstdOutputStream(payloadStream).use { zstdOutputStream ->
snapshot.writeTo(zstdOutputStream)
}
val payloadSize = payloadStream.size()
val payloadSizeBytes = ByteBuffer.allocate(4).putInt(payloadSize).array()
// encrypt compressed payload and assemble entire blob
val byteStream = ByteArrayOutputStream()
byteStream.write(VERSION.toInt())
crypto.newEncryptingStream(byteStream, crypto.getAdForVersion()).use { cryptoStream ->
ZstdOutputStream(cryptoStream).use { zstdOutputStream ->
snapshot.writeTo(zstdOutputStream)
}
cryptoStream.write(payloadSizeBytes)
cryptoStream.write(payloadStream.toByteArray())
// not adding any padding here, because it doesn't matter for snapshots
}
payloadStream.reset()
val bytes = byteStream.toByteArray()
byteStream.reset()
// compute hash and save blob
val sha256 = crypto.sha256(bytes).toHexString()
val snapshotHandle = AppBackupFileType.Snapshot(crypto.repoId, sha256)
backendManager.backend.save(snapshotHandle).use { outputStream ->

View file

@ -13,12 +13,14 @@ import com.stevesoltys.seedvault.crypto.Crypto
import com.stevesoltys.seedvault.header.VERSION
import com.stevesoltys.seedvault.proto.Snapshot.Blob
import com.stevesoltys.seedvault.proto.SnapshotKt.blob
import com.stevesoltys.seedvault.transport.backup.Padding.getPadTo
import okio.Buffer
import okio.buffer
import okio.sink
import org.calyxos.seedvault.chunker.Chunk
import org.calyxos.seedvault.core.backends.AppBackupFileType
import java.io.IOException
import java.nio.ByteBuffer
/**
* Creates and uploads new blobs to the current backend.
@ -28,6 +30,7 @@ internal class BlobCreator(
private val backendManager: BackendManager,
) {
private val payloadBuffer = Buffer()
private val buffer = Buffer()
/**
@ -36,14 +39,31 @@ internal class BlobCreator(
@WorkerThread
@Throws(IOException::class)
suspend fun createNewBlob(chunk: Chunk): Blob {
// ensure buffers are cleared
payloadBuffer.clear()
buffer.clear()
// compress payload and get size
ZstdOutputStream(payloadBuffer.outputStream()).use { zstdOutputStream ->
zstdOutputStream.write(chunk.data)
}
val payloadSize = payloadBuffer.size.toInt()
val payloadSizeBytes = ByteBuffer.allocate(4).putInt(payloadSize).array()
val paddingSize = getPadTo(payloadSize) - payloadSize
// encrypt compressed payload and assemble entire blob
val bufferStream = buffer.outputStream()
bufferStream.write(VERSION.toInt())
crypto.newEncryptingStream(bufferStream, crypto.getAdForVersion()).use { cryptoStream ->
ZstdOutputStream(cryptoStream).use { zstdOutputStream ->
zstdOutputStream.write(chunk.data)
}
cryptoStream.write(payloadSizeBytes)
payloadBuffer.writeTo(cryptoStream)
// add padding
// we could just write 0s, but because of defense in depth, we use random bytes
cryptoStream.write(crypto.getRandomBytes(paddingSize))
}
payloadBuffer.clear()
// compute hash and save blob
val sha256ByteString = buffer.sha256()
val handle = AppBackupFileType.Blob(crypto.repoId, sha256ByteString.hex())
// TODO for later: implement a backend wrapper that handles retries for transient errors
@ -54,6 +74,7 @@ internal class BlobCreator(
outputBuffer.flush()
length
}
buffer.clear()
return blob {
id = ByteString.copyFrom(sha256ByteString.asByteBuffer())
length = size.toInt()

View file

@ -0,0 +1,28 @@
/*
* SPDX-FileCopyrightText: 2024 The Calyx Institute
* SPDX-License-Identifier: Apache-2.0
*/
package com.stevesoltys.seedvault.transport.backup
import kotlin.math.floor
import kotlin.math.log2
import kotlin.math.pow
object Padding {
    /**
     * Pads the given [size] using the [Padmé algorithm](https://lbarman.ch/blog/padme/).
     *
     * The computation uses integer bit operations only, so the result does not depend on
     * floating point rounding.
     *
     * Sizes below 2 are returned unchanged, because the algorithm is not defined for them
     * (it would need the logarithm of zero).
     * If the padded length does not fit into an [Int], [Int.MAX_VALUE] is returned,
     * so the result is never smaller than a non-negative [size].
     *
     * @param size unpadded object length
     * @return the padded object length
     */
    fun getPadTo(size: Int): Int {
        // nothing sensible to compute for 0 and 1 (or invalid negative sizes)
        if (size < 2) return size
        // e = floor(log2(size)), i.e. the index of the highest set bit
        val e = 31 - size.countLeadingZeroBits()
        // s = floor(log2(e)) + 1, i.e. the number of bits needed to represent e
        val s = 32 - e.countLeadingZeroBits()
        // the lowest (e - s) bits of the size get rounded up
        val lastBits = e - s
        // do the rounding in 64 bits, so sizes close to Int.MAX_VALUE can't wrap around
        val bitMask = (1L shl lastBits) - 1
        val padded = (size + bitMask) and bitMask.inv()
        return padded.coerceAtMost(Int.MAX_VALUE.toLong()).toInt()
    }
}

View file

@ -17,8 +17,11 @@ import org.calyxos.seedvault.core.backends.AppBackupFileType
import org.calyxos.seedvault.core.toHexString
import java.io.ByteArrayInputStream
import java.io.File
import java.io.FilterInputStream
import java.io.IOException
import java.io.InputStream
import java.io.SequenceInputStream
import java.nio.ByteBuffer
import java.security.GeneralSecurityException
import java.util.Enumeration
@ -97,8 +100,49 @@ internal class Loader(
val ad = crypto.getAdForVersion(version)
// skip first version byte when creating cipherText stream
val byteStream = ByteArrayInputStream(cipherText, 1, cipherText.size - 1)
// decrypt and decompress cipherText stream and parse snapshot
return ZstdInputStream(crypto.newDecryptingStream(byteStream, ad))
// decrypt, de-pad and decompress cipherText stream
val decryptingStream = crypto.newDecryptingStream(byteStream, ad)
val paddedStream = PaddedInputStream(decryptingStream)
return ZstdInputStream(paddedStream)
}
}
/**
 * Wraps a stream that starts with a 4-byte size prefix followed by that many payload bytes
 * and optional padding. Only the payload bytes are exposed to the reader:
 * the prefix is consumed on construction and everything after the payload is never returned.
 *
 * @param inputStream the stream positioned at the size prefix
 * @throws IOException if the size prefix can't be read completely or is negative
 */
private class PaddedInputStream(inputStream: InputStream) : FilterInputStream(inputStream) {

    /** Number of payload bytes as announced by the size prefix. */
    val size: Int

    /** Number of payload bytes that were read (or skipped) so far. */
    var bytesRead: Int = 0

    init {
        val sizeBytes = ByteArray(4)
        // a single read() is allowed to return fewer bytes than requested,
        // so keep reading until the prefix is complete or the stream ends
        var filled = 0
        while (filled < sizeBytes.size) {
            val numRead = inputStream.read(sizeBytes, filled, sizeBytes.size - filled)
            if (numRead == -1) break
            filled += numRead
        }
        if (filled != sizeBytes.size) {
            throw IOException("Could not read padding size: ${sizeBytes.toHexString()}")
        }
        size = ByteBuffer.wrap(sizeBytes).getInt()
        if (size < 0) throw IOException("Invalid payload size: $size")
    }

    override fun read(): Int {
        if (bytesRead >= size) return -1
        val byte = super.read()
        // super.read() returns the value of the byte (0..255), not a count,
        // so exactly one byte was consumed unless the stream ended
        if (byte != -1) bytesRead++
        return byte
    }

    override fun read(b: ByteArray, off: Int, len: Int): Int {
        if (len == 0) return 0
        if (bytesRead >= size) return -1
        // never read past the end of the payload into the padding
        val numRead = super.read(b, off, minOf(len, size - bytesRead))
        if (numRead > 0) bytesRead += numRead
        return numRead
    }

    override fun skip(n: Long): Long {
        if (n <= 0 || bytesRead >= size) return 0
        // skipped bytes count as consumed payload and must not reach into the padding
        val skipped = super.skip(minOf(n, (size - bytesRead).toLong()))
        if (skipped > 0) bytesRead += skipped.toInt()
        return skipped
    }

    override fun available(): Int {
        return size - bytesRead
    }
}

View file

@ -6,7 +6,9 @@
package com.stevesoltys.seedvault.transport.backup
import com.stevesoltys.seedvault.backend.BackendManager
import com.stevesoltys.seedvault.getRandomByteArray
import com.stevesoltys.seedvault.transport.TransportTest
import com.stevesoltys.seedvault.transport.restore.Loader
import io.mockk.coEvery
import io.mockk.every
import io.mockk.mockk
@ -16,10 +18,13 @@ import org.calyxos.seedvault.chunker.Chunk
import org.calyxos.seedvault.core.backends.AppBackupFileType
import org.calyxos.seedvault.core.backends.Backend
import org.calyxos.seedvault.core.toHexString
import org.junit.jupiter.api.Assertions.assertArrayEquals
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNotEquals
import org.junit.jupiter.api.Test
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.InputStream
import java.io.OutputStream
import java.security.MessageDigest
import kotlin.random.Random
@ -32,6 +37,7 @@ internal class BlobCreatorTest : TransportTest() {
private val ad = Random.nextBytes(1)
private val passThroughOutputStream = slot<OutputStream>()
private val passThroughInputStream = slot<InputStream>()
private val blobHandle = slot<AppBackupFileType.Blob>()
@Test
@ -42,11 +48,15 @@ internal class BlobCreatorTest : TransportTest() {
val chunk2 = Chunk(0L, data2.size, data2, "doesn't matter here")
val outputStream1 = ByteArrayOutputStream()
val outputStream2 = ByteArrayOutputStream()
val paddingNum = slot<Int>()
every { crypto.getAdForVersion() } returns ad
every { crypto.newEncryptingStream(capture(passThroughOutputStream), ad) } answers {
passThroughOutputStream.captured // not really encrypting here
}
every { crypto.getRandomBytes(capture(paddingNum)) } answers {
getRandomByteArray(paddingNum.captured)
}
every { crypto.repoId } returns repoId
every { backendManager.backend } returns backend
@ -79,4 +89,56 @@ internal class BlobCreatorTest : TransportTest() {
assertEquals(outputStream2.size(), blob2.length)
assertEquals(data2.size, blob2.uncompressedLength)
}
@Test
fun `create and load blob`() = runBlocking {
    // a random chunk of fewer than 8 MiB
    val plaintext = getRandomByteArray(Random.nextInt(8 * 1024 * 1024))
    val chunk = Chunk(0L, plaintext.size, plaintext, "doesn't matter here")
    val savedBlob = ByteArrayOutputStream()
    val requestedPadding = slot<Int>()

    // crypto is mocked: streams are passed through and padding is plain random bytes
    every { crypto.getAdForVersion() } returns ad
    every { crypto.newEncryptingStream(capture(passThroughOutputStream), ad) } answers {
        passThroughOutputStream.captured // not really encrypting here
    }
    every { crypto.getRandomBytes(capture(requestedPadding)) } answers {
        getRandomByteArray(requestedPadding.captured)
    }
    every { crypto.repoId } returns repoId
    every { backendManager.backend } returns backend

    // create the blob, capturing what gets written to the backend
    coEvery { backend.save(capture(blobHandle)) } returns savedBlob
    val blob = blobCreator.createNewBlob(chunk)
    val blobBytes = savedBlob.toByteArray()

    // the handle name must be the hash of the bytes that were written
    val messageDigest = MessageDigest.getInstance("SHA-256")
    val hashBytes = messageDigest.digest(blobBytes)
    val hash = hashBytes.toHexString()
    assertEquals(hash, blobHandle.captured.name)

    // the returned metadata must describe the written blob
    assertEquals(hash, blob.id.hexFromProto())
    assertEquals(blobBytes.size, blob.length)
    assertEquals(plaintext.size, blob.uncompressedLength)

    // serve the written bytes back when the blob gets loaded
    val loadHandle = AppBackupFileType.Blob(repoId, hash)
    coEvery { backend.load(loadHandle) } returns ByteArrayInputStream(blobBytes)
    every { crypto.sha256(blobBytes) } returns hashBytes // same hash came out
    every { crypto.newDecryptingStream(capture(passThroughInputStream), ad) } answers {
        passThroughInputStream.captured // not really decrypting here
    }

    // load the blob with a real loader and expect the original chunk data
    val loader = Loader(crypto, backendManager)
    loader.loadFile(loadHandle).use { inputStream ->
        assertArrayEquals(plaintext, inputStream.readAllBytes())
    }
}
}

View file

@ -0,0 +1,32 @@
/*
* SPDX-FileCopyrightText: 2024 The Calyx Institute
* SPDX-License-Identifier: Apache-2.0
*/
package com.stevesoltys.seedvault.transport.backup
import com.stevesoltys.seedvault.transport.backup.Padding.getPadTo
import kotlinx.coroutines.runBlocking
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
class PaddingTest {

    /**
     * Checks [getPadTo] against a table of unpadded sizes and their expected padded sizes.
     * Each failure names the unpadded size that produced the unexpected result.
     */
    @Test
    fun test() {
        // unpadded size to expected padded size
        val expectedPaddedSizes = listOf(
            49 to 52,
            50 to 52,
            60 to 60,
            4000 to 4096,
            8000 to 8192,
            12000 to 12288,
            60000 to 61440,
            634000 to 638976,
            1250000 to 1277952,
            8260000 to 8388608,
            8380000 to 8388608,
            8388608 to 8388608,
            8388609 to 8650752,
        )
        expectedPaddedSizes.forEach { (size, expected) ->
            assertEquals(expected, getPadTo(size), "padded size for $size")
        }
    }
}

View file

@ -105,15 +105,23 @@ followed by an encrypted and authenticated payload (see also [Cryptography](#cry
The version (currently `0x02`) is used to be able to modify aspects of the design in the future
and to provide backwards compatibility.
Blob payloads include the raw bytes of the compressed chunks
and snapshot payloads their compressed protobuf encoding.
The first four bytes of the decrypted payload encode the size of the compressed plaintext
as a signed 32-bit integer in big-endian byte order.
So the maximum size of a compressed chunk is 2147483647 bytes.
This size specifies where the compressed plaintext ends and the (to be discarded) padding starts.
Blob payloads include the raw bytes of the compressed chunks and always get padded.
Snapshot payloads include their compressed protobuf encoding and do not get padded.
Compression is using the [zstd](http://www.zstd.net/) algorithm in its default configuration.
```console
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Version ┃ encrypted tink payload ┃
┃ 0x02 ┃ (with 40 bytes header) ┃
┗━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━┛
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ encrypted tink payload (with 40 bytes header) ┃
┃ version ┃ ┏━ plaintext ━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┓ ┃
┃ 1 byte ┃ ┃ size int32 ┃ compressed ┃ padding ┃ ┃
┃ (0x02) ┃ ┃ 4 bytes ┃ plaintext ┃ (optional) ┃ ┃
┃ ┃ ┗━━━━━━━━━━━━━┻━━━━━━━━━━━━┻━━━━━━━━━━━━┛ ┃
┗━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
```
The structure of the encrypted tink payload is explored further
@ -301,7 +309,8 @@ All types of files written to the repository have the following format:
```console
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ version ┃ tink payload (with 40 bytes header) ┃
┃ ┃ tink payload (with 40 bytes header) ┃
┃ version ┃ ciphertext structure: ┃
┃ byte ┃ ┏━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ ┃
┃ ┃ ┃ header length ┃ salt ┃ nonce prefix ┃ encrypted segments ┃ ┃
┃ (0x02) ┃ ┗━━━━━━━━━━━━━━━┻━━━━━━┻━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ ┃
@ -335,10 +344,11 @@ they know how we chunk larger files, but they should be unable to retrieve our m
Since a random gear table computed like this may not be sufficient for attackers
able to control (part of the) plaintext, e.g. sending a file in a messaging app,
and due to the presence of lots of data consisting of only a single chunk,
we apply additional random padding to all chunks.
we apply padding according to the [Padmé algorithm](https://lbarman.ch/blog/padme/)
([PETS 2019 paper [PDF]](https://www.petsymposium.org/2019/files/papers/issue4/popets-2019-0056.pdf))
to all chunks.
The plaintext gets padded with random bytes after compression and before encryption.
**TODO** determine the details of the padding.
We could also pad with 0 bytes, but for defense in depth random bytes are used instead.
## Operations