Record known bad blobs on do-not-use list in BlobCache

Torsten Grote 2024-11-04 09:51:42 -03:00
parent 6d9c18bd29
commit 69765c7857
No known key found for this signature in database
GPG key ID: 3E5F77D92CF891FF
6 changed files with 187 additions and 13 deletions

View file

@@ -7,7 +7,10 @@ package com.stevesoltys.seedvault.repo
import android.content.Context
import android.content.Context.MODE_APPEND
import android.content.Context.MODE_PRIVATE
import androidx.annotation.VisibleForTesting
import androidx.annotation.WorkerThread
import com.google.protobuf.ByteString
import com.stevesoltys.seedvault.MemoryLogger
import com.stevesoltys.seedvault.proto.Snapshot
import com.stevesoltys.seedvault.proto.Snapshot.Blob
@@ -18,7 +21,18 @@ import org.calyxos.seedvault.core.toHexString
import java.io.FileNotFoundException
import java.io.IOException
private const val CACHE_FILE_NAME = "blobsCache"
@VisibleForTesting
internal const val CACHE_FILE_NAME = "blobsCache"
/**
* The filename of the file where we store which blobs are known to be corrupt
* and should not be used anymore.
* Blob IDs are appended as raw [BLOB_ID_SIZE]-byte values, without separators or line breaks.
*/
@VisibleForTesting
internal const val DO_NOT_USE_FILE_NAME = "doNotUseBlobs"
private const val BLOB_ID_SIZE = 32
/**
* Responsible for caching blobs during a backup run,
@@ -73,6 +87,10 @@ class BlobCache(
/**
* Should get called for all new blobs as soon as they've been saved to the backend.
*
* We shouldn't need to worry about the [Pruner] removing blobs that get cached here locally,
* because we run [Pruner.removeOldSnapshotsAndPruneUnusedBlobs] only after
* a successful backup, which is also when we clear the cache in [clearLocalCache].
*/
fun saveNewBlob(chunkId: String, blob: Blob) {
val previous = blobMap.put(chunkId, blob)
@@ -160,6 +178,10 @@ class BlobCache(
if (sizeOnBackend == blob.length) {
// only add blob to our mapping, if it still exists
blobMap.putIfAbsent(chunkId, blob)?.let { previous ->
// If there's more than one blob for the same chunk ID, it shouldn't matter
// which one we keep using, as long as both are still OK.
// At this point, the blob exists on storage and has the expected size.
// There may still be other corruption, such as bit flips, in one of the blobs.
if (previous.id != blob.id) log.warn {
"Chunk ID ${chunkId.substring(0..5)} had more than one blob."
}
@@ -174,4 +196,72 @@
}
}
/**
* This is expected to get called by the [Checker] when it finds a blob
* that has the expected file size, but its content hash doesn't match what we expect.
*
* It appends the given [blobId] to our [DO_NOT_USE_FILE_NAME] file.
*/
fun doNotUseBlob(blobId: ByteString) {
try {
context.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_APPEND).use { outputStream ->
val bytes = blobId.toByteArray()
check(bytes.size == 32) { "Blob ID $blobId has unexpected size of ${bytes.size}" }
outputStream.write(bytes)
}
} catch (e: Exception) {
log.error(e) { "Error adding blob to do-not-use list, may be corrupted: " }
}
}
@VisibleForTesting
fun getDoNotUseBlobIds(): Set<String> {
val blobsIds = mutableSetOf<String>()
try {
context.openFileInput(DO_NOT_USE_FILE_NAME).use { inputStream ->
val bytes = ByteArray(BLOB_ID_SIZE)
while (inputStream.read(bytes) == 32) {
val blobId = bytes.toHexString()
blobsIds.add(blobId)
}
}
} catch (e: FileNotFoundException) {
log.info { "No do-not-use list found" }
} catch (e: Exception) {
log.error(e) { "Our internal do-not-use list is corrupted, deleting it..." }
context.deleteFile(DO_NOT_USE_FILE_NAME)
}
return blobsIds
}
/**
* Call this after deleting blobs from the backend,
* so we can remove those from our do-not-use list.
*/
fun onBlobsRemoved(blobIds: Set<String>) {
log.info { "${blobIds.size} blobs were removed." }
val blobsIdsToKeep = mutableSetOf<String>()
try {
context.openFileInput(DO_NOT_USE_FILE_NAME).use { inputStream ->
val bytes = ByteArray(BLOB_ID_SIZE)
while (inputStream.read(bytes) == 32) {
val blobId = bytes.toHexString()
if (blobId !in blobIds) blobsIdsToKeep.add(blobId)
}
}
} catch (e: FileNotFoundException) {
log.info { "No do-not-use list found, no need to remove blobs from it." }
return
} // if something else goes wrong here, we'll delete the file before next backup
context.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_PRIVATE).use { outputStream ->
blobsIdsToKeep.forEach { blobId ->
val bytes = blobId.toByteArrayFromHex()
outputStream.write(bytes)
}
}
log.info { "${blobsIdsToKeep.size} blobs remain on do-not-use list." }
}
}
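
The three new functions above all operate on the same trivial on-disk format: BLOB_ID_SIZE (32) byte blob IDs appended back to back. As a hedged standalone illustration of that round trip, assuming plain java.io file access and a simple hex formatter in place of Android's Context.openFileInput/openFileOutput and the project's toHexString helper:

import java.io.File

// Illustrative sketch only, not code from this commit.
private const val ID_SIZE = 32

fun appendBlobId(listFile: File, blobId: ByteArray) {
    // same invariant as BlobCache.doNotUseBlob: an ID must be exactly 32 bytes
    check(blobId.size == ID_SIZE) { "Blob ID has unexpected size of ${blobId.size}" }
    listFile.appendBytes(blobId)
}

fun readBlobIds(listFile: File): Set<String> {
    if (!listFile.exists()) return emptySet() // no list yet, nothing marked bad
    val ids = mutableSetOf<String>()
    listFile.inputStream().use { input ->
        val buffer = ByteArray(ID_SIZE)
        // records have no separators, so read in fixed-size steps,
        // mirroring the read loop in getDoNotUseBlobIds() above
        while (input.read(buffer) == ID_SIZE) {
            ids.add(buffer.joinToString("") { "%02x".format(it) })
        }
    }
    return ids
}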

View file

@@ -7,6 +7,7 @@ package com.stevesoltys.seedvault.repo
import androidx.annotation.WorkerThread
import com.google.protobuf.ByteString
import com.stevesoltys.seedvault.MemoryLogger
import com.stevesoltys.seedvault.backend.BackendManager
import com.stevesoltys.seedvault.crypto.Crypto
import com.stevesoltys.seedvault.proto.Snapshot
@@ -35,6 +36,7 @@ internal class Checker(
private val backendManager: BackendManager,
private val snapshotManager: SnapshotManager,
private val loader: Loader,
private val blobCache: BlobCache,
private val nm: BackupNotificationManager,
) {
private val log = KotlinLogging.logger { }
@@ -116,6 +118,10 @@ internal class Checker(
semaphore.withPermit {
try {
checkBlob(chunkId, blob)
} catch (e: HashMismatchException) {
log.error(e) { "Error loading chunk $chunkId: " }
badChunks.add(ChunkIdBlobPair(chunkId, blob))
blobCache.doNotUseBlob(blob.id)
} catch (e: Exception) {
log.error(e) { "Error loading chunk $chunkId: " }
// TODO we could try differentiating transient backend issues
@@ -132,6 +138,7 @@
val thousandth = ((newSize.toDouble() / sampleSize) * 1000).roundToInt()
log.debug { "$thousandth‰ - $bandwidth KB/sec - $newSize bytes" }
nm.showCheckNotification(bandwidth, thousandth)
MemoryLogger.log()
}
}
}
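
Putting the scattered hunk lines above back together: the blob check loop now treats a hash mismatch (provably corrupt content) differently from any other load failure. A simplified, hedged sketch of that control flow, using hypothetical stand-in types instead of the project's Snapshot.Blob, ChunkIdBlobPair and BlobCache:

import java.security.GeneralSecurityException

// Stand-ins for illustration only; see Loader.kt below for the real exception.
class HashMismatchException(msg: String? = null) : GeneralSecurityException(msg)
class SimpleBlob(val id: ByteArray)

fun checkOne(
    chunkId: String,
    blob: SimpleBlob,
    badChunks: MutableList<Pair<String, SimpleBlob>>,
    recordDoNotUse: (ByteArray) -> Unit, // stands in for blobCache.doNotUseBlob
    checkBlob: (String, SimpleBlob) -> Unit,
) {
    try {
        checkBlob(chunkId, blob) // loads the blob and verifies its content hash
    } catch (e: HashMismatchException) {
        // content is corrupt: remember the bad pair and make sure
        // future backups do not re-use this blob
        badChunks.add(chunkId to blob)
        recordDoNotUse(blob.id)
    } catch (e: Exception) {
        // other failures may be transient backend issues; only record them
        badChunks.add(chunkId to blob)
    }
}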

View file

@@ -83,13 +83,13 @@ internal class Loader(
// check SHA-256 hash first thing
val sha256 = crypto.sha256(cipherText).toHexString()
if (sha256 != expectedHash) {
throw GeneralSecurityException("File had wrong SHA-256 hash: $expectedHash")
throw HashMismatchException("File had wrong SHA-256 hash: $expectedHash")
}
// check that we can handle the version of that snapshot
val version = cipherText[0]
if (version <= 1) throw GeneralSecurityException("Unexpected version: $version")
if (version > VERSION) throw UnsupportedVersionException(version)
// cache ciperText in cacheFile, if existing
// cache cipherText in cacheFile, if existing
try {
cacheFile?.outputStream()?.use { outputStream ->
outputStream.write(cipherText)
@@ -109,3 +109,5 @@ internal class Loader(
}
}
internal class HashMismatchException(msg: String? = null) : GeneralSecurityException(msg)
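
Since HashMismatchException extends GeneralSecurityException, existing callers that catch the broader type keep working, while callers like the Checker can single out corrupt content by catching the subtype first. A minimal standalone sketch of the same hash check, assuming java.security.MessageDigest and a plain hex formatter instead of the project's Crypto and toHexString helpers:

import java.security.GeneralSecurityException
import java.security.MessageDigest

class HashMismatchException(msg: String? = null) : GeneralSecurityException(msg)

fun verifySha256(cipherText: ByteArray, expectedHash: String) {
    val digest = MessageDigest.getInstance("SHA-256").digest(cipherText)
    val sha256 = digest.joinToString("") { "%02x".format(it) }
    // same decision as the Loader check above: wrong hash means corrupt data
    if (sha256 != expectedHash) {
        throw HashMismatchException("File had wrong SHA-256 hash: $expectedHash")
    }
}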

View file

@@ -20,6 +20,6 @@ val repoModule = module {
SnapshotManager(snapshotFolder, get(), get(), get())
}
factory { SnapshotCreatorFactory(androidContext(), get(), get(), get()) }
factory { Pruner(get(), get(), get()) }
single { Checker(get(), get(), get(), get(), get()) }
factory { Pruner(get(), get(), get(), get()) }
single { Checker(get(), get(), get(), get(), get(), get()) }
}
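
For orientation, each get() above is resolved by the type of the constructor parameter at that position, so the Checker line now needs six of them. Expanded with the parameter types taken from the constructor and test shown elsewhere in this commit (annotation only, not code from the commit):

single {
    Checker(
        get(), // Crypto
        get(), // BackendManager
        get(), // SnapshotManager
        get(), // Loader
        get(), // BlobCache - the newly added dependency
        get(), // BackupNotificationManager
    )
}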

View file

@@ -6,15 +6,21 @@
package com.stevesoltys.seedvault.repo
import android.content.Context
import android.content.Context.MODE_APPEND
import android.content.Context.MODE_PRIVATE
import com.stevesoltys.seedvault.transport.TransportTest
import io.mockk.every
import io.mockk.mockk
import io.mockk.verify
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.io.TempDir
import java.io.File
import java.io.FileInputStream
import java.io.FileNotFoundException
import java.io.FileOutputStream
import java.io.IOException
import java.nio.file.Path
internal class BlobCacheTest : TransportTest() {
@@ -34,7 +40,7 @@ internal class BlobCacheTest : TransportTest() {
assertNull(cache[chunkId2])
// read saved blobs from cache
every { strictContext.openFileInput(any()) } returns file.inputStream()
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
cache.populateCache(listOf(fileInfo1, fileInfo2), emptyList())
// now both blobs are in the map
@@ -55,7 +61,7 @@ internal class BlobCacheTest : TransportTest() {
BlobCache(strictContext).let { cache ->
// read saved blobs from cache
every { strictContext.openFileInput(any()) } returns file.inputStream()
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
cache.populateCache(listOf(fileInfo2), emptyList()) // fileInfo1 is missing
// now only blob2 gets used, because blob1 wasn't on backend
@@ -73,7 +79,7 @@ internal class BlobCacheTest : TransportTest() {
BlobCache(strictContext).let { cache ->
// read saved blobs from cache
every { strictContext.openFileInput(any()) } returns file.inputStream()
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
cache.populateCache(listOf(info, fileInfo2), emptyList()) // info has different size now
// now only blob2 gets used, because blob1 wasn't on backend
@@ -129,8 +135,76 @@ internal class BlobCacheTest : TransportTest() {
blobCache.clearLocalCache()
}
@Test
fun `get non-existent do not use list`(@TempDir tmpDir: Path) {
val cache = BlobCache(strictContext)
every { strictContext.openFileInput(DO_NOT_USE_FILE_NAME) } throws FileNotFoundException()
val blobIds = cache.getDoNotUseBlobIds()
assertEquals(emptySet<String>(), blobIds)
}
@Test
fun `onBlobsRemoved with non-existent do not use list`(@TempDir tmpDir: Path) {
val cache = BlobCache(strictContext)
every { strictContext.openFileInput(DO_NOT_USE_FILE_NAME) } throws FileNotFoundException()
cache.onBlobsRemoved(setOf("foo", "bar"))
}
@Test
fun `doNotUseBlob persists blobs which get removed later`(@TempDir tmpDir: Path) {
val file = File(tmpDir.toString(), "tmpCache")
val cache = BlobCache(strictContext)
// add blobs to list
every { strictContext.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_APPEND) } answers {
FileOutputStream(file, true)
}
cache.doNotUseBlob(blob1.id)
cache.doNotUseBlob(blob2.id)
// get blobs from list
every { strictContext.openFileInput(DO_NOT_USE_FILE_NAME) } answers {
FileInputStream(file)
}
val blobIds = cache.getDoNotUseBlobIds()
assertEquals(setOf(blob1.id.hexFromProto(), blob2.id.hexFromProto()), blobIds)
// remove first blob from list
every { strictContext.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_PRIVATE) } answers {
FileOutputStream(file, false)
}
cache.onBlobsRemoved(setOf(blob1.id.hexFromProto(), "foo", "bar"))
// getting blobs from list now only returns second blob
assertEquals(setOf(blob2.id.hexFromProto()), cache.getDoNotUseBlobIds())
// removing the remaining blob leaves an empty list
cache.onBlobsRemoved(setOf(blob2.id.hexFromProto(), "foo", "bar"))
assertEquals(emptySet<String>(), cache.getDoNotUseBlobIds())
// the empty list can still be appended to
cache.doNotUseBlob(blob1.id)
assertEquals(setOf(blob1.id.hexFromProto()), cache.getDoNotUseBlobIds())
}
@Test
fun `corrupted do-not-use list gets deleted when getting blobs`(@TempDir tmpDir: Path) {
val cache = BlobCache(strictContext)
// getting blobs from a broken list deletes the file, so we can keep using the list afterwards
every { strictContext.openFileInput(DO_NOT_USE_FILE_NAME) } throws IOException()
every { strictContext.deleteFile(DO_NOT_USE_FILE_NAME) } returns true
cache.getDoNotUseBlobIds()
verify { strictContext.deleteFile(DO_NOT_USE_FILE_NAME) }
}
private fun BlobCache.saveTwoBlobsToCache(file: File) {
every { strictContext.openFileOutput(any(), any()) } answers {
every { strictContext.openFileOutput(CACHE_FILE_NAME, MODE_APPEND) } answers {
FileOutputStream(file, true)
}

View file

@@ -32,7 +32,6 @@ import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Test
import java.io.ByteArrayInputStream
import java.io.IOException
import java.security.GeneralSecurityException
import java.security.MessageDigest
import kotlin.random.Random
@@ -41,10 +40,11 @@ internal class CheckerTest : TransportTest() {
private val backendManager: BackendManager = mockk()
private val snapshotManager: SnapshotManager = mockk()
private val loader: Loader = mockk()
private val blobCache: BlobCache = mockk()
private val nm: BackupNotificationManager = mockk()
private val backend: Backend = mockk()
private val checker = Checker(crypto, backendManager, snapshotManager, loader, nm)
private val checker = Checker(crypto, backendManager, snapshotManager, loader, blobCache, nm)
private val folder = TopLevelFolder(repoId)
private val snapshotHandle1 =
@@ -173,7 +173,7 @@ internal class CheckerTest : TransportTest() {
}
@Test
fun `check raises error for loader failure`() = runBlocking {
fun `check records hash error from loader`() = runBlocking {
// chunkId is "real"
val data1 = getRandomByteArray()
val chunkId1 = MessageDigest.getInstance("SHA-256").digest(data1).toHexString()
@@ -208,8 +208,9 @@ internal class CheckerTest : TransportTest() {
every { backendManager.requiresNetwork } returns Random.nextBoolean()
coEvery { loader.loadFile(blobHandle1, null) } returns ByteArrayInputStream(data1)
coEvery { loader.loadFile(blobHandle2, null) } throws GeneralSecurityException()
coEvery { loader.loadFile(blobHandle2, null) } throws HashMismatchException()
every { blobCache.doNotUseBlob(ByteString.fromHex(blobHandle2.name)) } just Runs
every { nm.onCheckFinishedWithError(any(), any()) } just Runs
assertNull(checker.checkerResult)