Record known bad blobs on do-not-use list in BlobCache
This commit is contained in:
parent
6d9c18bd29
commit
69765c7857
6 changed files with 187 additions and 13 deletions
|
@ -7,7 +7,10 @@ package com.stevesoltys.seedvault.repo
|
|||
|
||||
import android.content.Context
|
||||
import android.content.Context.MODE_APPEND
|
||||
import android.content.Context.MODE_PRIVATE
|
||||
import androidx.annotation.VisibleForTesting
|
||||
import androidx.annotation.WorkerThread
|
||||
import com.google.protobuf.ByteString
|
||||
import com.stevesoltys.seedvault.MemoryLogger
|
||||
import com.stevesoltys.seedvault.proto.Snapshot
|
||||
import com.stevesoltys.seedvault.proto.Snapshot.Blob
|
||||
|
@ -18,7 +21,18 @@ import org.calyxos.seedvault.core.toHexString
|
|||
import java.io.FileNotFoundException
|
||||
import java.io.IOException
|
||||
|
||||
private const val CACHE_FILE_NAME = "blobsCache"
|
||||
@VisibleForTesting
|
||||
internal const val CACHE_FILE_NAME = "blobsCache"
|
||||
|
||||
/**
|
||||
* The filename of the file where we store which blobs are known to be corrupt
|
||||
* and should not be used anymore.
|
||||
* Each [BLOB_ID_SIZE] bytes are appended without separator or line breaks.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
internal const val DO_NOT_USE_FILE_NAME = "doNotUseBlobs"
|
||||
|
||||
private const val BLOB_ID_SIZE = 32
|
||||
|
||||
/**
|
||||
* Responsible for caching blobs during a backup run,
|
||||
|
@ -73,6 +87,10 @@ class BlobCache(
|
|||
|
||||
/**
|
||||
* Should get called for all new blobs as soon as they've been saved to the backend.
|
||||
*
|
||||
* We shouldn't need to worry about [Pruner] removing blobs that get cached here locally,
|
||||
* because we do run [Pruner.removeOldSnapshotsAndPruneUnusedBlobs] only after
|
||||
* a successful backup which is when we also clear cache in [clearLocalCache].
|
||||
*/
|
||||
fun saveNewBlob(chunkId: String, blob: Blob) {
|
||||
val previous = blobMap.put(chunkId, blob)
|
||||
|
@ -160,6 +178,10 @@ class BlobCache(
|
|||
if (sizeOnBackend == blob.length) {
|
||||
// only add blob to our mapping, if it still exists
|
||||
blobMap.putIfAbsent(chunkId, blob)?.let { previous ->
|
||||
// If there's more than one blob for the same chunk ID, it shouldn't matter
|
||||
// which one we keep on using provided both are still ok.
|
||||
// When we are here, the blob exists on storage and has the same size.
|
||||
// There may still be other corruption such as bit flips in one of the blobs.
|
||||
if (previous.id != blob.id) log.warn {
|
||||
"Chunk ID ${chunkId.substring(0..5)} had more than one blob."
|
||||
}
|
||||
|
@ -174,4 +196,72 @@ class BlobCache(
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This is expected to get called by the [Checker] when it finds a blob
|
||||
* that has the expected file size, but its content hash doesn't match what we expect.
|
||||
*
|
||||
* It appends the given [blobId] to our [DO_NOT_USE_FILE_NAME] file.
|
||||
*/
|
||||
fun doNotUseBlob(blobId: ByteString) {
|
||||
try {
|
||||
context.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_APPEND).use { outputStream ->
|
||||
val bytes = blobId.toByteArray()
|
||||
check(bytes.size == 32) { "Blob ID $blobId has unexpected size of ${bytes.size}" }
|
||||
outputStream.write(bytes)
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
log.error(e) { "Error adding blob to do-not-use list, may be corrupted: " }
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
fun getDoNotUseBlobIds(): Set<String> {
|
||||
val blobsIds = mutableSetOf<String>()
|
||||
try {
|
||||
context.openFileInput(DO_NOT_USE_FILE_NAME).use { inputStream ->
|
||||
val bytes = ByteArray(BLOB_ID_SIZE)
|
||||
while (inputStream.read(bytes) == 32) {
|
||||
val blobId = bytes.toHexString()
|
||||
blobsIds.add(blobId)
|
||||
}
|
||||
}
|
||||
} catch (e: FileNotFoundException) {
|
||||
log.info { "No do-not-use list found" }
|
||||
} catch (e: Exception) {
|
||||
log.error(e) { "Our internal do-not-use list is corrupted, deleting it..." }
|
||||
context.deleteFile(DO_NOT_USE_FILE_NAME)
|
||||
}
|
||||
return blobsIds
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this after deleting blobs from the backend,
|
||||
* so we can remove those from our do-not-use list.
|
||||
*/
|
||||
fun onBlobsRemoved(blobIds: Set<String>) {
|
||||
log.info { "${blobIds.size} blobs were removed." }
|
||||
|
||||
val blobsIdsToKeep = mutableSetOf<String>()
|
||||
|
||||
try {
|
||||
context.openFileInput(DO_NOT_USE_FILE_NAME).use { inputStream ->
|
||||
val bytes = ByteArray(BLOB_ID_SIZE)
|
||||
while (inputStream.read(bytes) == 32) {
|
||||
val blobId = bytes.toHexString()
|
||||
if (blobId !in blobIds) blobsIdsToKeep.add(blobId)
|
||||
}
|
||||
}
|
||||
} catch (e: FileNotFoundException) {
|
||||
log.info { "No do-not-use list found, no need to remove blobs from it." }
|
||||
return
|
||||
} // if something else goes wrong here, we'll delete the file before next backup
|
||||
context.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_PRIVATE).use { outputStream ->
|
||||
blobsIdsToKeep.forEach { blobId ->
|
||||
val bytes = blobId.toByteArrayFromHex()
|
||||
outputStream.write(bytes)
|
||||
}
|
||||
}
|
||||
log.info { "${blobsIdsToKeep.size} blobs remain on do-not-use list." }
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ package com.stevesoltys.seedvault.repo
|
|||
|
||||
import androidx.annotation.WorkerThread
|
||||
import com.google.protobuf.ByteString
|
||||
import com.stevesoltys.seedvault.MemoryLogger
|
||||
import com.stevesoltys.seedvault.backend.BackendManager
|
||||
import com.stevesoltys.seedvault.crypto.Crypto
|
||||
import com.stevesoltys.seedvault.proto.Snapshot
|
||||
|
@ -35,6 +36,7 @@ internal class Checker(
|
|||
private val backendManager: BackendManager,
|
||||
private val snapshotManager: SnapshotManager,
|
||||
private val loader: Loader,
|
||||
private val blobCache: BlobCache,
|
||||
private val nm: BackupNotificationManager,
|
||||
) {
|
||||
private val log = KotlinLogging.logger { }
|
||||
|
@ -116,6 +118,10 @@ internal class Checker(
|
|||
semaphore.withPermit {
|
||||
try {
|
||||
checkBlob(chunkId, blob)
|
||||
} catch (e: HashMismatchException) {
|
||||
log.error(e) { "Error loading chunk $chunkId: " }
|
||||
badChunks.add(ChunkIdBlobPair(chunkId, blob))
|
||||
blobCache.doNotUseBlob(blob.id)
|
||||
} catch (e: Exception) {
|
||||
log.error(e) { "Error loading chunk $chunkId: " }
|
||||
// TODO we could try differentiating transient backend issues
|
||||
|
@ -132,6 +138,7 @@ internal class Checker(
|
|||
val thousandth = ((newSize.toDouble() / sampleSize) * 1000).roundToInt()
|
||||
log.debug { "$thousandth‰ - $bandwidth KB/sec - $newSize bytes" }
|
||||
nm.showCheckNotification(bandwidth, thousandth)
|
||||
MemoryLogger.log()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -83,13 +83,13 @@ internal class Loader(
|
|||
// check SHA-256 hash first thing
|
||||
val sha256 = crypto.sha256(cipherText).toHexString()
|
||||
if (sha256 != expectedHash) {
|
||||
throw GeneralSecurityException("File had wrong SHA-256 hash: $expectedHash")
|
||||
throw HashMismatchException("File had wrong SHA-256 hash: $expectedHash")
|
||||
}
|
||||
// check that we can handle the version of that snapshot
|
||||
val version = cipherText[0]
|
||||
if (version <= 1) throw GeneralSecurityException("Unexpected version: $version")
|
||||
if (version > VERSION) throw UnsupportedVersionException(version)
|
||||
// cache ciperText in cacheFile, if existing
|
||||
// cache cipherText in cacheFile, if existing
|
||||
try {
|
||||
cacheFile?.outputStream()?.use { outputStream ->
|
||||
outputStream.write(cipherText)
|
||||
|
@ -109,3 +109,5 @@ internal class Loader(
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
internal class HashMismatchException(msg: String? = null) : GeneralSecurityException(msg)
|
||||
|
|
|
@ -20,6 +20,6 @@ val repoModule = module {
|
|||
SnapshotManager(snapshotFolder, get(), get(), get())
|
||||
}
|
||||
factory { SnapshotCreatorFactory(androidContext(), get(), get(), get()) }
|
||||
factory { Pruner(get(), get(), get()) }
|
||||
single { Checker(get(), get(), get(), get(), get()) }
|
||||
factory { Pruner(get(), get(), get(), get()) }
|
||||
single { Checker(get(), get(), get(), get(), get(), get()) }
|
||||
}
|
||||
|
|
|
@ -6,15 +6,21 @@
|
|||
package com.stevesoltys.seedvault.repo
|
||||
|
||||
import android.content.Context
|
||||
import android.content.Context.MODE_APPEND
|
||||
import android.content.Context.MODE_PRIVATE
|
||||
import com.stevesoltys.seedvault.transport.TransportTest
|
||||
import io.mockk.every
|
||||
import io.mockk.mockk
|
||||
import io.mockk.verify
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.Assertions.assertNull
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.api.io.TempDir
|
||||
import java.io.File
|
||||
import java.io.FileInputStream
|
||||
import java.io.FileNotFoundException
|
||||
import java.io.FileOutputStream
|
||||
import java.io.IOException
|
||||
import java.nio.file.Path
|
||||
|
||||
internal class BlobCacheTest : TransportTest() {
|
||||
|
@ -34,7 +40,7 @@ internal class BlobCacheTest : TransportTest() {
|
|||
assertNull(cache[chunkId2])
|
||||
|
||||
// read saved blobs from cache
|
||||
every { strictContext.openFileInput(any()) } returns file.inputStream()
|
||||
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
|
||||
cache.populateCache(listOf(fileInfo1, fileInfo2), emptyList())
|
||||
|
||||
// now both blobs are in the map
|
||||
|
@ -55,7 +61,7 @@ internal class BlobCacheTest : TransportTest() {
|
|||
|
||||
BlobCache(strictContext).let { cache ->
|
||||
// read saved blobs from cache
|
||||
every { strictContext.openFileInput(any()) } returns file.inputStream()
|
||||
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
|
||||
cache.populateCache(listOf(fileInfo2), emptyList()) // fileInfo1 is missing
|
||||
|
||||
// now only blob2 gets used, because blob1 wasn't on backend
|
||||
|
@ -73,7 +79,7 @@ internal class BlobCacheTest : TransportTest() {
|
|||
|
||||
BlobCache(strictContext).let { cache ->
|
||||
// read saved blobs from cache
|
||||
every { strictContext.openFileInput(any()) } returns file.inputStream()
|
||||
every { strictContext.openFileInput(CACHE_FILE_NAME) } returns file.inputStream()
|
||||
cache.populateCache(listOf(info, fileInfo2), emptyList()) // info has different size now
|
||||
|
||||
// now only blob2 gets used, because blob1 wasn't on backend
|
||||
|
@ -129,8 +135,76 @@ internal class BlobCacheTest : TransportTest() {
|
|||
blobCache.clearLocalCache()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `get non-existent do not use list`(@TempDir tmpDir: Path) {
|
||||
val cache = BlobCache(strictContext)
|
||||
|
||||
every { strictContext.openFileInput(DO_NOT_USE_FILE_NAME) } throws FileNotFoundException()
|
||||
|
||||
val blobIds = cache.getDoNotUseBlobIds()
|
||||
assertEquals(emptySet<String>(), blobIds)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `onBlobsRemoved with non-existent do not use list`(@TempDir tmpDir: Path) {
|
||||
val cache = BlobCache(strictContext)
|
||||
|
||||
every { strictContext.openFileInput(DO_NOT_USE_FILE_NAME) } throws FileNotFoundException()
|
||||
|
||||
cache.onBlobsRemoved(setOf("foo", "bar"))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `doNotUseBlob persists blobs which get removed later`(@TempDir tmpDir: Path) {
|
||||
val file = File(tmpDir.toString(), "tmpCache")
|
||||
val cache = BlobCache(strictContext)
|
||||
|
||||
// add blobs to list
|
||||
every { strictContext.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_APPEND) } answers {
|
||||
FileOutputStream(file, true)
|
||||
}
|
||||
cache.doNotUseBlob(blob1.id)
|
||||
cache.doNotUseBlob(blob2.id)
|
||||
|
||||
// get blobs from list
|
||||
every { strictContext.openFileInput(DO_NOT_USE_FILE_NAME) } answers {
|
||||
FileInputStream(file)
|
||||
}
|
||||
val blobIds = cache.getDoNotUseBlobIds()
|
||||
assertEquals(setOf(blob1.id.hexFromProto(), blob2.id.hexFromProto()), blobIds)
|
||||
|
||||
// remove first blob from list
|
||||
every { strictContext.openFileOutput(DO_NOT_USE_FILE_NAME, MODE_PRIVATE) } answers {
|
||||
FileOutputStream(file, false)
|
||||
}
|
||||
cache.onBlobsRemoved(setOf(blob1.id.hexFromProto(), "foo", "bar"))
|
||||
|
||||
// getting blobs from list now only returns second blob
|
||||
assertEquals(setOf(blob2.id.hexFromProto()), cache.getDoNotUseBlobIds())
|
||||
|
||||
// remove different blobs leaves empty list
|
||||
cache.onBlobsRemoved(setOf(blob2.id.hexFromProto(), "foo", "bar"))
|
||||
assertEquals(emptySet<String>(), cache.getDoNotUseBlobIds())
|
||||
|
||||
// empty list can be added to still
|
||||
cache.doNotUseBlob(blob1.id)
|
||||
assertEquals(setOf(blob1.id.hexFromProto()), cache.getDoNotUseBlobIds())
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `corrupted do-not-use list gets deleted when getting blobs`(@TempDir tmpDir: Path) {
|
||||
val cache = BlobCache(strictContext)
|
||||
|
||||
// get blobs from list deletes broken file, so we can continue using it
|
||||
every { strictContext.openFileInput(DO_NOT_USE_FILE_NAME) } throws IOException()
|
||||
every { strictContext.deleteFile(DO_NOT_USE_FILE_NAME) } returns true
|
||||
cache.getDoNotUseBlobIds()
|
||||
|
||||
verify { strictContext.deleteFile(DO_NOT_USE_FILE_NAME) }
|
||||
}
|
||||
|
||||
private fun BlobCache.saveTwoBlobsToCache(file: File) {
|
||||
every { strictContext.openFileOutput(any(), any()) } answers {
|
||||
every { strictContext.openFileOutput(CACHE_FILE_NAME, MODE_APPEND) } answers {
|
||||
FileOutputStream(file, true)
|
||||
}
|
||||
|
||||
|
|
|
@ -32,7 +32,6 @@ import org.junit.jupiter.api.Assertions.assertNull
|
|||
import org.junit.jupiter.api.Test
|
||||
import java.io.ByteArrayInputStream
|
||||
import java.io.IOException
|
||||
import java.security.GeneralSecurityException
|
||||
import java.security.MessageDigest
|
||||
import kotlin.random.Random
|
||||
|
||||
|
@ -41,10 +40,11 @@ internal class CheckerTest : TransportTest() {
|
|||
private val backendManager: BackendManager = mockk()
|
||||
private val snapshotManager: SnapshotManager = mockk()
|
||||
private val loader: Loader = mockk()
|
||||
private val blobCache: BlobCache = mockk()
|
||||
private val nm: BackupNotificationManager = mockk()
|
||||
private val backend: Backend = mockk()
|
||||
|
||||
private val checker = Checker(crypto, backendManager, snapshotManager, loader, nm)
|
||||
private val checker = Checker(crypto, backendManager, snapshotManager, loader, blobCache, nm)
|
||||
private val folder = TopLevelFolder(repoId)
|
||||
|
||||
private val snapshotHandle1 =
|
||||
|
@ -173,7 +173,7 @@ internal class CheckerTest : TransportTest() {
|
|||
}
|
||||
|
||||
@Test
|
||||
fun `check raises error for loader failure`() = runBlocking {
|
||||
fun `check records hash error from loader`() = runBlocking {
|
||||
// chunkId is "real"
|
||||
val data1 = getRandomByteArray()
|
||||
val chunkId1 = MessageDigest.getInstance("SHA-256").digest(data1).toHexString()
|
||||
|
@ -208,8 +208,9 @@ internal class CheckerTest : TransportTest() {
|
|||
every { backendManager.requiresNetwork } returns Random.nextBoolean()
|
||||
|
||||
coEvery { loader.loadFile(blobHandle1, null) } returns ByteArrayInputStream(data1)
|
||||
coEvery { loader.loadFile(blobHandle2, null) } throws GeneralSecurityException()
|
||||
coEvery { loader.loadFile(blobHandle2, null) } throws HashMismatchException()
|
||||
|
||||
every { blobCache.doNotUseBlob(ByteString.fromHex(blobHandle2.name)) } just Runs
|
||||
every { nm.onCheckFinishedWithError(any(), any()) } just Runs
|
||||
|
||||
assertNull(checker.checkerResult)
|
||||
|
|
Loading…
Reference in a new issue