Checker: account for possibility that one chunkId maps to more than one blob

This commit is contained in:
Torsten Grote 2024-11-01 14:14:01 -03:00
parent 4f5199ce27
commit 6d9c18bd29
No known key found for this signature in database
GPG key ID: 3E5F77D92CF891FF
3 changed files with 141 additions and 32 deletions

View file

@ -6,6 +6,7 @@
package com.stevesoltys.seedvault.repo
import androidx.annotation.WorkerThread
import com.google.protobuf.ByteString
import com.stevesoltys.seedvault.backend.BackendManager
import com.stevesoltys.seedvault.crypto.Crypto
import com.stevesoltys.seedvault.proto.Snapshot
@ -71,10 +72,12 @@ internal class Checker(
this.handleSize = handles.size // remember number of snapshot handles we had
// get total disk space used by snapshots
val sizeMap = mutableMapOf<String, Int>()
val sizeMap = mutableMapOf<ByteString, Int>() // uses blob.id as key
snapshots.forEach { snapshot ->
// add sizes to a map first, so we don't double count
snapshot.blobsMap.forEach { (chunkId, blob) -> sizeMap[chunkId] = blob.length }
snapshot.blobsMap.forEach { (_, blob) ->
sizeMap[blob.id] = blob.length
}
}
return sizeMap.values.sumOf { it.toLong() }
}
@ -96,13 +99,13 @@ internal class Checker(
"Got $handleSize handles, but ${snapshots.size} snapshots."
}
val blobSample = getBlobSample(snapshots, percent)
val sampleSize = blobSample.values.sumOf { it.length.toLong() }
val sampleSize = blobSample.sumOf { it.blob.length.toLong() }
log.info { "Blob sample has ${blobSample.size} blobs worth $sampleSize bytes." }
// check blobs concurrently
val semaphore = Semaphore(concurrencyLimit)
val size = AtomicLong()
val badChunks = ConcurrentSkipListSet<String>()
val badChunks = ConcurrentSkipListSet<ChunkIdBlobPair>()
val lastNotification = AtomicLong()
val startTime = System.currentTimeMillis()
coroutineScope {
@ -116,7 +119,7 @@ internal class Checker(
} catch (e: Exception) {
log.error(e) { "Error loading chunk $chunkId: " }
// TODO we could try differentiating transient backend issues
badChunks.add(chunkId)
badChunks.add(ChunkIdBlobPair(chunkId, blob))
}
}
// keep track of how much we checked and for how long
@ -154,25 +157,30 @@ internal class Checker(
checkerResult = null
}
private fun getBlobSample(snapshots: List<Snapshot>, percent: Int): Map<String, Blob> {
// split up blobs for app data and for APKs
val appBlobs = mutableMapOf<String, Blob>()
val apkBlobs = mutableMapOf<String, Blob>()
private fun getBlobSample(
snapshots: List<Snapshot>,
percent: Int,
): List<ChunkIdBlobPair> {
// split up blobs for app data and for APKs (use blob.id as key to prevent double counting)
val appBlobs = mutableMapOf<ByteString, ChunkIdBlobPair>()
val apkBlobs = mutableMapOf<ByteString, ChunkIdBlobPair>()
snapshots.forEach { snapshot ->
val appChunkIds = snapshot.appsMap.flatMap { it.value.chunkIdsList.hexFromProto() }
val apkChunkIds = snapshot.appsMap.flatMap {
it.value.apk.splitsList.flatMap { split -> split.chunkIdsList.hexFromProto() }
}
appChunkIds.forEach { chunkId ->
appBlobs[chunkId] = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
val blob = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
appBlobs[blob.id] = ChunkIdBlobPair(chunkId, blob)
}
apkChunkIds.forEach { chunkId ->
apkBlobs[chunkId] = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
val blob = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
apkBlobs[blob.id] = ChunkIdBlobPair(chunkId, blob)
}
}
// calculate sizes
val appSize = appBlobs.values.sumOf { it.length.toLong() }
val apkSize = apkBlobs.values.sumOf { it.length.toLong() }
val appSize = appBlobs.values.sumOf { it.blob.length.toLong() }
val apkSize = apkBlobs.values.sumOf { it.blob.length.toLong() }
// let's assume it is unlikely that app data and APKs have blobs in common
val totalSize = appSize + apkSize
log.info { "Got ${appBlobs.size + apkBlobs.size} blobs worth $totalSize bytes to check." }
@ -182,23 +190,21 @@ internal class Checker(
val appTargetSize = min((targetSize * 0.75).roundToLong(), appSize) // 75% of targetSize
log.info { "Sampling $targetSize bytes of which $appTargetSize bytes for apps." }
val blobSample = mutableMapOf<String, Blob>()
val blobSample = mutableListOf<ChunkIdBlobPair>()
var currentSize = 0L
// check apps first until we reach their target size
val appIterator = appBlobs.keys.shuffled().iterator() // random app blob iterator
val appIterator = appBlobs.values.shuffled().iterator() // random app blob iterator
while (currentSize < appTargetSize && appIterator.hasNext()) {
val randomChunkId = appIterator.next()
val blob = appBlobs[randomChunkId] ?: error("No blob")
blobSample[randomChunkId] = blob
currentSize += blob.length
val pair = appIterator.next()
blobSample.add(pair)
currentSize += pair.blob.length
}
// now check APKs until we reach total targetSize
val apkIterator = apkBlobs.keys.shuffled().iterator() // random APK blob iterator
val apkIterator = apkBlobs.values.shuffled().iterator() // random APK blob iterator
while (currentSize < targetSize && apkIterator.hasNext()) {
val randomChunkId = apkIterator.next()
val blob = apkBlobs[randomChunkId] ?: error("No blob")
blobSample[randomChunkId] = blob
currentSize += blob.length
val pair = apkIterator.next()
blobSample.add(pair)
currentSize += pair.blob.length
}
return blobSample
}
@ -216,3 +222,9 @@ internal class Checker(
if (readChunkId != chunkId) throw GeneralSecurityException("ChunkId doesn't match")
}
}
/**
 * A chunk ID together with one of the blobs that is stored for it.
 *
 * The same [chunkId] can map to more than one [blob], so both values are needed
 * to identify what was actually checked.
 *
 * Instances are kept in a sorted concurrent set while checking, and such a set decides
 * whether two elements are duplicates solely by [compareTo]. The ordering therefore
 * must not return 0 for pairs that share a [chunkId] but carry different blobs,
 * otherwise the second broken blob of a chunk would be silently dropped.
 */
data class ChunkIdBlobPair(val chunkId: String, val blob: Blob) : Comparable<ChunkIdBlobPair> {
    /**
     * Orders by [chunkId] first and then by the blob's id, length and uncompressed length.
     *
     * NOTE(review): assumes id, length and uncompressedLength are all fields of Blob,
     * so that a result of 0 coincides with [equals] — confirm against the proto definition.
     */
    override fun compareTo(other: ChunkIdBlobPair): Int {
        val byChunkId = chunkId.compareTo(other.chunkId)
        if (byChunkId != 0) return byChunkId
        // ByteString is not Comparable itself, so use the comparator it provides
        val byBlobId = ByteString.unsignedLexicographicalComparator()
            .compare(blob.id, other.blob.id)
        if (byBlobId != 0) return byBlobId
        val byLength = blob.length.compareTo(other.blob.length)
        if (byLength != 0) return byLength
        return blob.uncompressedLength.compareTo(other.blob.uncompressedLength)
    }
}

View file

@ -24,7 +24,7 @@ sealed class CheckerResult {
/**
* The set of chunkId/blob pairs that had errors.
*/
val errorChunkIds: Set<String>,
val errorChunkIdBlobPairs: Set<ChunkIdBlobPair>,
) : CheckerResult() {
val goodSnapshots: List<Snapshot>
val badSnapshots: List<Snapshot>
@ -32,9 +32,23 @@ sealed class CheckerResult {
init {
val good = mutableListOf<Snapshot>()
val bad = mutableListOf<Snapshot>()
val errorChunkIds = errorChunkIdBlobPairs.map { it.chunkId }.toSet()
snapshots.forEach { snapshot ->
val isGood = snapshot.blobsMap.keys.intersect(errorChunkIds).isEmpty()
if (isGood) good.add(snapshot) else bad.add(snapshot)
val badChunkIds = snapshot.blobsMap.keys.intersect(errorChunkIds)
if (badChunkIds.isEmpty()) {
// snapshot doesn't contain chunks with erroneous blobs
good.add(snapshot)
} else {
// snapshot may contain chunks with erroneous blobs, check deeper
val isBad = badChunkIds.any { chunkId ->
val blob = snapshot.blobsMap[chunkId] ?: error("No blob for chunkId")
// is this chunkId/blob pair in errorChunkIdBlobPairs?
errorChunkIdBlobPairs.any { pair ->
pair.chunkId == chunkId && pair.blob == blob
}
}
if (isBad) bad.add(snapshot) else good.add(snapshot)
}
}
goodSnapshots = good
badSnapshots = bad

View file

@ -82,6 +82,36 @@ internal class CheckerTest : TransportTest() {
assertEquals(expectedSize, checker.getBackupSize())
}
@Test
fun `getBackupSize returns size without under-counting blobs with same chunkId`() =
    runBlocking {
        // an APK whose only split references exactly one chunk: chunkId1
        val singleChunkSplit = baseSplit.copy {
            this.chunkIds.clear()
            chunkIds.add(ByteString.fromHex(chunkId1))
        }
        val singleChunkApk = apk.copy {
            splits.clear()
            splits.add(singleChunkSplit)
        }
        // snapshot template without any blobs; the copies below fill in chunkId1
        val template = snapshot.copy {
            apps[packageName] = app.copy { this.apk = singleChunkApk }
            blobs.clear()
        }
        // both snapshots use the same chunkId, but point it at blob1 and blob2 respectively
        val firstSnapshot = template.copy {
            token = 1
            blobs[chunkId1] = blob1
        }
        val secondSnapshot = template.copy {
            token = 2
            blobs[chunkId1] = blob2
        }
        val snapshotMap = mapOf(
            snapshotHandle1 to firstSnapshot,
            snapshotHandle2 to secondSnapshot,
        )
        expectLoadingSnapshots(snapshotMap)

        // the size of both blobs is expected, even though they share a chunkId
        val expectedSize = blob1.length.toLong() + blob2.length.toLong()
        assertEquals(expectedSize, checker.getBackupSize())
    }
@Test
fun `check works even with no backup data`() = runBlocking {
expectLoadingSnapshots(emptyMap())
@ -138,7 +168,8 @@ internal class CheckerTest : TransportTest() {
assertEquals(snapshotMap.values.toSet(), result.badSnapshots.toSet())
assertEquals(emptyList<Snapshot>(), result.goodSnapshots)
assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(chunkId1, chunkId2), result.errorChunkIds)
val errorPairs = setOf(ChunkIdBlobPair(chunkId1, blob1), ChunkIdBlobPair(chunkId2, blob2))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
}
@Test
@ -189,7 +220,8 @@ internal class CheckerTest : TransportTest() {
assertEquals(listOf(snapshotMap[snapshotHandle1]), result.goodSnapshots)
assertEquals(listOf(snapshotMap[snapshotHandle2]), result.badSnapshots)
assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(chunkId2), result.errorChunkIds)
val errorPairs = setOf(ChunkIdBlobPair(chunkId2, blob2))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
}
@Test
@ -247,8 +279,8 @@ internal class CheckerTest : TransportTest() {
fun `check prefers app data over APKs`() = runBlocking {
val appDataBlob = blob {
id = ByteString.copyFrom(Random.nextBytes(32))
length = Random.nextInt(0, Int.MAX_VALUE)
uncompressedLength = Random.nextInt(0, Int.MAX_VALUE)
length = Random.nextInt(1, Int.MAX_VALUE)
uncompressedLength = Random.nextInt(1, Int.MAX_VALUE)
}
val appDataBlobHandle1 = AppBackupFileType.Blob(repoId, appDataBlob.id.hexFromProto())
val appDataChunkId = Random.nextBytes(32).toHexString()
@ -266,6 +298,7 @@ internal class CheckerTest : TransportTest() {
// only loading app data, not other blobs
coEvery { loader.loadFile(appDataBlobHandle1, null) } throws SecurityException()
// debug output; the braces are required so the template evaluates the length property
// instead of printing the whole blob followed by the literal text ".length"
println("appDataBlob.length = ${appDataBlob.length}")
every { nm.onCheckFinishedWithError(appDataBlob.length.toLong(), any()) } just Runs
assertNull(checker.checkerResult)
@ -275,7 +308,8 @@ internal class CheckerTest : TransportTest() {
assertEquals(snapshotMap.values.toSet(), result.snapshots.toSet())
assertEquals(snapshotMap.values.toSet(), result.badSnapshots.toSet())
assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(appDataChunkId), result.errorChunkIds)
val errorPairs = setOf(ChunkIdBlobPair(appDataChunkId, appDataBlob))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
coVerify(exactly = 0) {
loader.loadFile(blobHandle1, null)
@ -283,6 +317,55 @@ internal class CheckerTest : TransportTest() {
}
}
@Test
fun `check doesn't skip broken blobs that have a fix with same chunkID`() = runBlocking {
    // two random payloads: the first is served for the broken blob,
    // the second is the one the chunkId is derived from
    val brokenData = getRandomByteArray()
    val matchingData = getRandomByteArray()
    val chunkId = MessageDigest.getInstance("SHA-256").digest(matchingData).toHexString()

    // an APK whose only split references exactly that chunkId
    val singleChunkApk = apk.copy {
        splits.clear()
        splits.add(baseSplit.copy {
            this.chunkIds.clear()
            chunkIds.add(ByteString.fromHex(chunkId))
        })
    }
    val template = snapshot.copy {
        apps[packageName] = app.copy { this.apk = singleChunkApk }
        blobs.clear()
    }
    // first snapshot has the broken blob for chunkId
    val brokenSnapshot = template.copy {
        token = 1
        blobs[chunkId] = blob1
    }
    // second snapshot has the fixed blob for the same chunkId
    val fixedSnapshot = template.copy {
        token = 2
        blobs[chunkId] = blob2
    }
    val snapshotMap = mapOf(
        snapshotHandle1 to brokenSnapshot,
        snapshotHandle2 to fixedSnapshot,
    )

    expectLoadingSnapshots(snapshotMap)
    every { backendManager.requiresNetwork } returns Random.nextBoolean()
    coEvery { loader.loadFile(blobHandle1, null) } returns ByteArrayInputStream(brokenData)
    coEvery { loader.loadFile(blobHandle2, null) } returns ByteArrayInputStream(matchingData)
    every { nm.onCheckFinishedWithError(any(), any()) } just Runs

    assertNull(checker.checkerResult)
    checker.check(100)
    assertInstanceOf(CheckerResult.Error::class.java, checker.checkerResult)

    // only the snapshot that references the broken blob may be reported as bad
    val result = checker.checkerResult as CheckerResult.Error
    assertEquals(snapshotMap.values.toSet(), result.snapshots.toSet())
    assertEquals(setOf(fixedSnapshot), result.goodSnapshots.toSet())
    assertEquals(setOf(brokenSnapshot), result.badSnapshots.toSet())
    assertEquals(snapshotMap.size, result.existingSnapshots)
    assertEquals(setOf(ChunkIdBlobPair(chunkId, blob1)), result.errorChunkIdBlobPairs)
}
private suspend fun expectLoadingSnapshots(
snapshots: Map<AppBackupFileType.Snapshot, Snapshot>,
) {