Checker: account for possibility that one chunkId maps to more than one blob

This commit is contained in:
Torsten Grote 2024-11-01 14:14:01 -03:00
parent 4f5199ce27
commit 6d9c18bd29
No known key found for this signature in database
GPG key ID: 3E5F77D92CF891FF
3 changed files with 141 additions and 32 deletions

View file

@ -6,6 +6,7 @@
package com.stevesoltys.seedvault.repo package com.stevesoltys.seedvault.repo
import androidx.annotation.WorkerThread import androidx.annotation.WorkerThread
import com.google.protobuf.ByteString
import com.stevesoltys.seedvault.backend.BackendManager import com.stevesoltys.seedvault.backend.BackendManager
import com.stevesoltys.seedvault.crypto.Crypto import com.stevesoltys.seedvault.crypto.Crypto
import com.stevesoltys.seedvault.proto.Snapshot import com.stevesoltys.seedvault.proto.Snapshot
@ -71,10 +72,12 @@ internal class Checker(
this.handleSize = handles.size // remember number of snapshot handles we had this.handleSize = handles.size // remember number of snapshot handles we had
// get total disk space used by snapshots // get total disk space used by snapshots
val sizeMap = mutableMapOf<String, Int>() val sizeMap = mutableMapOf<ByteString, Int>() // uses blob.id as key
snapshots.forEach { snapshot -> snapshots.forEach { snapshot ->
// add sizes to a map first, so we don't double count // add sizes to a map first, so we don't double count
snapshot.blobsMap.forEach { (chunkId, blob) -> sizeMap[chunkId] = blob.length } snapshot.blobsMap.forEach { (_, blob) ->
sizeMap[blob.id] = blob.length
}
} }
return sizeMap.values.sumOf { it.toLong() } return sizeMap.values.sumOf { it.toLong() }
} }
@ -96,13 +99,13 @@ internal class Checker(
"Got $handleSize handles, but ${snapshots.size} snapshots." "Got $handleSize handles, but ${snapshots.size} snapshots."
} }
val blobSample = getBlobSample(snapshots, percent) val blobSample = getBlobSample(snapshots, percent)
val sampleSize = blobSample.values.sumOf { it.length.toLong() } val sampleSize = blobSample.sumOf { it.blob.length.toLong() }
log.info { "Blob sample has ${blobSample.size} blobs worth $sampleSize bytes." } log.info { "Blob sample has ${blobSample.size} blobs worth $sampleSize bytes." }
// check blobs concurrently // check blobs concurrently
val semaphore = Semaphore(concurrencyLimit) val semaphore = Semaphore(concurrencyLimit)
val size = AtomicLong() val size = AtomicLong()
val badChunks = ConcurrentSkipListSet<String>() val badChunks = ConcurrentSkipListSet<ChunkIdBlobPair>()
val lastNotification = AtomicLong() val lastNotification = AtomicLong()
val startTime = System.currentTimeMillis() val startTime = System.currentTimeMillis()
coroutineScope { coroutineScope {
@ -116,7 +119,7 @@ internal class Checker(
} catch (e: Exception) { } catch (e: Exception) {
log.error(e) { "Error loading chunk $chunkId: " } log.error(e) { "Error loading chunk $chunkId: " }
// TODO we could try differentiating transient backend issues // TODO we could try differentiating transient backend issues
badChunks.add(chunkId) badChunks.add(ChunkIdBlobPair(chunkId, blob))
} }
} }
// keep track of how much we checked and for how long // keep track of how much we checked and for how long
@ -154,25 +157,30 @@ internal class Checker(
checkerResult = null checkerResult = null
} }
private fun getBlobSample(snapshots: List<Snapshot>, percent: Int): Map<String, Blob> { private fun getBlobSample(
// split up blobs for app data and for APKs snapshots: List<Snapshot>,
val appBlobs = mutableMapOf<String, Blob>() percent: Int,
val apkBlobs = mutableMapOf<String, Blob>() ): List<ChunkIdBlobPair> {
// split up blobs for app data and for APKs (use blob.id as key to prevent double counting)
val appBlobs = mutableMapOf<ByteString, ChunkIdBlobPair>()
val apkBlobs = mutableMapOf<ByteString, ChunkIdBlobPair>()
snapshots.forEach { snapshot -> snapshots.forEach { snapshot ->
val appChunkIds = snapshot.appsMap.flatMap { it.value.chunkIdsList.hexFromProto() } val appChunkIds = snapshot.appsMap.flatMap { it.value.chunkIdsList.hexFromProto() }
val apkChunkIds = snapshot.appsMap.flatMap { val apkChunkIds = snapshot.appsMap.flatMap {
it.value.apk.splitsList.flatMap { split -> split.chunkIdsList.hexFromProto() } it.value.apk.splitsList.flatMap { split -> split.chunkIdsList.hexFromProto() }
} }
appChunkIds.forEach { chunkId -> appChunkIds.forEach { chunkId ->
appBlobs[chunkId] = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId") val blob = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
appBlobs[blob.id] = ChunkIdBlobPair(chunkId, blob)
} }
apkChunkIds.forEach { chunkId -> apkChunkIds.forEach { chunkId ->
apkBlobs[chunkId] = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId") val blob = snapshot.blobsMap[chunkId] ?: error("No Blob for chunkId")
apkBlobs[blob.id] = ChunkIdBlobPair(chunkId, blob)
} }
} }
// calculate sizes // calculate sizes
val appSize = appBlobs.values.sumOf { it.length.toLong() } val appSize = appBlobs.values.sumOf { it.blob.length.toLong() }
val apkSize = apkBlobs.values.sumOf { it.length.toLong() } val apkSize = apkBlobs.values.sumOf { it.blob.length.toLong() }
// let's assume it is unlikely that app data and APKs have blobs in common // let's assume it is unlikely that app data and APKs have blobs in common
val totalSize = appSize + apkSize val totalSize = appSize + apkSize
log.info { "Got ${appBlobs.size + apkBlobs.size} blobs worth $totalSize bytes to check." } log.info { "Got ${appBlobs.size + apkBlobs.size} blobs worth $totalSize bytes to check." }
@ -182,23 +190,21 @@ internal class Checker(
val appTargetSize = min((targetSize * 0.75).roundToLong(), appSize) // 75% of targetSize val appTargetSize = min((targetSize * 0.75).roundToLong(), appSize) // 75% of targetSize
log.info { "Sampling $targetSize bytes of which $appTargetSize bytes for apps." } log.info { "Sampling $targetSize bytes of which $appTargetSize bytes for apps." }
val blobSample = mutableMapOf<String, Blob>() val blobSample = mutableListOf<ChunkIdBlobPair>()
var currentSize = 0L var currentSize = 0L
// check apps first until we reach their target size // check apps first until we reach their target size
val appIterator = appBlobs.keys.shuffled().iterator() // random app blob iterator val appIterator = appBlobs.values.shuffled().iterator() // random app blob iterator
while (currentSize < appTargetSize && appIterator.hasNext()) { while (currentSize < appTargetSize && appIterator.hasNext()) {
val randomChunkId = appIterator.next() val pair = appIterator.next()
val blob = appBlobs[randomChunkId] ?: error("No blob") blobSample.add(pair)
blobSample[randomChunkId] = blob currentSize += pair.blob.length
currentSize += blob.length
} }
// now check APKs until we reach total targetSize // now check APKs until we reach total targetSize
val apkIterator = apkBlobs.keys.shuffled().iterator() // random APK blob iterator val apkIterator = apkBlobs.values.shuffled().iterator() // random APK blob iterator
while (currentSize < targetSize && apkIterator.hasNext()) { while (currentSize < targetSize && apkIterator.hasNext()) {
val randomChunkId = apkIterator.next() val pair = apkIterator.next()
val blob = apkBlobs[randomChunkId] ?: error("No blob") blobSample.add(pair)
blobSample[randomChunkId] = blob currentSize += pair.blob.length
currentSize += blob.length
} }
return blobSample return blobSample
} }
@ -216,3 +222,9 @@ internal class Checker(
if (readChunkId != chunkId) throw GeneralSecurityException("ChunkId doesn't match") if (readChunkId != chunkId) throw GeneralSecurityException("ChunkId doesn't match")
} }
} }
/**
 * Couples a [chunkId] with one concrete [blob] stored for it.
 *
 * The same chunkId can map to more than one blob, so the pair (and not the chunkId alone)
 * identifies what was checked.
 *
 * Instances are kept in sorted sets (e.g. `ConcurrentSkipListSet`), which decide membership
 * purely via [compareTo]. The ordering therefore must distinguish pairs that share a chunkId
 * but reference different blobs; otherwise the second pair would silently be dropped.
 */
data class ChunkIdBlobPair(val chunkId: String, val blob: Blob) : Comparable<ChunkIdBlobPair> {
    /**
     * Orders by [chunkId] first and breaks ties with the unsigned lexicographical order
     * of the blob IDs, so that only pairs with the same chunkId *and* blob ID compare as 0.
     */
    override fun compareTo(other: ChunkIdBlobPair): Int {
        val byChunkId = chunkId.compareTo(other.chunkId)
        if (byChunkId != 0) return byChunkId
        // same chunkId: fall back to blob.id so different blobs stay distinct elements
        return ByteString.unsignedLexicographicalComparator().compare(blob.id, other.blob.id)
    }
}

View file

@ -24,7 +24,7 @@ sealed class CheckerResult {
/** /**
* The list of chunkIDs that had errors. * The list of chunkIDs that had errors.
*/ */
val errorChunkIds: Set<String>, val errorChunkIdBlobPairs: Set<ChunkIdBlobPair>,
) : CheckerResult() { ) : CheckerResult() {
val goodSnapshots: List<Snapshot> val goodSnapshots: List<Snapshot>
val badSnapshots: List<Snapshot> val badSnapshots: List<Snapshot>
@ -32,9 +32,23 @@ sealed class CheckerResult {
init { init {
val good = mutableListOf<Snapshot>() val good = mutableListOf<Snapshot>()
val bad = mutableListOf<Snapshot>() val bad = mutableListOf<Snapshot>()
val errorChunkIds = errorChunkIdBlobPairs.map { it.chunkId }.toSet()
snapshots.forEach { snapshot -> snapshots.forEach { snapshot ->
val isGood = snapshot.blobsMap.keys.intersect(errorChunkIds).isEmpty() val badChunkIds = snapshot.blobsMap.keys.intersect(errorChunkIds)
if (isGood) good.add(snapshot) else bad.add(snapshot) if (badChunkIds.isEmpty()) {
// snapshot doesn't contain chunks with erroneous blobs
good.add(snapshot)
} else {
// snapshot may contain chunks with erroneous blobs, check deeper
val isBad = badChunkIds.any { chunkId ->
val blob = snapshot.blobsMap[chunkId] ?: error("No blob for chunkId")
// is this chunkId/blob pair in errorChunkIdBlobPairs?
errorChunkIdBlobPairs.any { pair ->
pair.chunkId == chunkId && pair.blob == blob
}
}
if (isBad) bad.add(snapshot) else good.add(snapshot)
}
} }
goodSnapshots = good goodSnapshots = good
badSnapshots = bad badSnapshots = bad

View file

@ -82,6 +82,36 @@ internal class CheckerTest : TransportTest() {
assertEquals(expectedSize, checker.getBackupSize()) assertEquals(expectedSize, checker.getBackupSize())
} }
@Test
fun `getBackupSize returns size without under-counting blobs with same chunkId`() =
    runBlocking {
        // an APK whose single split references nothing but chunkId1
        val singleChunkSplit = baseSplit.copy {
            this.chunkIds.clear()
            chunkIds.add(ByteString.fromHex(chunkId1))
        }
        val singleChunkApk = apk.copy {
            splits.clear()
            splits.add(singleChunkSplit)
        }
        val singleChunkApp = app.copy { this.apk = singleChunkApk }
        // template snapshot without any blobs; the copies below each add their own
        val blobFreeSnapshot = snapshot.copy {
            apps[packageName] = singleChunkApp
            blobs.clear()
        }
        // both snapshots know chunkId1, but each one maps it to a different blob
        val firstSnapshot = blobFreeSnapshot.copy {
            token = 1
            blobs[chunkId1] = blob1
        }
        val secondSnapshot = blobFreeSnapshot.copy {
            token = 2
            blobs[chunkId1] = blob2
        }
        val snapshotMap = mapOf(
            snapshotHandle1 to firstSnapshot,
            snapshotHandle2 to secondSnapshot,
        )
        // both blobs must be counted even though they share a chunkId
        val expectedSize = listOf(blob1, blob2).sumOf { it.length.toLong() }
        expectLoadingSnapshots(snapshotMap)

        assertEquals(expectedSize, checker.getBackupSize())
    }
@Test @Test
fun `check works even with no backup data`() = runBlocking { fun `check works even with no backup data`() = runBlocking {
expectLoadingSnapshots(emptyMap()) expectLoadingSnapshots(emptyMap())
@ -138,7 +168,8 @@ internal class CheckerTest : TransportTest() {
assertEquals(snapshotMap.values.toSet(), result.badSnapshots.toSet()) assertEquals(snapshotMap.values.toSet(), result.badSnapshots.toSet())
assertEquals(emptyList<Snapshot>(), result.goodSnapshots) assertEquals(emptyList<Snapshot>(), result.goodSnapshots)
assertEquals(snapshotMap.size, result.existingSnapshots) assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(chunkId1, chunkId2), result.errorChunkIds) val errorPairs = setOf(ChunkIdBlobPair(chunkId1, blob1), ChunkIdBlobPair(chunkId2, blob2))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
} }
@Test @Test
@ -189,7 +220,8 @@ internal class CheckerTest : TransportTest() {
assertEquals(listOf(snapshotMap[snapshotHandle1]), result.goodSnapshots) assertEquals(listOf(snapshotMap[snapshotHandle1]), result.goodSnapshots)
assertEquals(listOf(snapshotMap[snapshotHandle2]), result.badSnapshots) assertEquals(listOf(snapshotMap[snapshotHandle2]), result.badSnapshots)
assertEquals(snapshotMap.size, result.existingSnapshots) assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(chunkId2), result.errorChunkIds) val errorPairs = setOf(ChunkIdBlobPair(chunkId2, blob2))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
} }
@Test @Test
@ -247,8 +279,8 @@ internal class CheckerTest : TransportTest() {
fun `check prefers app data over APKs`() = runBlocking { fun `check prefers app data over APKs`() = runBlocking {
val appDataBlob = blob { val appDataBlob = blob {
id = ByteString.copyFrom(Random.nextBytes(32)) id = ByteString.copyFrom(Random.nextBytes(32))
length = Random.nextInt(0, Int.MAX_VALUE) length = Random.nextInt(1, Int.MAX_VALUE)
uncompressedLength = Random.nextInt(0, Int.MAX_VALUE) uncompressedLength = Random.nextInt(1, Int.MAX_VALUE)
} }
val appDataBlobHandle1 = AppBackupFileType.Blob(repoId, appDataBlob.id.hexFromProto()) val appDataBlobHandle1 = AppBackupFileType.Blob(repoId, appDataBlob.id.hexFromProto())
val appDataChunkId = Random.nextBytes(32).toHexString() val appDataChunkId = Random.nextBytes(32).toHexString()
@ -266,6 +298,7 @@ internal class CheckerTest : TransportTest() {
// only loading app data, not other blobs // only loading app data, not other blobs
coEvery { loader.loadFile(appDataBlobHandle1, null) } throws SecurityException() coEvery { loader.loadFile(appDataBlobHandle1, null) } throws SecurityException()
// debug aid: print the blob length (not the whole blob followed by the literal ".length")
println("appDataBlob.length = ${appDataBlob.length}")
every { nm.onCheckFinishedWithError(appDataBlob.length.toLong(), any()) } just Runs every { nm.onCheckFinishedWithError(appDataBlob.length.toLong(), any()) } just Runs
assertNull(checker.checkerResult) assertNull(checker.checkerResult)
@ -275,7 +308,8 @@ internal class CheckerTest : TransportTest() {
assertEquals(snapshotMap.values.toSet(), result.snapshots.toSet()) assertEquals(snapshotMap.values.toSet(), result.snapshots.toSet())
assertEquals(snapshotMap.values.toSet(), result.badSnapshots.toSet()) assertEquals(snapshotMap.values.toSet(), result.badSnapshots.toSet())
assertEquals(snapshotMap.size, result.existingSnapshots) assertEquals(snapshotMap.size, result.existingSnapshots)
assertEquals(setOf(appDataChunkId), result.errorChunkIds) val errorPairs = setOf(ChunkIdBlobPair(appDataChunkId, appDataBlob))
assertEquals(errorPairs, result.errorChunkIdBlobPairs)
coVerify(exactly = 0) { coVerify(exactly = 0) {
loader.loadFile(blobHandle1, null) loader.loadFile(blobHandle1, null)
@ -283,6 +317,55 @@ internal class CheckerTest : TransportTest() {
} }
} }
@Test
fun `check doesn't skip broken blobs that have a fix with same chunkID`() = runBlocking {
    // brokenData will not hash to the chunkId, intactData is what the chunkId is derived from
    val brokenData = getRandomByteArray()
    val intactData = getRandomByteArray()
    val sharedChunkId = MessageDigest.getInstance("SHA-256").digest(intactData).toHexString()

    // an APK whose single split references only the shared chunkId
    val singleChunkSplit = baseSplit.copy {
        this.chunkIds.clear()
        chunkIds.add(ByteString.fromHex(sharedChunkId))
    }
    val singleChunkApk = apk.copy {
        splits.clear()
        splits.add(singleChunkSplit)
    }
    val singleChunkApp = app.copy { this.apk = singleChunkApk }
    val blobFreeSnapshot = snapshot.copy {
        apps[packageName] = singleChunkApp
        blobs.clear()
    }
    // snapshot1 points the chunkId at the broken blob, snapshot2 at the fixed one
    val brokenSnapshot = blobFreeSnapshot.copy {
        token = 1
        blobs[sharedChunkId] = blob1
    }
    val fixedSnapshot = blobFreeSnapshot.copy {
        token = 2
        blobs[sharedChunkId] = blob2
    }
    val snapshotMap = mapOf(
        snapshotHandle1 to brokenSnapshot,
        snapshotHandle2 to fixedSnapshot,
    )

    expectLoadingSnapshots(snapshotMap)
    every { backendManager.requiresNetwork } returns Random.nextBoolean()

    coEvery { loader.loadFile(blobHandle1, null) } returns ByteArrayInputStream(brokenData)
    coEvery { loader.loadFile(blobHandle2, null) } returns ByteArrayInputStream(intactData)

    every { nm.onCheckFinishedWithError(any(), any()) } just Runs

    assertNull(checker.checkerResult)
    checker.check(100)
    assertInstanceOf(CheckerResult.Error::class.java, checker.checkerResult)
    val errorResult = checker.checkerResult as CheckerResult.Error

    // only the snapshot referencing the broken blob may be reported as bad
    assertEquals(snapshotMap.values.toSet(), errorResult.snapshots.toSet())
    assertEquals(setOf(snapshotMap[snapshotHandle2]), errorResult.goodSnapshots.toSet())
    assertEquals(setOf(snapshotMap[snapshotHandle1]), errorResult.badSnapshots.toSet())
    assertEquals(snapshotMap.size, errorResult.existingSnapshots)
    val expectedErrorPairs = setOf(ChunkIdBlobPair(sharedChunkId, blob1))
    assertEquals(expectedErrorPairs, errorResult.errorChunkIdBlobPairs)
}
private suspend fun expectLoadingSnapshots( private suspend fun expectLoadingSnapshots(
snapshots: Map<AppBackupFileType.Snapshot, Snapshot>, snapshots: Map<AppBackupFileType.Snapshot, Snapshot>,
) { ) {