Merge pull request #721 from grote/dont-restore-existing-files

Don't restore files that still exist unchanged
This commit is contained in:
Torsten Grote 2024-08-22 17:39:55 -03:00 committed by GitHub
commit 5418a8ef12
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 150 additions and 46 deletions

View file

@ -13,7 +13,6 @@ import de.grobox.storagebackuptester.backup.getSpeed
import org.calyxos.backup.storage.api.BackupFile
import org.calyxos.backup.storage.restore.NotificationRestoreObserver
import kotlin.time.DurationUnit
import kotlin.time.ExperimentalTime
import kotlin.time.toDuration
data class RestoreProgress(
@ -41,6 +40,10 @@ class RestoreStats(
liveData.postValue(RestoreProgress(filesProcessed, totalFiles, text))
}
override fun onFileDuplicatesRemoved(num: Int) {
// no-op
}
override fun onFileRestored(
file: BackupFile,
bytesWritten: Long,
@ -68,7 +71,6 @@ class RestoreStats(
liveData.postValue(RestoreProgress(filesProcessed, totalFiles))
}
@OptIn(ExperimentalTime::class)
override fun onRestoreComplete(restoreDuration: Long) {
super.onRestoreComplete(restoreDuration)
val sb = StringBuilder("\n")

View file

@ -7,6 +7,7 @@ package org.calyxos.backup.storage.api
public interface RestoreObserver {
public fun onRestoreStart(numFiles: Int, totalSize: Long)
public fun onFileDuplicatesRemoved(num: Int)
public fun onFileRestored(file: BackupFile, bytesWritten: Long, tag: String)
/**

View file

@ -7,6 +7,7 @@ package org.calyxos.backup.storage.backup
import android.app.Service
import android.content.Intent
import android.content.pm.ServiceInfo.FOREGROUND_SERVICE_TYPE_MANIFEST
import android.os.IBinder
import android.util.Log
import kotlinx.coroutines.GlobalScope
@ -30,7 +31,8 @@ public abstract class BackupService : Service() {
Log.d(TAG, "onStartCommand $intent $flags $startId")
startForeground(
NOTIFICATION_ID_BACKUP,
n.getBackupNotification(R.string.notification_backup_scanning)
n.getBackupNotification(R.string.notification_backup_scanning),
FOREGROUND_SERVICE_TYPE_MANIFEST,
)
GlobalScope.launch {
val success = storageBackup.runBackup(backupObserver)
@ -38,7 +40,8 @@ public abstract class BackupService : Service() {
// only prune old backups when backup run was successful
startForeground(
NOTIFICATION_ID_PRUNE,
n.getPruneNotification(R.string.notification_prune)
n.getPruneNotification(R.string.notification_prune),
FOREGROUND_SERVICE_TYPE_MANIFEST,
)
storageBackup.pruneOldBackups(backupObserver)
}

View file

@ -100,6 +100,7 @@ internal data class MediaFile(
.setName(fileName)
.setSize(size)
.addAllChunkIds(chunkIds)
.setIsFavorite(isFavorite)
.setVolume(if (volume == MediaStore.VOLUME_EXTERNAL_PRIMARY) "" else volume)
if (lastModified != null) {
builder.lastModified = lastModified
@ -107,6 +108,9 @@ internal data class MediaFile(
if (zipIndex != null) {
builder.zipIndex = zipIndex
}
if (ownerPackageName != null) {
builder.setOwnerPackageName(ownerPackageName)
}
return builder.build()
}
}

View file

@ -46,8 +46,6 @@ internal abstract class AbstractChunkRestore(
tag: String,
streamWriter: suspend (outputStream: OutputStream) -> Long,
) {
// TODO check if the file exists already (same name, size, chunk IDs)
// and skip it in this case
fileRestore.restoreFile(file, observer, tag, streamWriter)
}

View file

@ -24,7 +24,6 @@ import kotlin.random.Random
private const val TAG = "FileRestore"
@Suppress("BlockingMethodInNonBlockingContext")
internal class FileRestore(
private val context: Context,
private val mediaScanner: MediaScanner,
@ -46,10 +45,12 @@ internal class FileRestore(
bytes = restoreFile(file.mediaFile, streamWriter)
finalTag = "M$tag"
}
file.docFile != null -> {
bytes = restoreFile(file, streamWriter)
finalTag = "D$tag"
}
else -> {
error("unexpected file: $file")
}
@ -63,39 +64,45 @@ internal class FileRestore(
streamWriter: suspend (outputStream: OutputStream) -> Long,
): Long {
// ensure directory exists
@Suppress("DEPRECATION")
val dir = File("${getExternalStorageDirectory()}/${docFile.dir}")
if (!dir.mkdirs() && !dir.isDirectory) {
throw IOException("Could not create ${dir.absolutePath}")
}
// find non-existing file-name
var file = File(dir, docFile.name)
var i = 0
// we don't support existing files, but at least don't overwrite them when they do exist
while (file.exists()) {
i++
val lastDot = docFile.name.lastIndexOf('.')
val newName = if (lastDot == -1) "${docFile.name} ($i)"
else docFile.name.replaceRange(lastDot..lastDot, " ($i).")
file = File(dir, newName)
}
val bytesWritten = try {
// copy chunk(s) into file
file.outputStream().use { outputStream ->
streamWriter(outputStream)
// TODO should we also calculate and check the chunk IDs?
if (file.isFile && file.length() == docFile.size &&
file.lastModified() == docFile.lastModified
) {
Log.i(TAG, "Not restoring $file, already there unchanged.")
return file.length() // not restoring existing file with same length and date
} else {
var i = 0
// don't overwrite existing files, if they exist
while (file.exists()) { // find non-existing file-name
i++
val lastDot = docFile.name.lastIndexOf('.')
val newName = if (lastDot == -1) "${docFile.name} ($i)"
else docFile.name.replaceRange(lastDot..lastDot, " ($i).")
file = File(dir, newName)
}
} catch (e: IOException) {
file.delete()
throw e
val bytesWritten = try {
// copy chunk(s) into file
file.outputStream().use { outputStream ->
streamWriter(outputStream)
}
} catch (e: IOException) {
file.delete()
throw e
}
// re-set lastModified timestamp
file.setLastModified(docFile.lastModified ?: 0)
// This might be a media file, so do we need to index it.
// Otherwise things like a wrong size of 0 bytes in MediaStore can happen.
indexFile(file)
return bytesWritten
}
// re-set lastModified timestamp
file.setLastModified(docFile.lastModified ?: 0)
// This might be a media file, so do we need to index it.
// Otherwise things like a wrong size of 0 bytes in MediaStore can happen.
indexFile(file)
return bytesWritten
}
@Throws(IOException::class)
@ -103,12 +110,18 @@ internal class FileRestore(
mediaFile: BackupMediaFile,
streamWriter: suspend (outputStream: OutputStream) -> Long,
): Long {
// TODO should we also calculate and check the chunk IDs?
if (mediaScanner.existsMediaFileUnchanged(mediaFile)) {
Log.i(
TAG,
"Not restoring ${mediaFile.path}/${mediaFile.name}, already there unchanged."
)
return mediaFile.size
}
// Insert pending media item into MediaStore
val contentValues = ContentValues().apply {
put(MediaColumns.DISPLAY_NAME, mediaFile.name)
put(MediaColumns.RELATIVE_PATH, mediaFile.path)
// changing owner requires backup permission
put(MediaColumns.OWNER_PACKAGE_NAME, mediaFile.ownerPackageName)
put(MediaColumns.IS_PENDING, 1)
put(MediaColumns.IS_FAVORITE, if (mediaFile.isFavorite) 1 else 0)
}
@ -124,6 +137,9 @@ internal class FileRestore(
contentValues.clear()
contentValues.apply {
put(MediaColumns.IS_PENDING, 0)
// changing owner requires backup permission
// done here because we are not allowed to access pending media we don't own
put(MediaColumns.OWNER_PACKAGE_NAME, mediaFile.ownerPackageName)
}
try {
contentResolver.update(uri, contentValues, null, null)
@ -143,7 +159,6 @@ internal class FileRestore(
}
private fun setLastModifiedOnMediaFile(mediaFile: BackupMediaFile, uri: Uri) {
@Suppress("DEPRECATION")
val extDir = getExternalStorageDirectory()
// re-set lastModified as we can't use the MediaStore for this (read-only property)

View file

@ -21,24 +21,32 @@ internal data class RestorableChunk(
/**
* Call this after the RestorableChunk is complete and **before** using it for restore.
*
* @return the number of duplicate files removed
*/
fun finalize() {
fun finalize(): Int {
// entries in the zip chunk need to be sorted by their index in the zip
files.sortBy { it.zipIndex }
// There might be duplicates in case the *exact* same set of files exists more than once
// so they'll produce the same chunk ID.
// But since the content is there and this is an unlikely scenario, we drop the duplicates.
var lastIndex = 0
var numRemoved = 0
val iterator = files.iterator()
while (iterator.hasNext()) {
val file = iterator.next()
val i = file.zipIndex
when {
i < lastIndex -> error("unsorted list")
i == lastIndex -> iterator.remove() // remove duplicate
i == lastIndex -> { // remove duplicate
numRemoved++
iterator.remove()
}
i > lastIndex -> lastIndex = i // gaps are possible when we don't restore all files
}
}
return numRemoved
}
}
@ -87,6 +95,14 @@ internal data class FileSplitterResult(
* Files referenced in [multiChunkMap] sorted for restoring.
*/
val multiChunkFiles: Collection<RestorableFile>,
/**
* The number of duplicate files that was removed from [zipChunks].
* Duplicate files in [zipChunks] with the same chunk ID will have the same index in the ZIP.
* So we remove them to make restore easier.
* With some extra work, we could restore those files,
* but by not doing so we are probably doing a favor to the user.
*/
val numRemovedDuplicates: Int,
)
/**
@ -121,7 +137,7 @@ internal object FileSplitter {
}
}
// entries in the zip chunk need to be sorted by their index in the zip, duplicated removed
zipChunkMap.values.forEach { zipChunk -> zipChunk.finalize() }
val numRemovedDuplicates = zipChunkMap.values.sumOf { zipChunk -> zipChunk.finalize() }
val singleChunks = chunkMap.values.filter { it.isSingle }
val multiChunks = chunkMap.filterValues { !it.isSingle }
return FileSplitterResult(
@ -129,6 +145,7 @@ internal object FileSplitter {
singleChunks = singleChunks,
multiChunkMap = multiChunks,
multiChunkFiles = getMultiFiles(multiChunks),
numRemovedDuplicates = numRemovedDuplicates,
)
}
@ -145,6 +162,7 @@ internal object FileSplitter {
f1.chunkIdsCount == f2.chunkIdsCount -> {
f1.chunkIds.joinToString().compareTo(f2.chunkIds.joinToString())
}
else -> 1
}
}

View file

@ -18,6 +18,7 @@ public open class NotificationRestoreObserver internal constructor(private val n
private var totalFiles = 0
private var filesRestored = 0
private var filesRemovedAsDuplicates = 0
private var filesWithError = 0
override fun onRestoreStart(numFiles: Int, totalSize: Long) {
@ -25,6 +26,10 @@ public open class NotificationRestoreObserver internal constructor(private val n
n.updateRestoreNotification(filesRestored + filesWithError, totalFiles)
}
override fun onFileDuplicatesRemoved(num: Int) {
filesRemovedAsDuplicates = num
}
override fun onFileRestored(file: BackupFile, bytesWritten: Long, tag: String) {
filesRestored++
n.updateRestoreNotification(filesRestored + filesWithError, totalFiles)
@ -36,7 +41,13 @@ public open class NotificationRestoreObserver internal constructor(private val n
}
override fun onRestoreComplete(restoreDuration: Long) {
n.showRestoreCompleteNotification(filesRestored, totalFiles, getRestoreCompleteIntent())
n.showRestoreCompleteNotification(
restored = filesRestored,
duplicates = filesRemovedAsDuplicates,
errors = filesWithError,
total = totalFiles,
intent = getRestoreCompleteIntent(),
)
}
protected open fun getRestoreCompleteIntent(): PendingIntent? {

View file

@ -110,8 +110,10 @@ internal class Restore(
observer?.onRestoreStart(filesTotal, totalSize)
val split = FileSplitter.splitSnapshot(snapshot)
observer?.onFileDuplicatesRemoved(split.numRemovedDuplicates)
var restoredFiles = split.numRemovedDuplicates // count removed dups, so numbers add up
val version = snapshot.version
var restoredFiles = 0
val smallFilesDuration = measure {
restoredFiles += zipChunkRestore.restore(
version,

View file

@ -7,6 +7,7 @@ package org.calyxos.backup.storage.restore
import android.app.Service
import android.content.Intent
import android.content.pm.ServiceInfo.FOREGROUND_SERVICE_TYPE_MANIFEST
import android.os.IBinder
import android.util.Log
import kotlinx.coroutines.Dispatchers
@ -49,7 +50,11 @@ public abstract class RestoreService : Service() {
if (timestamp < 0) error("No timestamp in intent: $intent")
val storedSnapshot = StoredSnapshot(userId, timestamp)
startForeground(NOTIFICATION_ID_RESTORE, n.getRestoreNotification())
startForeground(
NOTIFICATION_ID_RESTORE,
n.getRestoreNotification(),
FOREGROUND_SERVICE_TYPE_MANIFEST,
)
GlobalScope.launch {
val snapshot = withContext(Dispatchers.Main) {
fileSelectionManager.getBackupSnapshotAndReset()

View file

@ -56,7 +56,7 @@ public class DocumentScanner(context: Context) {
queryUri, PROJECTION, null, null, null
)
val documentFiles = ArrayList<DocFile>(cursor?.count ?: 0)
cursor?.use { it ->
cursor?.use {
while (it.moveToNext()) {
val id = it.getString(PROJECTION_ID)
val documentUri = DocumentsContract.buildDocumentUriUsingTree(uri, id)

View file

@ -16,7 +16,6 @@ import org.calyxos.backup.storage.content.DocFile
import org.calyxos.backup.storage.content.MediaFile
import org.calyxos.backup.storage.db.UriStore
import org.calyxos.backup.storage.measure
import kotlin.time.ExperimentalTime
internal class FileScannerResult(
val smallFiles: List<ContentFile>,
@ -36,7 +35,6 @@ internal class FileScanner(
private const val FILES_LARGE = "large"
}
@OptIn(ExperimentalTime::class)
fun getFiles(): FileScannerResult {
// scan both APIs
val mediaFiles = ArrayList<ContentFile>()

View file

@ -6,6 +6,7 @@
package org.calyxos.backup.storage.scanner
import android.content.ContentResolver.QUERY_ARG_SQL_SELECTION
import android.content.ContentResolver.QUERY_ARG_SQL_SELECTION_ARGS
import android.content.ContentUris
import android.content.Context
import android.database.Cursor
@ -21,6 +22,7 @@ import androidx.core.database.getLongOrNull
import androidx.core.database.getStringOrNull
import org.calyxos.backup.storage.api.BackupFile
import org.calyxos.backup.storage.api.MediaType
import org.calyxos.backup.storage.backup.BackupMediaFile
import org.calyxos.backup.storage.content.MediaFile
import java.io.File
@ -79,6 +81,37 @@ public class MediaScanner(context: Context) {
}
}
internal fun existsMediaFileUnchanged(mediaFile: BackupMediaFile): Boolean {
val uri = MediaType.fromBackupMediaType(mediaFile.type).contentUri
val extras = Bundle().apply {
// search for files with same path and name
val query = StringBuilder().apply {
append("${MediaStore.MediaColumns.MIME_TYPE}!='$MIME_TYPE_DIR'")
append(" AND ")
append("${MediaStore.MediaColumns.RELATIVE_PATH}=?")
append(" AND ")
append("${MediaStore.MediaColumns.DISPLAY_NAME}=?")
}
putString(QUERY_ARG_SQL_SELECTION, query.toString())
val args = arrayOf(
mediaFile.path + "/", // Note trailing slash that is important
mediaFile.name,
)
putStringArray(QUERY_ARG_SQL_SELECTION_ARGS, args)
}
contentResolver.query(uri, PROJECTION, extras, null)?.use { c ->
while (c.moveToNext()) {
val f = createMediaFile(c, uri)
// note that we get seconds, but store milliseconds
if (f.dateModified == mediaFile.lastModified / 1000 && f.size == mediaFile.size) {
return true
}
}
}
return false
}
internal fun getPath(uri: Uri): String? {
val projection = arrayOf(
MediaStore.MediaColumns.RELATIVE_PATH,

View file

@ -116,13 +116,25 @@ internal class Notifications(private val context: Context) {
internal fun showRestoreCompleteNotification(
restored: Int,
duplicates: Int,
errors: Int,
total: Int,
intent: PendingIntent?,
) {
val title = context.getString(R.string.notification_restore_complete_title, restored, total)
val msg = StringBuilder().apply {
if (duplicates > 0) {
append(context.getString(R.string.notification_restore_complete_dups, duplicates))
}
if (errors > 0) {
if (duplicates > 0) append("\n")
append(context.getString(R.string.notification_restore_complete_errors, errors))
}
}.toString().ifEmpty { null }
val notification = NotificationCompat.Builder(context, CHANNEL_ID_BACKUP).apply {
setSmallIcon(R.drawable.ic_cloud_done)
setContentTitle(title)
setContentText(msg)
setOngoing(false)
setShowWhen(true)
setAutoCancel(true)

View file

@ -138,7 +138,7 @@ public class FileSelectionManager {
val name = if (i >= parts.size - 1) {
parts[i]
} else {
parts.subList(i, parts.size).joinToString { "/" }
parts.subList(i, parts.size).joinToString("/")
}
levels[folder] = Pair(subPathLevel.first + 1, name)
return@forEach

View file

@ -18,6 +18,8 @@
<string name="notification_restore_title">Restoring files…</string>
<string name="notification_restore_info">%1$d/%2$d</string>
<string name="notification_restore_complete_title">%1$d of %2$d files restored</string>
<string name="notification_restore_complete_dups">%1$d files were duplicates.</string>
<string name="notification_restore_complete_errors">%1$d files had errors.</string>
<string name="snapshots_title">Available storage backups</string>
<string name="snapshots_empty">No storage backups found\n\nSorry, but there is nothing that can be restored.</string>