Skip to content

Commit

Permalink
Validate S3 file list can be downloaded to file system (#17)
Browse files Browse the repository at this point in the history
* Add validation of file list from S3 to ensure we can download it

It is possible to have an S3 file with the same name as a folder, which is not possible in a file system

* Add ignoreFileFolderClashes flag, optimize validateFileList
  • Loading branch information
adam-fowler authored Sep 2, 2021
1 parent a24e24f commit 6a35d73
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 3 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.build
.git
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ extension S3FileTransferManager {
/// VersionId used to reference a specific version of the object.
public let versionId: String?

public init(expectedBucketOwner: String? = nil, ifMatch: String? = nil, ifModifiedSince: Date? = nil, ifNoneMatch: String? = nil, ifUnmodifiedSince: Date? = nil, requestPayer: S3.RequestPayer? = nil, sSECustomerAlgorithm: String? = nil, sSECustomerKey: String? = nil, sSECustomerKeyMD5: String? = nil, versionId: String? = nil) {
/// Ignore file folder clashes when downloading folders. When set to true the file is not downloaded
public let ignoreFileFolderClashes: Bool

public init(expectedBucketOwner: String? = nil, ifMatch: String? = nil, ifModifiedSince: Date? = nil, ifNoneMatch: String? = nil, ifUnmodifiedSince: Date? = nil, requestPayer: S3.RequestPayer? = nil, sSECustomerAlgorithm: String? = nil, sSECustomerKey: String? = nil, sSECustomerKeyMD5: String? = nil, versionId: String? = nil, ignoreFileFolderClashes: Bool = false) {
self.expectedBucketOwner = expectedBucketOwner
self.ifMatch = ifMatch
self.ifModifiedSince = ifModifiedSince
Expand All @@ -131,6 +134,8 @@ extension S3FileTransferManager {
self.sSECustomerKey = sSECustomerKey
self.sSECustomerKeyMD5 = sSECustomerKeyMD5
self.versionId = versionId

self.ignoreFileFolderClashes = ignoreFileFolderClashes
}
}

Expand Down
34 changes: 32 additions & 2 deletions Sources/SotoS3FileTransfer/S3FileTransferManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,14 @@ public struct S3FileTransferManager {

/// Errors created by S3TransferManager
public enum Error: Swift.Error {
/// File you referenced doesn't exist
case fileDoesNotExist(String)
/// Failed to create a folder during a download
case failedToCreateFolder(String)
/// Failed to enumerate the contents of a folder
case failedToEnumerateFolder(String)
/// Cannot download file from S3 as it is a folder on your local file system
case fileFolderClash(String, String)
}

/// S3 service object
Expand Down Expand Up @@ -290,6 +295,7 @@ public struct S3FileTransferManager {
) -> EventLoopFuture<Void> {
let eventLoop = self.s3.eventLoopGroup.next()
return listFiles(in: s3Folder)
.flatMapThrowing { try self.validateFileList($0, ignoreClashes: options.ignoreFileFolderClashes) }
.flatMap { files in
let taskQueue = TaskQueue<Void>(maxConcurrentTasks: configuration.maxConcurrentTasks, on: eventLoop)
let transfers = Self.targetFiles(files: files, from: s3Folder, to: folder)
Expand Down Expand Up @@ -393,12 +399,14 @@ public struct S3FileTransferManager {
public func sync(
from s3Folder: S3Folder,
to folder: String,
delete: Bool, options: GetOptions = .init(),
delete: Bool,
options: GetOptions = .init(),
progress: @escaping (Double) throws -> Void = { _ in }
) -> EventLoopFuture<Void> {
let eventLoop = self.s3.eventLoopGroup.next()

return listFiles(in: folder).and(listFiles(in: s3Folder))
return listFiles(in: folder)
.and(listFiles(in: s3Folder).flatMapThrowing { try self.validateFileList($0, ignoreClashes: options.ignoreFileFolderClashes) })
.flatMap { files, s3Files in
let taskQueue = TaskQueue<Void>(maxConcurrentTasks: configuration.maxConcurrentTasks, on: eventLoop)
let targetFiles = Self.targetFiles(files: s3Files, from: s3Folder, to: folder)
Expand Down Expand Up @@ -567,6 +575,28 @@ extension S3FileTransferManager {
}
}

/// Validate we can save file list from S3 to file system
///
/// Look for files that are the same name as directory names in other files
func validateFileList(_ list: [S3FileDescriptor], ignoreClashes: Bool) throws -> [S3FileDescriptor] {
let list = list.sorted { $0.file.key < $1.file.key }
return try list.reduce([]) { result, file -> [S3FileDescriptor] in
let filename = file.file.key
return try result.compactMap {
let prevFilename = $0.file.key
if filename.hasPrefix(prevFilename),
(prevFilename.last == "/" || filename.dropFirst(prevFilename.count).first == "/") {
if ignoreClashes {
return nil
} else {
throw Error.fileFolderClash(prevFilename, filename)
}
}
return $0
} + [file]
}
}

/// delete a local file
func delete(_ file: String) -> EventLoopFuture<Void> {
self.logger.info("Deleting \(file)")
Expand Down
33 changes: 33 additions & 0 deletions Tests/SotoS3FileTransferTests/S3FileTransferManagerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,39 @@ final class S3FileTransferManagerTests: XCTestCase {
}
}

/// check the correct error is thrown when trying to download a file on top of a folder
func testFileFolderClash() {
let folder = S3Folder(bucket: Self.bucketName, key: "testFileFolderClash")
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("folder"), bucket: Self.bucketName, key: "testFileFolderClash/fold")).wait())
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("folder"), bucket: Self.bucketName, key: "testFileFolderClash/folder")).wait())
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("folder"), bucket: Self.bucketName, key: "testFileFolderClash/folder*")).wait())
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("file"), bucket: Self.bucketName, key: "testFileFolderClash/folder/file")).wait())
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("file"), bucket: Self.bucketName, key: "testFileFolderClash/folder/file2")).wait())
XCTAssertThrowsError(try Self.s3FileTransfer.copy(from: folder, to: Self.tempFolder).wait()) { error in
switch error {
case S3FileTransferManager.Error.fileFolderClash(let file1, let file2):
XCTAssertEqual(file1, "testFileFolderClash/folder")
XCTAssertEqual(file2, "testFileFolderClash/folder/file")
default:
XCTFail("\(error)")
}
}
}

/// check no error is thrown when trying to download a file with the same prefix
func testIgnoreFileFolderClash() {
let folder = S3Folder(bucket: Self.bucketName, key: "testIgnoreFileFolderClash")
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("folder"), bucket: Self.bucketName, key: "testIgnoreFileFolderClash/fold")).wait())
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("folder"), bucket: Self.bucketName, key: "testIgnoreFileFolderClash/folder")).wait())
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("folder"), bucket: Self.bucketName, key: "testIgnoreFileFolderClash/folder*")).wait())
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("file"), bucket: Self.bucketName, key: "testIgnoreFileFolderClash/folder/file")).wait())
XCTAssertNoThrow(try Self.s3.putObject(.init(body: .string("file"), bucket: Self.bucketName, key: "testIgnoreFileFolderClash/folder/file2")).wait())
XCTAssertNoThrow(try Self.s3FileTransfer.copy(from: folder, to: Self.tempFolder + "/testIgnoreFileFolderClash", options: .init(ignoreFileFolderClashes: true)).wait())
var files: [S3FileTransferManager.FileDescriptor]?
XCTAssertNoThrow(files = try Self.s3FileTransfer.listFiles(in: Self.tempFolder + "/testIgnoreFileFolderClash").wait())
XCTAssertEqual(files?.count, 4)
}

static var rootPath: String {
return #file
.split(separator: "/", omittingEmptySubsequences: false)
Expand Down

0 comments on commit 6a35d73

Please sign in to comment.