mirror of
https://github.com/GayPizzaSpecifications/darwin-apk.git
synced 2025-08-03 05:30:54 +00:00
zlib-based homecooked gzip reader for significantly faster decompression times
This commit is contained in:
@ -8,14 +8,10 @@ let package = Package(
|
||||
],
|
||||
dependencies: [
|
||||
.package(url: "https://github.com/apple/swift-argument-parser", from: "1.5.0"),
|
||||
.package(url: "https://github.com/tsolomko/SWCompression", from: "4.8.6"),
|
||||
],
|
||||
targets: [
|
||||
.target(
|
||||
name: "darwin-apk",
|
||||
dependencies: [
|
||||
.product(name: "SWCompression", package: "SWCompression"),
|
||||
],
|
||||
path: "Sources/apk"),
|
||||
.executableTarget(
|
||||
name: "dpk",
|
||||
|
@ -4,7 +4,6 @@
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
import SWCompression
|
||||
import CryptoKit
|
||||
|
||||
public struct ApkIndexUpdater {
|
||||
@ -64,23 +63,46 @@ public struct ApkIndexUpdater {
|
||||
let tarSignature: [TarReader.Entry]
|
||||
let tarRecords: [TarReader.Entry]
|
||||
|
||||
let tars = try GzipArchive.multiUnarchive( // Slow...
|
||||
archive: Data(contentsOf: indexURL))
|
||||
assert(tars.count >= 2)
|
||||
print("Archive: \(indexURL.lastPathComponent)")
|
||||
|
||||
var signatureStream = MemoryInputStream(buffer: tars[0].data)
|
||||
let durFormat = Duration.UnitsFormatStyle(
|
||||
allowedUnits: [ .seconds, .milliseconds ],
|
||||
width: .condensedAbbreviated,
|
||||
fractionalPart: .show(length: 3))
|
||||
let gzipStart = ContinuousClock.now
|
||||
|
||||
var tars = [Data]()
|
||||
do {
|
||||
var file: any InputStream = try FileInputStream(indexURL)
|
||||
//var file: any InputStream = try MemoryInputStream(buffer: try Data(contentsOf: indexURL))
|
||||
tars.append(try GZip.read(inStream: &file))
|
||||
tars.append(try GZip.read(inStream: &file))
|
||||
|
||||
} catch {
|
||||
fatalError(error.localizedDescription)
|
||||
}
|
||||
|
||||
print("Gzip time: \((ContinuousClock.now - gzipStart).formatted(durFormat))")
|
||||
let untarStart = ContinuousClock.now
|
||||
|
||||
var signatureStream = MemoryInputStream(buffer: tars[0])
|
||||
tarSignature = try TarReader.read(&signatureStream)
|
||||
var recordsStream = MemoryInputStream(buffer: tars[1].data)
|
||||
var recordsStream = MemoryInputStream(buffer: tars[1])
|
||||
tarRecords = try TarReader.read(&recordsStream)
|
||||
|
||||
guard case .file(let signatureName, _) = tarSignature.first
|
||||
else { fatalError("Missing signature") }
|
||||
print(signatureName)
|
||||
guard let apkIndexFile = tarRecords.firstFile(name: "APKINDEX")
|
||||
else { fatalError("APKINDEX missing") }
|
||||
guard let description = tarRecords.firstFile(name: "DESCRIPTION")
|
||||
else { fatalError("DESCRIPTION missing") }
|
||||
|
||||
print("TAR time: \((ContinuousClock.now - untarStart).formatted(durFormat))")
|
||||
let indexStart = ContinuousClock.now
|
||||
defer {
|
||||
print("Index time: \((ContinuousClock.now - indexStart).formatted(durFormat))")
|
||||
}
|
||||
|
||||
let reader = TextInputStream(binaryStream: MemoryInputStream(buffer: apkIndexFile))
|
||||
return try ApkIndex(raw:
|
||||
try ApkRawIndex(lines: reader.lines))
|
||||
|
@ -4,6 +4,8 @@
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
import Darwin
|
||||
import System
|
||||
|
||||
public struct FileInputStream: InputStream {
|
||||
private var _hnd: FileHandle
|
||||
@ -72,4 +74,12 @@ public struct FileInputStream: InputStream {
|
||||
throw .fileHandleError(error)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads up to `len` bytes from the underlying file descriptor straight into `buffer`.
///
/// - Parameters:
///   - buffer: Destination memory; must have room for at least `len` bytes.
///   - len: Maximum number of bytes to read.
/// - Returns: The number of bytes actually read; `0` signals end-of-file.
/// - Throws: `StreamError.fileDescriptorError` carrying the `errno` reported by `read(2)`.
public mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) throws(StreamError) -> Int {
  while true {
    let res = unistd.read(self._hnd.fileDescriptor, buffer, len)
    if res >= 0 {
      return res
    }
    // Capture errno immediately, before any other call has a chance to overwrite it.
    let code = errno
    if code == EINTR {
      // Interrupted by a signal before any data was transferred: not a failure, just retry.
      continue
    }
    throw .fileDescriptorError(Errno(rawValue: code))
  }
}
|
||||
}
|
||||
|
203
Sources/apk/Utility/GZip.swift
Normal file
203
Sources/apk/Utility/GZip.swift
Normal file
@ -0,0 +1,203 @@
|
||||
/*
|
||||
* darwin-apk © 2024 Gay Pizza Specifications
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
import zlib
|
||||
|
||||
/// Minimal gzip (RFC 1952) member reader that inflates the payload with zlib.
struct GZip {
  /// Compression method identifier for DEFLATE.
  static let CM_DEFLATE: UInt8 = 8

  // Bits of the header FLG byte.
  static let FTEXT: UInt8 = 1 << 0
  static let FHCRC: UInt8 = 1 << 1
  static let FEXTRA: UInt8 = 1 << 2
  static let FNAME: UInt8 = 1 << 3
  static let FCOMMENT: UInt8 = 1 << 4

  // Values of the header XFL byte.
  static let XFL_BEST: UInt8 = 2
  static let XFL_FASTEST: UInt8 = 4

  /// Consumes bytes up to and including the next NUL terminator.
  ///
  /// - Throws: `GZipError.truncatedStream` if the stream ends before a NUL byte is seen.
  private static func skipString(_ stream: inout any InputStream) throws(GZipError) {
    while true {
      guard let byte = stream.next() else {
        throw .truncatedStream
      }
      if byte == 0 {
        return
      }
    }
  }

  /// Reads one gzip member from `stream` and returns its decompressed payload.
  ///
  /// The stream must be positioned at the member's magic bytes. On success it is left
  /// positioned just past the member's 8-byte trailer, so the call can be repeated to
  /// read members that are concatenated back to back.
  ///
  /// - Parameter stream: Seekable input positioned at the start of a gzip member.
  /// - Returns: The inflated payload.
  /// - Throws: `GZipError` when the header is malformed, the stream is truncated, zlib
  ///   reports an error, or the CRC32 / length recorded in the trailer do not match.
  static func read(inStream stream: inout any InputStream) throws(GZipError) -> Data {
    // Check Gzip magic signature
    guard (try? stream.read(2)) == Data([0x1F, 0x8B]) else {
      throw .badMagic
    }

    // Check compression field (should only ever be DEFLATE)
    guard let compression = stream.next(), compression == Self.CM_DEFLATE else {
      throw .badHeader
    }

    // Only the flags are needed; the remaining fixed header fields are read and discarded.
    guard
      let flags = stream.next(),
      stream.readUInt() != nil,  // modification time
      stream.next() != nil,      // extra flags
      stream.next() != nil       // operating system ID
    else {
      throw .truncatedStream
    }

    if flags & Self.FEXTRA != 0 {
      // Skip "extra" field
      guard let extraLength = stream.readUShort() else {
        throw .truncatedStream
      }
      do {
        try stream.seek(.current(Int(extraLength)))
      } catch {
        throw .streamError(error)
      }
    }
    if flags & Self.FNAME != 0 {
      // Skip null-terminated name string
      try skipString(&stream)
    }
    if flags & Self.FCOMMENT != 0 {
      // Skip null-terminated comment string
      try skipString(&stream)
    }
    if flags & Self.FHCRC != 0 {
      // The header CRC16 is consumed but not verified.
      guard stream.readUShort() != nil else {
        throw .badField("crc16")
      }
    }

    // Remember where the compressed data starts: inflating reads the input in large
    // chunks and will normally run past the end of the deflate stream.
    let deflateBegin: Int
    do {
      deflateBegin = try stream.tell
    } catch {
      throw .streamError(error)
    }

    var payload = Data()
    let (streamLength, computedCRC) = try Self.deflate(payload: &payload, stream: &stream)

    // Rewind to the first byte after the deflate data, where the trailer lives.
    do {
      try stream.seek(.set(deflateBegin + streamLength))
    } catch {
      throw .streamError(error)
    }

    // End-of-stream verification fields
    guard
      let crc = stream.readUInt(),
      let inputSizeMod32 = stream.readUInt()
    else {
      throw .truncatedStream
    }

    // Perform verification checks
    guard UInt32(truncatingIfNeeded: computedCRC) == crc else {
      throw .verificationFailed("CRC32 didn't match")
    }
    guard inputSizeMod32 == UInt32(truncatingIfNeeded: payload.count) else {
      throw .verificationFailed("Bad decompressed size")
    }

    return payload
  }

  /// Inflates a raw DEFLATE stream read from `stream`, appending the output to `payload`.
  ///
  /// - Parameters:
  ///   - payload: Receives the decompressed bytes.
  ///   - stream: Input positioned at the first byte of the deflate data. It is read in
  ///     fixed-size chunks, so its position afterwards may be past the end of the
  ///     deflate data; use the returned length to reposition it.
  /// - Returns: The number of compressed bytes zlib consumed and the CRC32 of the
  ///   decompressed bytes.
  /// - Throws: `GZipError.zlib` for zlib failures, `.truncatedStream` if the input ends
  ///   early, and `.streamError` for failures of the underlying stream.
  private static func deflate(payload: inout Data, stream: inout any InputStream) throws(GZipError) -> (Int, UInt) {
    var zstream = z_stream()
    // Negative window bits select raw deflate: the gzip wrapper is parsed by the caller.
    var zerr = inflateInit2_(&zstream, -15, ZLIB_VERSION, Int32(MemoryLayout<z_stream>.size))
    guard zerr == Z_OK else {
      throw .zlib(zerr)
    }
    defer {
      inflateEnd(&zstream)
    }

    // zlib holds on to next_in / next_out between calls, so the buffers need addresses
    // that stay valid for the whole loop. Pointers obtained inside an array's
    // withUnsafeMutableBufferPointer closure are only valid within that closure.
    let bufferSize = 0x8000
    let inputBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
    let outputBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
    defer {
      inputBuffer.deallocate()
      outputBuffer.deallocate()
    }

    var computeCRC: UInt = crc32(0, nil, 0)
    repeat {
      if zstream.avail_in == 0 {
        // Previous chunk fully consumed: refill the input buffer.
        let read: Int
        do {
          read = try stream.read(inputBuffer, maxLength: bufferSize)
        } catch {
          throw .streamError(error)
        }
        guard read > 0 else {
          throw .truncatedStream
        }
        zstream.avail_in = UInt32(read)
        zstream.next_in = inputBuffer
      }
      zstream.avail_out = UInt32(bufferSize)
      zstream.next_out = outputBuffer
      // Decode until the output buffer fills, the input runs dry, or the stream ends.
      // Deflate block boundaries need no special handling; empty blocks are legal and
      // simply produce no output.
      zerr = inflate(&zstream, Z_NO_FLUSH)

      let decodedBytes = bufferSize - Int(zstream.avail_out)
      if decodedBytes > 0 {
        computeCRC = crc32(computeCRC, outputBuffer, UInt32(decodedBytes))
        payload.append(outputBuffer, count: decodedBytes)
      }
    } while zerr == Z_OK

    guard zerr == Z_STREAM_END else {
      throw .zlib(zerr)
    }

    return (Int(zstream.total_in), computeCRC)
  }
}
|
||||
|
||||
/// Errors thrown while reading a gzip member.
enum GZipError: LocalizedError {
  /// The underlying input stream failed.
  case streamError(_ err: StreamError)
  /// A trailer check failed; the message says which one.
  case verificationFailed(_ msg: String)
  /// The magic bytes did not match.
  case badMagic
  /// The header was malformed.
  case badHeader
  /// The named header field could not be read.
  case badField(_ name: String)
  /// The input ended before decoding finished.
  case truncatedStream
  /// zlib returned the given error code.
  case zlib(_ err: Int32)

  /// Human-readable description of the failure.
  var errorDescription: String? {
    let text: String
    switch self {
    case .badMagic:
      text = "Not a Gzip file"
    case .badHeader:
      text = "Malformed Gzip header"
    case .truncatedStream:
      text = "Reached end-of-stream before decoding finished"
    case .badField(let field):
      text = "Bad Gzip \(field) field"
    case .verificationFailed(let reason):
      text = reason
    case .zlib(let code):
      text = "zlib error \(code)"
    case .streamError(let underlying):
      text = "Underlying stream error: \(underlying.localizedDescription)"
    }
    return text
  }
}
|
||||
|
||||
fileprivate extension InputStream {
  /// Reads the next two bytes as a little-endian `UInt16`.
  ///
  /// - Returns: The decoded value, or `nil` if the read failed or fewer than two bytes
  ///   were available.
  mutating func readUShort() -> UInt16? {
    guard let buffer = try? self.read(2), buffer.count == 2 else {
      return nil
    }
    // The bytes may sit at any offset inside their backing storage, so an aligned
    // `load(as:)` is not safe here.
    let raw = buffer.withUnsafeBytes { $0.loadUnaligned(as: UInt16.self) }
    return UInt16(littleEndian: raw)
  }

  /// Reads the next four bytes as a little-endian `UInt32`.
  ///
  /// - Returns: The decoded value, or `nil` if the read failed or fewer than four bytes
  ///   were available.
  mutating func readUInt() -> UInt32? {
    guard let buffer = try? self.read(4), buffer.count == 4 else {
      return nil
    }
    // Same alignment concern as in readUShort().
    let raw = buffer.withUnsafeBytes { $0.loadUnaligned(as: UInt32.self) }
    return UInt32(littleEndian: raw)
  }
}
|
@ -5,10 +5,9 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
public protocol InputStream: Stream, IteratorProtocol {
|
||||
associatedtype Element = UInt8
|
||||
|
||||
public protocol InputStream: Stream, IteratorProtocol where Element == UInt8 {
|
||||
mutating func read(_ count: Int) throws(StreamError) -> Data
|
||||
mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) throws(StreamError) -> Int
|
||||
}
|
||||
|
||||
public extension InputStream {
|
||||
|
@ -52,6 +52,15 @@ public struct MemoryInputStream: InputStream {
|
||||
return bytes
|
||||
}
|
||||
|
||||
/// Copies up to `count` bytes from the current position into `buffer` and advances
/// the position by the number of bytes made available.
///
/// - Parameters:
///   - buffer: Destination memory; must have room for at least `count` bytes.
///   - count: Maximum number of bytes to copy.
/// - Returns: The number of bytes copied, which is less than `count` when fewer bytes remain.
public mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength count: Int) throws(StreamError) -> Int {
  // Clamp both ends of the requested window to the stream length.
  let lower = min(self._idx, self._len)
  let upper = min(self._idx + count, self._len)
  let available = upper - lower

  let destination = UnsafeMutableRawBufferPointer(start: buffer, count: available)
  self._idx += available
  return self._sli.copyBytes(to: destination, from: lower..<upper)
}
|
||||
|
||||
public mutating func next() -> UInt8? {
|
||||
if self._idx < self._len {
|
||||
let byte = self._sli[self._idx]
|
||||
|
@ -4,6 +4,7 @@
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
import System
|
||||
|
||||
public protocol Stream {
|
||||
mutating func seek(_ whence: StreamWhence) throws(StreamError)
|
||||
@ -21,6 +22,7 @@ public enum StreamError: Error, LocalizedError {
|
||||
case seekRange
|
||||
case overflow
|
||||
case fileHandleError(_ error: any Error)
|
||||
case fileDescriptorError(_ error: Errno)
|
||||
|
||||
public var errorDescription: String? {
|
||||
switch self {
|
||||
@ -28,6 +30,7 @@ public enum StreamError: Error, LocalizedError {
|
||||
case .seekRange: "Seek out of range"
|
||||
case .overflow: "Stream position overflowed"
|
||||
case .fileHandleError(let error): "Error from file handle: \(error.localizedDescription)"
|
||||
case .fileDescriptorError(let error): "\(error)"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user