mirror of
https://github.com/GayPizzaSpecifications/darwin-apk.git
synced 2025-08-03 21:41:31 +00:00
zlib-based homecooked gzip reader for significantly faster decompression times
This commit is contained in:
@ -8,14 +8,10 @@ let package = Package(
|
|||||||
],
|
],
|
||||||
dependencies: [
|
dependencies: [
|
||||||
.package(url: "https://github.com/apple/swift-argument-parser", from: "1.5.0"),
|
.package(url: "https://github.com/apple/swift-argument-parser", from: "1.5.0"),
|
||||||
.package(url: "https://github.com/tsolomko/SWCompression", from: "4.8.6"),
|
|
||||||
],
|
],
|
||||||
targets: [
|
targets: [
|
||||||
.target(
|
.target(
|
||||||
name: "darwin-apk",
|
name: "darwin-apk",
|
||||||
dependencies: [
|
|
||||||
.product(name: "SWCompression", package: "SWCompression"),
|
|
||||||
],
|
|
||||||
path: "Sources/apk"),
|
path: "Sources/apk"),
|
||||||
.executableTarget(
|
.executableTarget(
|
||||||
name: "dpk",
|
name: "dpk",
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
import SWCompression
|
|
||||||
import CryptoKit
|
import CryptoKit
|
||||||
|
|
||||||
public struct ApkIndexUpdater {
|
public struct ApkIndexUpdater {
|
||||||
@ -64,23 +63,46 @@ public struct ApkIndexUpdater {
|
|||||||
let tarSignature: [TarReader.Entry]
|
let tarSignature: [TarReader.Entry]
|
||||||
let tarRecords: [TarReader.Entry]
|
let tarRecords: [TarReader.Entry]
|
||||||
|
|
||||||
let tars = try GzipArchive.multiUnarchive( // Slow...
|
print("Archive: \(indexURL.lastPathComponent)")
|
||||||
archive: Data(contentsOf: indexURL))
|
|
||||||
assert(tars.count >= 2)
|
|
||||||
|
|
||||||
var signatureStream = MemoryInputStream(buffer: tars[0].data)
|
let durFormat = Duration.UnitsFormatStyle(
|
||||||
|
allowedUnits: [ .seconds, .milliseconds ],
|
||||||
|
width: .condensedAbbreviated,
|
||||||
|
fractionalPart: .show(length: 3))
|
||||||
|
let gzipStart = ContinuousClock.now
|
||||||
|
|
||||||
|
var tars = [Data]()
|
||||||
|
do {
|
||||||
|
var file: any InputStream = try FileInputStream(indexURL)
|
||||||
|
//var file: any InputStream = try MemoryInputStream(buffer: try Data(contentsOf: indexURL))
|
||||||
|
tars.append(try GZip.read(inStream: &file))
|
||||||
|
tars.append(try GZip.read(inStream: &file))
|
||||||
|
|
||||||
|
} catch {
|
||||||
|
fatalError(error.localizedDescription)
|
||||||
|
}
|
||||||
|
|
||||||
|
print("Gzip time: \((ContinuousClock.now - gzipStart).formatted(durFormat))")
|
||||||
|
let untarStart = ContinuousClock.now
|
||||||
|
|
||||||
|
var signatureStream = MemoryInputStream(buffer: tars[0])
|
||||||
tarSignature = try TarReader.read(&signatureStream)
|
tarSignature = try TarReader.read(&signatureStream)
|
||||||
var recordsStream = MemoryInputStream(buffer: tars[1].data)
|
var recordsStream = MemoryInputStream(buffer: tars[1])
|
||||||
tarRecords = try TarReader.read(&recordsStream)
|
tarRecords = try TarReader.read(&recordsStream)
|
||||||
|
|
||||||
guard case .file(let signatureName, _) = tarSignature.first
|
guard case .file(let signatureName, _) = tarSignature.first
|
||||||
else { fatalError("Missing signature") }
|
else { fatalError("Missing signature") }
|
||||||
print(signatureName)
|
|
||||||
guard let apkIndexFile = tarRecords.firstFile(name: "APKINDEX")
|
guard let apkIndexFile = tarRecords.firstFile(name: "APKINDEX")
|
||||||
else { fatalError("APKINDEX missing") }
|
else { fatalError("APKINDEX missing") }
|
||||||
guard let description = tarRecords.firstFile(name: "DESCRIPTION")
|
guard let description = tarRecords.firstFile(name: "DESCRIPTION")
|
||||||
else { fatalError("DESCRIPTION missing") }
|
else { fatalError("DESCRIPTION missing") }
|
||||||
|
|
||||||
|
print("TAR time: \((ContinuousClock.now - untarStart).formatted(durFormat))")
|
||||||
|
let indexStart = ContinuousClock.now
|
||||||
|
defer {
|
||||||
|
print("Index time: \((ContinuousClock.now - indexStart).formatted(durFormat))")
|
||||||
|
}
|
||||||
|
|
||||||
let reader = TextInputStream(binaryStream: MemoryInputStream(buffer: apkIndexFile))
|
let reader = TextInputStream(binaryStream: MemoryInputStream(buffer: apkIndexFile))
|
||||||
return try ApkIndex(raw:
|
return try ApkIndex(raw:
|
||||||
try ApkRawIndex(lines: reader.lines))
|
try ApkRawIndex(lines: reader.lines))
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
|
import Darwin
|
||||||
|
import System
|
||||||
|
|
||||||
public struct FileInputStream: InputStream {
|
public struct FileInputStream: InputStream {
|
||||||
private var _hnd: FileHandle
|
private var _hnd: FileHandle
|
||||||
@ -72,4 +74,12 @@ public struct FileInputStream: InputStream {
|
|||||||
throw .fileHandleError(error)
|
throw .fileHandleError(error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads up to `len` bytes from the underlying file descriptor into `buffer`.
///
/// This is a thin wrapper over `read(2)`; it may return fewer bytes than
/// requested, and returns 0 when no more bytes are available.
///
/// - Parameters:
///   - buffer: Destination memory with room for at least `len` bytes.
///   - len: Maximum number of bytes to read.
/// - Returns: The number of bytes actually written to `buffer`.
/// - Throws: `StreamError.fileDescriptorError` carrying the `errno` reported by `read(2)`.
public mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) throws(StreamError) -> Int {
  var res: Int
  repeat {
    res = unistd.read(self._hnd.fileDescriptor, buffer, len)
    // A read interrupted by a signal transferred nothing; retry instead of failing.
  } while res < 0 && errno == EINTR

  guard res >= 0 else {
    // Errno-based failures belong in the dedicated descriptor error case.
    throw .fileDescriptorError(Errno(rawValue: errno))
  }
  return res
}
|
||||||
}
|
}
|
||||||
|
203
Sources/apk/Utility/GZip.swift
Normal file
203
Sources/apk/Utility/GZip.swift
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
/*
|
||||||
|
* darwin-apk © 2024 Gay Pizza Specifications
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
/// Minimal gzip member reader that parses the gzip framing itself and
/// delegates DEFLATE decoding to zlib's raw inflate.
struct GZip {
  /// Compression method identifier for DEFLATE; any other value is rejected.
  static let CM_DEFLATE: UInt8 = 8

  // Header flag bits. FTEXT is declared for completeness but not consulted by this reader.
  static let FTEXT: UInt8    = 1 << 0
  static let FHCRC: UInt8    = 1 << 1
  static let FEXTRA: UInt8   = 1 << 2
  static let FNAME: UInt8    = 1 << 3
  static let FCOMMENT: UInt8 = 1 << 4

  // Extra-flag values. Declared for completeness; not consulted by this reader.
  static let XFL_BEST: UInt8    = 2
  static let XFL_FASTEST: UInt8 = 4

  /// Consumes bytes from `stream` up to and including the next zero byte.
  ///
  /// - Throws: `.truncatedStream` if the stream ends before a zero byte is seen.
  private static func skipString(_ stream: inout any InputStream) throws(GZipError) {
    var c: UInt8?
    repeat {
      c = stream.next()
      guard c != nil else {
        throw .truncatedStream
      }
    } while c != 0
  }

  /// Reads a single gzip member starting at the stream's current position.
  ///
  /// On success the stream is left positioned directly after the member's
  /// 8-byte trailer, so the call can be repeated to read a following member.
  ///
  /// - Parameter stream: Seekable input positioned at the gzip magic bytes.
  /// - Returns: The decompressed payload.
  /// - Throws: `.badMagic` / `.badHeader` for an unrecognised header,
  ///   `.truncatedStream` or `.badField` when the input ends early,
  ///   `.streamError` for failures of the underlying stream, `.zlib` for
  ///   inflate failures and `.verificationFailed` when the trailer's CRC32 or
  ///   size does not match the decoded data.
  static func read(inStream stream: inout any InputStream) throws(GZipError) -> Data {
    // Check Gzip magic signature
    guard (try? stream.read(2)) == Data([0x1F, 0x8B]) else {
      throw .badMagic
    }

    // Check compression field (should only ever be DEFLATE)
    guard let compression = stream.next(),
        compression == Self.CM_DEFLATE else {
      throw .badHeader
    }

    // Only the flags are needed; the remaining fixed header fields are
    // consumed to advance the stream but their values are not used.
    guard
      let flags = stream.next(),
      stream.readUInt() != nil,  // modification time
      stream.next() != nil,      // extra flags
      stream.next() != nil       // operating system ID
    else {
      throw .truncatedStream
    }

    if flags & Self.FEXTRA != 0 {
      // Skip "extra" field
      guard let extraLength = stream.readUShort() else {
        throw .truncatedStream
      }
      do {
        try stream.seek(.current(Int(extraLength)))
      } catch {
        throw .streamError(error)
      }
    }
    if flags & Self.FNAME != 0 {
      // Skip null-terminated name string
      try skipString(&stream)
    }
    if flags & Self.FCOMMENT != 0 {
      // Skip null-terminated comment string
      try skipString(&stream)
    }
    if flags & Self.FHCRC != 0 {
      // The header CRC is consumed but not verified.
      guard stream.readUShort() != nil else {
        throw .badField("crc16")
      }
    }

    // Remember where the compressed data starts: inflate reads ahead in large
    // chunks, so the stream must be repositioned once the real length is known.
    let deflateBegin: Int
    do {
      deflateBegin = try stream.tell
    } catch {
      throw .streamError(error)
    }

    var payload = Data()
    let (streamLength, computedCRC) = try Self.deflate(payload: &payload, stream: &stream)

    // End-of-stream verification fields
    do {
      try stream.seek(.set(deflateBegin + streamLength))
    } catch {
      throw .streamError(error)
    }
    guard
      let crc = stream.readUInt(),
      let inputSizeMod32 = stream.readUInt() else {
      throw .truncatedStream
    }

    // Perform verification checks
    guard UInt32(truncatingIfNeeded: computedCRC) == crc else {
      throw .verificationFailed("CRC32 didn't match")
    }
    guard inputSizeMod32 == UInt32(truncatingIfNeeded: payload.count) else {
      throw .verificationFailed("Bad decompressed size")
    }

    return payload
  }

  /// Inflates one raw DEFLATE stream from `stream`, appending the output to `payload`.
  ///
  /// The stream is read in fixed-size chunks, so on return its position is
  /// generally past the end of the compressed data; callers use the returned
  /// length to reposition it.
  ///
  /// - Returns: The number of compressed bytes consumed by zlib and the CRC32
  ///   of the bytes appended to `payload`.
  /// - Throws: `.zlib` when zlib reports anything other than a clean end of
  ///   stream, `.truncatedStream` when input runs out first, `.streamError`
  ///   for failures of the underlying stream.
  private static func deflate(payload: inout Data, stream: inout any InputStream) throws(GZipError) -> (Int, UInt) {
    var zstream = z_stream()
    // Negative window bits select raw DEFLATE (no zlib/gzip wrapper), since
    // the gzip framing is handled by the caller.
    var zerr = inflateInit2_(&zstream, -15, ZLIB_VERSION, Int32(MemoryLayout<z_stream>.size))
    guard zerr == Z_OK else {
      throw .zlib(zerr)
    }

    defer {
      inflateEnd(&zstream)
    }

    // zlib keeps `next_in`/`next_out` across calls, so the buffers need
    // addresses that stay valid for the whole loop. Pointers obtained from
    // `withUnsafeMutableBufferPointer` are only valid inside the closure, so
    // the buffers are allocated manually instead.
    let bufferSize = 0x8000
    let inputBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
    let outputBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
    inputBuffer.initialize(repeating: 0, count: bufferSize)
    outputBuffer.initialize(repeating: 0, count: bufferSize)
    defer {
      inputBuffer.deallocate()
      outputBuffer.deallocate()
    }

    var computeCRC: UInt = crc32(0, nil, 0)
    repeat {
      if zstream.avail_in == 0 {
        // Refill the input window once zlib has consumed the previous chunk.
        let bytesRead: Int
        do {
          bytesRead = try stream.read(inputBuffer, maxLength: bufferSize)
        } catch {
          throw .streamError(error)
        }
        guard bytesRead > 0 else {
          throw .truncatedStream
        }
        zstream.avail_in = UInt32(bytesRead)
        zstream.next_in = inputBuffer
      }
      zstream.avail_out = UInt32(bufferSize)
      zstream.next_out = outputBuffer
      // Decode until the output window fills or input runs dry. Termination
      // relies solely on Z_STREAM_END, so empty deflate blocks are harmless.
      zerr = inflate(&zstream, Z_NO_FLUSH)

      let decodedBytes = bufferSize - Int(zstream.avail_out)
      if decodedBytes > 0 {
        computeCRC = crc32(computeCRC, outputBuffer, UInt32(decodedBytes))
        payload.append(outputBuffer, count: decodedBytes)
      }
    } while zerr == Z_OK

    guard zerr == Z_STREAM_END else {
      throw .zlib(zerr)
    }

    return (Int(zstream.total_in), computeCRC)
  }
}
|
||||||
|
|
||||||
|
/// Failures reported while reading a gzip member.
enum GZipError: LocalizedError {
  /// The underlying input stream reported an error.
  case streamError(_ err: StreamError)
  /// A post-decode integrity check failed; the message says which one.
  case verificationFailed(_ msg: String)
  /// The input does not start with the gzip magic bytes.
  case badMagic
  /// The header is present but not acceptable.
  case badHeader
  /// A named header field could not be read.
  case badField(_ name: String)
  /// The input ended before decoding finished.
  case truncatedStream
  /// zlib returned the given status code.
  case zlib(_ err: Int32)

  /// Human-readable description for each failure.
  var errorDescription: String? {
    switch self {
    case .badMagic:
      return "Not a Gzip file"
    case .badHeader:
      return "Malformed Gzip header"
    case .truncatedStream:
      return "Reached end-of-stream before decoding finished"
    case .badField(let fieldName):
      return "Bad Gzip \(fieldName) field"
    case .verificationFailed(let message):
      return message
    case .streamError(let underlying):
      return "Underlying stream error: \(underlying.localizedDescription)"
    case .zlib(let status):
      return "zlib error \(status)"
    }
  }
}
|
||||||
|
|
||||||
|
/// Little-endian fixed-width integer readers used by the gzip parser.
fileprivate extension InputStream {
  /// Reads two bytes and decodes them as a little-endian `UInt16`.
  ///
  /// - Returns: The decoded value, or `nil` if the read threw or produced
  ///   fewer than two bytes.
  mutating func readUShort() -> UInt16? {
    guard let buffer = try? self.read(2), buffer.count == 2 else {
      return nil
    }
    // `Data` makes no alignment promise for its storage, so use an unaligned load.
    return UInt16(littleEndian: buffer.withUnsafeBytes { $0.loadUnaligned(as: UInt16.self) })
  }

  /// Reads four bytes and decodes them as a little-endian `UInt32`.
  ///
  /// - Returns: The decoded value, or `nil` if the read threw or produced
  ///   fewer than four bytes.
  mutating func readUInt() -> UInt32? {
    guard let buffer = try? self.read(4), buffer.count == 4 else {
      return nil
    }
    // `Data` makes no alignment promise for its storage, so use an unaligned load.
    return UInt32(littleEndian: buffer.withUnsafeBytes { $0.loadUnaligned(as: UInt32.self) })
  }
}
|
@ -5,10 +5,9 @@
|
|||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
|
|
||||||
public protocol InputStream: Stream, IteratorProtocol {
|
public protocol InputStream: Stream, IteratorProtocol where Element == UInt8 {
|
||||||
associatedtype Element = UInt8
|
|
||||||
|
|
||||||
mutating func read(_ count: Int) throws(StreamError) -> Data
|
mutating func read(_ count: Int) throws(StreamError) -> Data
|
||||||
|
mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) throws(StreamError) -> Int
|
||||||
}
|
}
|
||||||
|
|
||||||
public extension InputStream {
|
public extension InputStream {
|
||||||
|
@ -52,6 +52,15 @@ public struct MemoryInputStream: InputStream {
|
|||||||
return bytes
|
return bytes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Copies up to `count` bytes from the current position into `buffer` and
/// advances the position by the size of the clamped range.
///
/// - Parameters:
///   - buffer: Destination memory with room for at least `count` bytes.
///   - count: Maximum number of bytes to copy.
/// - Returns: The number of bytes copied, as reported by `copyBytes(to:from:)`.
public mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength count: Int) throws(StreamError) -> Int {
  // Clamp both ends of the requested range to the end of the backing data.
  let lower = min(self._idx, self._len)
  let upper = min(self._idx + count, self._len)
  let available = upper - lower

  let destination = UnsafeMutableRawBufferPointer(start: buffer, count: available)
  let copied = self._sli.copyBytes(to: destination, from: lower..<upper)
  self._idx += available
  return copied
}
|
||||||
|
|
||||||
public mutating func next() -> UInt8? {
|
public mutating func next() -> UInt8? {
|
||||||
if self._idx < self._len {
|
if self._idx < self._len {
|
||||||
let byte = self._sli[self._idx]
|
let byte = self._sli[self._idx]
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
|
import System
|
||||||
|
|
||||||
public protocol Stream {
|
public protocol Stream {
|
||||||
mutating func seek(_ whence: StreamWhence) throws(StreamError)
|
mutating func seek(_ whence: StreamWhence) throws(StreamError)
|
||||||
@ -21,6 +22,7 @@ public enum StreamError: Error, LocalizedError {
|
|||||||
case seekRange
|
case seekRange
|
||||||
case overflow
|
case overflow
|
||||||
case fileHandleError(_ error: any Error)
|
case fileHandleError(_ error: any Error)
|
||||||
|
case fileDescriptorError(_ error: Errno)
|
||||||
|
|
||||||
public var errorDescription: String? {
|
public var errorDescription: String? {
|
||||||
switch self {
|
switch self {
|
||||||
@ -28,6 +30,7 @@ public enum StreamError: Error, LocalizedError {
|
|||||||
case .seekRange: "Seek out of range"
|
case .seekRange: "Seek out of range"
|
||||||
case .overflow: "Stream position overflowed"
|
case .overflow: "Stream position overflowed"
|
||||||
case .fileHandleError(let error): "Error from file handle: \(error.localizedDescription)"
|
case .fileHandleError(let error): "Error from file handle: \(error.localizedDescription)"
|
||||||
|
case .fileDescriptorError(let error): "\(error)"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user