mirror of
				https://github.com/GayPizzaSpecifications/darwin-apk.git
				synced 2025-11-03 23:49:38 +00:00 
			
		
		
		
	zlib-based homecooked gzip reader for significantly faster decompression times
This commit is contained in:
		@ -8,14 +8,10 @@ let package = Package(
 | 
			
		||||
  ],
 | 
			
		||||
  dependencies: [
 | 
			
		||||
    .package(url: "https://github.com/apple/swift-argument-parser", from: "1.5.0"),
 | 
			
		||||
    .package(url: "https://github.com/tsolomko/SWCompression", from: "4.8.6"),
 | 
			
		||||
  ],
 | 
			
		||||
  targets: [
 | 
			
		||||
    .target(
 | 
			
		||||
      name: "darwin-apk",
 | 
			
		||||
      dependencies: [
 | 
			
		||||
        .product(name: "SWCompression", package: "SWCompression"),
 | 
			
		||||
      ],
 | 
			
		||||
      path: "Sources/apk"),
 | 
			
		||||
    .executableTarget(
 | 
			
		||||
      name: "dpk",
 | 
			
		||||
 | 
			
		||||
@ -4,7 +4,6 @@
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
import SWCompression
 | 
			
		||||
import CryptoKit
 | 
			
		||||
 | 
			
		||||
public struct ApkIndexUpdater {
 | 
			
		||||
@ -64,23 +63,46 @@ public struct ApkIndexUpdater {
 | 
			
		||||
    let tarSignature: [TarReader.Entry]
 | 
			
		||||
    let tarRecords: [TarReader.Entry]
 | 
			
		||||
 | 
			
		||||
    let tars = try GzipArchive.multiUnarchive(  // Slow...
 | 
			
		||||
      archive: Data(contentsOf: indexURL))
 | 
			
		||||
    assert(tars.count >= 2)
 | 
			
		||||
    print("Archive:    \(indexURL.lastPathComponent)")
 | 
			
		||||
 | 
			
		||||
    var signatureStream = MemoryInputStream(buffer: tars[0].data)
 | 
			
		||||
    let durFormat = Duration.UnitsFormatStyle(
 | 
			
		||||
      allowedUnits: [ .seconds, .milliseconds ],
 | 
			
		||||
      width: .condensedAbbreviated,
 | 
			
		||||
      fractionalPart: .show(length: 3))
 | 
			
		||||
    let gzipStart = ContinuousClock.now
 | 
			
		||||
 | 
			
		||||
    var tars = [Data]()
 | 
			
		||||
    do {
 | 
			
		||||
      var file: any InputStream = try FileInputStream(indexURL)
 | 
			
		||||
      //var file: any InputStream = try MemoryInputStream(buffer: try Data(contentsOf: indexURL))
 | 
			
		||||
      tars.append(try GZip.read(inStream: &file))
 | 
			
		||||
      tars.append(try GZip.read(inStream: &file))
 | 
			
		||||
      
 | 
			
		||||
    } catch {
 | 
			
		||||
      fatalError(error.localizedDescription)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    print("Gzip time:  \((ContinuousClock.now - gzipStart).formatted(durFormat))")
 | 
			
		||||
    let untarStart = ContinuousClock.now
 | 
			
		||||
 | 
			
		||||
    var signatureStream = MemoryInputStream(buffer: tars[0])
 | 
			
		||||
    tarSignature = try TarReader.read(&signatureStream)
 | 
			
		||||
    var recordsStream = MemoryInputStream(buffer: tars[1].data)
 | 
			
		||||
    var recordsStream = MemoryInputStream(buffer: tars[1])
 | 
			
		||||
    tarRecords = try TarReader.read(&recordsStream)
 | 
			
		||||
 | 
			
		||||
    guard case .file(let signatureName, _) = tarSignature.first
 | 
			
		||||
    else { fatalError("Missing signature") }
 | 
			
		||||
    print(signatureName)
 | 
			
		||||
    guard let apkIndexFile = tarRecords.firstFile(name: "APKINDEX")
 | 
			
		||||
    else { fatalError("APKINDEX missing") }
 | 
			
		||||
    guard let description = tarRecords.firstFile(name: "DESCRIPTION")
 | 
			
		||||
    else { fatalError("DESCRIPTION missing") }
 | 
			
		||||
 | 
			
		||||
    print("TAR time:   \((ContinuousClock.now - untarStart).formatted(durFormat))")
 | 
			
		||||
    let indexStart = ContinuousClock.now
 | 
			
		||||
    defer {
 | 
			
		||||
      print("Index time: \((ContinuousClock.now - indexStart).formatted(durFormat))")
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let reader = TextInputStream(binaryStream: MemoryInputStream(buffer: apkIndexFile))
 | 
			
		||||
    return try ApkIndex(raw:
 | 
			
		||||
      try ApkRawIndex(lines: reader.lines))
 | 
			
		||||
 | 
			
		||||
@ -4,6 +4,8 @@
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
import Darwin
 | 
			
		||||
import System
 | 
			
		||||
 | 
			
		||||
public struct FileInputStream: InputStream {
 | 
			
		||||
  private var _hnd: FileHandle
 | 
			
		||||
@ -72,4 +74,12 @@ public struct FileInputStream: InputStream {
 | 
			
		||||
      throw .fileHandleError(error)
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /// Reads up to `len` bytes from the underlying file descriptor into `buffer`.
  ///
  /// - Parameters:
  ///   - buffer: Destination memory; the caller must provide room for at least `len` bytes.
  ///   - len: Maximum number of bytes to read.
  /// - Returns: The number of bytes actually read, as reported by `read(2)`
  ///   (zero when nothing more could be read).
  /// - Throws: `StreamError.fileDescriptorError` carrying the `errno` of the failed call.
  public mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) throws(StreamError) -> Int {
    while true {
      let bytesRead = unistd.read(self._hnd.fileDescriptor, buffer, len)
      if bytesRead >= 0 {
        return bytesRead
      }

      // Capture errno immediately, before any other call can overwrite it.
      let code = errno
      if code == EINTR {
        // Interrupted by a signal before any data was transferred; retry the call.
        continue
      }
      throw .fileDescriptorError(Errno(rawValue: code))
    }
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										203
									
								
								Sources/apk/Utility/GZip.swift
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										203
									
								
								Sources/apk/Utility/GZip.swift
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,203 @@
 | 
			
		||||
/*
 | 
			
		||||
 * darwin-apk © 2024 Gay Pizza Specifications
 | 
			
		||||
 * SPDX-License-Identifier: Apache-2.0
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
import zlib
 | 
			
		||||
 | 
			
		||||
/// Reader for Gzip members that inflates the DEFLATE payload with zlib.
///
/// Each call to `read(inStream:)` consumes exactly one member (header, compressed
/// payload and trailer), so it can be called repeatedly on a stream that holds
/// several members back to back.
struct GZip {
  /// Compression method value accepted in the header (DEFLATE).
  static let CM_DEFLATE: UInt8 = 8

  // Bit masks tested against the header's flags byte.
  static let FTEXT: UInt8    = 1 << 0
  static let FHCRC: UInt8    = 1 << 1
  static let FEXTRA: UInt8   = 1 << 2
  static let FNAME: UInt8    = 1 << 3
  static let FCOMMENT: UInt8 = 1 << 4

  // Values for the header's extra-flags byte (not interpreted by this reader).
  static let XFL_BEST: UInt8    = 2
  static let XFL_FASTEST: UInt8 = 4

  /// Consumes bytes from `stream` up to and including the next zero byte.
  ///
  /// - Throws: `GZipError.truncatedStream` if the stream ends before a zero byte is found.
  private static func skipString(_ stream: inout any InputStream) throws(GZipError) {
    while true {
      guard let byte = stream.next() else {
        throw .truncatedStream
      }
      if byte == 0 {
        return
      }
    }
  }

  /// Reads one Gzip member from `stream` and returns its decompressed contents.
  ///
  /// On success the stream is positioned just past the member's two trailing
  /// 32-bit fields (CRC and size), ready for a following member.
  ///
  /// - Parameter stream: Source positioned at the first byte of a Gzip member.
  /// - Returns: The inflated payload.
  /// - Throws: `GZipError` when the header is malformed, the stream ends early,
  ///   the underlying stream or zlib reports an error, or the trailer does not
  ///   match the decompressed data.
  static func read(inStream stream: inout any InputStream) throws(GZipError) -> Data {
    // Check Gzip magic signature
    guard (try? stream.read(2)) == Data([0x1F, 0x8B]) else {
      throw .badMagic
    }

    // Check compression field (should only ever be DEFLATE)
    guard let compression = stream.next(),
        compression == Self.CM_DEFLATE else {
      throw .badHeader
    }

    // Only the flags byte is interpreted. The modification time (4 bytes), the
    // extra-flags byte and the operating system byte are consumed and discarded.
    guard
        let flags = stream.next(),
        stream.readUInt() != nil,
        stream.next() != nil,
        stream.next() != nil else {
      throw .truncatedStream
    }

    if flags & Self.FEXTRA != 0 {
      // Skip "extra" field
      guard let extraLength = stream.readUShort() else {
        throw .truncatedStream
      }
      do {
        try stream.seek(.current(Int(extraLength)))
      } catch {
        throw .streamError(error)
      }
    }
    if flags & Self.FNAME != 0 {
      // Skip null-terminated name string
      try skipString(&stream)
    }
    if flags & Self.FCOMMENT != 0 {
      // Skip null-terminated comment string
      try skipString(&stream)
    }
    if flags & Self.FHCRC != 0 {
      // The header CRC is consumed but not verified.
      guard stream.readUShort() != nil else {
        throw .badField("crc16")
      }
    }

    // Remember where the compressed payload starts: `deflate` reads the stream in
    // large chunks and may pull in bytes that belong to the trailer or beyond.
    let deflateBegin: Int
    do {
      deflateBegin = try stream.tell
    } catch {
      throw .streamError(error)
    }

    var payload = Data()
    let (streamLength, computedCRC) = try Self.deflate(payload: &payload, stream: &stream)

    // End-of-stream verification fields
    do {
      // Rewind to the first byte after the compressed data actually consumed by zlib.
      try stream.seek(.set(deflateBegin + streamLength))
    } catch {
      throw .streamError(error)
    }
    guard
        let crc = stream.readUInt(),
        let inputSizeMod32 = stream.readUInt() else {
      throw .truncatedStream
    }

    // Perform verification checks
    guard UInt32(truncatingIfNeeded: computedCRC) == crc else {
      throw .verificationFailed("CRC32 didn't match")
    }
    guard inputSizeMod32 == UInt32(truncatingIfNeeded: payload.count) else {
      throw .verificationFailed("Bad decompressed size")
    }

    return payload
  }

  /// Inflates a raw DEFLATE stream read from `stream`, appending the output to `payload`.
  ///
  /// - Parameters:
  ///   - payload: Receives the decompressed bytes (appended).
  ///   - stream: Source positioned at the first byte of the compressed data. It is
  ///     read in fixed-size chunks, so it may be left positioned past the end of
  ///     the compressed data.
  /// - Returns: The number of compressed bytes zlib consumed, and the CRC-32 of the
  ///   decompressed bytes.
  /// - Throws: `GZipError.zlib` for zlib failures, `GZipError.streamError` for read
  ///   failures, `GZipError.truncatedStream` if the input runs out early.
  private static func deflate(payload: inout Data, stream: inout any InputStream) throws(GZipError) -> (Int, UInt) {
    // zlib keeps `next_in`/`next_out` across calls, so the buffers need addresses
    // that stay valid for the whole loop. Pointers obtained from
    // `Array.withUnsafeMutableBufferPointer` are only valid inside its closure,
    // hence the explicit allocations.
    let bufferSize = 0x8000
    let inputBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
    let outputBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
    inputBuffer.initialize(repeating: 0, count: bufferSize)
    outputBuffer.initialize(repeating: 0, count: bufferSize)
    defer {
      inputBuffer.deallocate()
      outputBuffer.deallocate()
    }

    var zstream = z_stream()
    // Negative window bits ask zlib for a raw DEFLATE stream without its own wrapper.
    var zerr = inflateInit2_(&zstream, -15, ZLIB_VERSION, Int32(MemoryLayout<z_stream>.size))
    guard zerr == Z_OK else {
      throw .zlib(zerr)
    }

    // Registered after the buffer cleanup, so it runs first and the buffers
    // outlive the zlib state.
    defer {
      inflateEnd(&zstream)
    }

    var computeCRC: UInt = crc32(0, nil, 0)
    var block = 0
    repeat {
      if zstream.avail_in == 0 {
        // zlib has used up the previous chunk; fetch the next one.
        let read: Int
        do {
          read = try stream.read(inputBuffer, maxLength: bufferSize)
        } catch {
          throw .streamError(error)
        }
        guard read > 0 else {
          throw .truncatedStream
        }
        zstream.avail_in = UInt32(read)
        zstream.next_in = inputBuffer
      }
      zstream.avail_out = UInt32(bufferSize)
      zstream.next_out = outputBuffer
      zerr = inflate(&zstream, Z_BLOCK)

      let decodedBytes = bufferSize - Int(zstream.avail_out)
      computeCRC = crc32(computeCRC, outputBuffer, UInt32(decodedBytes))
      payload.append(outputBuffer, count: decodedBytes)
      block += decodedBytes

      if zstream.data_type & (1 << 7) != 0 {
        // At the end of a deflate block, we're done if it was empty
        // NOTE(review): if inflate returned Z_OK for that call, the guard below
        // reports it as a zlib error — confirm that this is the intended outcome.
        if block == 0 {
          break
        }
        block = 0
      }
    } while zerr == Z_OK

    guard zerr == Z_STREAM_END else {
      throw .zlib(zerr)
    }

    return (Int(zstream.total_in), computeCRC)
  }
}
 | 
			
		||||
 | 
			
		||||
/// Errors thrown by `GZip` while reading a Gzip stream.
enum GZipError: LocalizedError {
  /// The underlying input stream failed.
  case streamError(_ err: StreamError)
  /// A verification check on the decompressed data failed; the payload is the message.
  case verificationFailed(_ msg: String)
  /// The stream does not start with the Gzip signature.
  case badMagic
  /// The header is malformed.
  case badHeader
  /// The named header field could not be read.
  case badField(_ name: String)
  /// The stream ended before decoding finished.
  case truncatedStream
  /// zlib returned the given status code.
  case zlib(_ err: Int32)

  /// Human-readable description of the error.
  var errorDescription: String? {
    switch self {
    case .streamError(let underlying):
      return "Underlying stream error: \(underlying.localizedDescription)"
    case .verificationFailed(let message):
      return message
    case .badMagic:
      return "Not a Gzip file"
    case .badHeader:
      return "Malformed Gzip header"
    case .badField(let fieldName):
      return "Bad Gzip \(fieldName) field"
    case .truncatedStream:
      return "Reached end-of-stream before decoding finished"
    case .zlib(let status):
      return "zlib error \(status)"
    }
  }
}
 | 
			
		||||
 | 
			
		||||
fileprivate extension InputStream {
  /// Reads two bytes and decodes them as a little-endian `UInt16`.
  ///
  /// - Returns: The decoded value, or `nil` if the read throws or yields fewer than two bytes.
  mutating func readUShort() -> UInt16? {
    guard let buffer = try? self.read(2), buffer.count == 2 else {
      return nil
    }
    // The bytes returned by `read` carry no alignment guarantee, so an aligned
    // `load(as:)` is not safe here.
    return UInt16(littleEndian: buffer.withUnsafeBytes { $0.loadUnaligned(as: UInt16.self) })
  }

  /// Reads four bytes and decodes them as a little-endian `UInt32`.
  ///
  /// - Returns: The decoded value, or `nil` if the read throws or yields fewer than four bytes.
  mutating func readUInt() -> UInt32? {
    guard let buffer = try? self.read(4), buffer.count == 4 else {
      return nil
    }
    // See `readUShort()`: the load must tolerate an unaligned base address.
    return UInt32(littleEndian: buffer.withUnsafeBytes { $0.loadUnaligned(as: UInt32.self) })
  }
}
 | 
			
		||||
@ -5,10 +5,9 @@
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
 | 
			
		||||
public protocol InputStream: Stream, IteratorProtocol {
 | 
			
		||||
  associatedtype Element = UInt8
 | 
			
		||||
 | 
			
		||||
public protocol InputStream: Stream, IteratorProtocol where Element == UInt8 {
 | 
			
		||||
  mutating func read(_ count: Int) throws(StreamError) -> Data
 | 
			
		||||
  mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength len: Int) throws(StreamError) -> Int
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
public extension InputStream {
 | 
			
		||||
 | 
			
		||||
@ -52,6 +52,15 @@ public struct MemoryInputStream: InputStream {
 | 
			
		||||
    return bytes
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /// Copies up to `count` bytes from the current position into `buffer` and advances the position.
  ///
  /// - Parameters:
  ///   - buffer: Destination memory; the caller must provide room for at least `count` bytes.
  ///   - count: Maximum number of bytes to copy.
  /// - Returns: The number of bytes copied, which is smaller than `count` when fewer bytes remain.
  public mutating func read(_ buffer: UnsafeMutablePointer<UInt8>, maxLength count: Int) throws(StreamError) -> Int {
    // Clamp both ends of the requested range to the stream length.
    let start = min(self._idx, self._len)
    let stop = min(self._idx + count, self._len)
    let available = stop - start

    self._idx += available

    let destination = UnsafeMutableRawBufferPointer(start: buffer, count: available)
    return self._sli.copyBytes(to: destination, from: start..<stop)
  }
 | 
			
		||||
 | 
			
		||||
  public mutating func next() -> UInt8? {
 | 
			
		||||
    if self._idx < self._len {
 | 
			
		||||
      let byte = self._sli[self._idx]
 | 
			
		||||
 | 
			
		||||
@ -4,6 +4,7 @@
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
import System
 | 
			
		||||
 | 
			
		||||
public protocol Stream {
 | 
			
		||||
  mutating func seek(_ whence: StreamWhence) throws(StreamError)
 | 
			
		||||
@ -21,6 +22,7 @@ public enum StreamError: Error, LocalizedError {
 | 
			
		||||
  case seekRange
 | 
			
		||||
  case overflow
 | 
			
		||||
  case fileHandleError(_ error: any Error)
 | 
			
		||||
  case fileDescriptorError(_ error: Errno)
 | 
			
		||||
 | 
			
		||||
  public var errorDescription: String? {
 | 
			
		||||
    switch self {
 | 
			
		||||
@ -28,6 +30,7 @@ public enum StreamError: Error, LocalizedError {
 | 
			
		||||
    case .seekRange: "Seek out of range"
 | 
			
		||||
    case .overflow: "Stream position overflowed"
 | 
			
		||||
    case .fileHandleError(let error): "Error from file handle: \(error.localizedDescription)"
 | 
			
		||||
    case .fileDescriptorError(let error): "\(error)"
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user