Initial implementation of APKINDEX, fetching, reading, parsing, & merging

This commit is contained in:
2024-11-08 21:22:33 +11:00
parent f6cbddb608
commit 941dfae317
18 changed files with 877 additions and 65 deletions

View File

@@ -0,0 +1,72 @@
// SPDX-License-Identifier: Apache-2.0
import Foundation
public struct FileInputStream: InputStream {
private var _hnd: FileHandle
public init(_ fileURL: URL) throws {
self._hnd = try FileHandle(forReadingFrom: fileURL)
}
public mutating func seek(_ whence: StreamWhence) throws(StreamError) {
let applyOffset = { (position: UInt64, offset: Int) throws(StreamError) -> UInt64 in
if offset < 0 {
// Use magnitude so offset == Int.min cannot overflow during negation
let (newPosition, overflow) = position.subtractingReportingOverflow(UInt64(offset.magnitude))
if overflow { throw .seekRange }
return newPosition
} else {
let (newPosition, overflow) = position.addingReportingOverflow(UInt64(offset))
if overflow { throw .overflow }
return newPosition
}
}
switch whence {
case .set(let position):
if position < 0 { throw .seekRange }
do { try self._hnd.seek(toOffset: UInt64(truncatingIfNeeded: position)) }
catch {
throw .fileHandleError(error)
}
case .current(let offset):
do { try self._hnd.seek(toOffset: try applyOffset(try self._hnd.offset(), offset)) }
catch let error as StreamError { throw error }
catch { throw .fileHandleError(error) }
case .end(let offset):
do { try self._hnd.seek(toOffset: applyOffset(try self._hnd.seekToEnd(), offset)) }
catch let error as StreamError { throw error }
catch { throw .fileHandleError(error) }
}
}
public var tell: Int {
get throws(StreamError) {
let offset: UInt64
do { offset = try self._hnd.offset() }
catch {
throw .fileHandleError(error)
}
if offset > Int.max { throw .overflow }
return Int(truncatingIfNeeded: offset)
}
}
public mutating func read(_ count: Int) throws(StreamError) -> Data {
do {
return try self._hnd.read(upToCount: count) ?? Data()
} catch {
throw .fileHandleError(error)
}
}
}
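
For reference, a minimal usage sketch of the FileInputStream added above; the file path and byte counts are hypothetical, not part of this commit:

import Foundation

do {
    var stream = try FileInputStream(URL(fileURLWithPath: "/tmp/example.bin")) // hypothetical path
    let head = try stream.read(4)       // first four bytes
    try stream.seek(.end(-4))           // jump to four bytes before the end
    let tail = try stream.read(4)       // last four bytes
    let position = try stream.tell      // now equals the file size
    print(Array(head), Array(tail), position)
} catch {
    print("stream error:", error)
}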

View File

@@ -0,0 +1,21 @@
// SPDX-License-Identifier: Apache-2.0
import Foundation
public protocol InputStream: Stream, IteratorProtocol {
associatedtype Element = UInt8
mutating func read(_ count: Int) throws(StreamError) -> Data
}
public extension InputStream {
mutating func read(_ size: Int, items: Int) throws(StreamError) -> Data {
try self.read(size * items)
}
}
public extension InputStream {
mutating func next() -> UInt8? {
try? self.read(1).first
}
}
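
A small sketch of what the protocol extensions above give every conformer, using the MemoryInputStream added later in this commit (the bytes are arbitrary):

import Foundation

var input = MemoryInputStream(buffer: Data([0x41, 0x50, 0x4B, 0x0A]))
while let byte = input.next() { print(byte) }   // byte-wise IteratorProtocol iteration
try? input.seek(.set(0))                        // rewind to the start
if let block = try? input.read(2, items: 2) {   // read(size:items:) from the extension above
    print(block.count)                          // 4
}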

View File

@@ -0,0 +1,61 @@
// SPDX-License-Identifier: Apache-2.0
import Foundation
public struct MemoryInputStream: InputStream {
// Owned copy of the bytes when constructed from Data; stays nil when wrapping an existing slice
private var _buf: [UInt8]! = nil
private let _sli: ArraySlice<UInt8>
private let _len: Int
private var _idx = 0
public init(buffer: Data) {
self._len = buffer.count
self._buf = [UInt8](repeating: 0, count: self._len)
self._buf.withUnsafeMutableBytes { _ = buffer.copyBytes(to: $0) }
self._sli = self._buf[...]
}
public init(view: ArraySlice<UInt8>) {
self._sli = view
self._len = view.count
}
public mutating func seek(_ whence: StreamWhence) throws(StreamError) {
let (position, overflow) = switch whence {
case .set(let position): (position, false)
case .current(let offset): self._idx.addingReportingOverflow(offset)
case .end(let offset): self._len.addingReportingOverflow(offset)
}
if overflow {
throw .overflow
} else if position < 0 {
throw .seekRange
} else {
self._idx = position
}
}
public var tell: Int {
get throws(StreamError) {
self._idx
}
}
public mutating func read(_ count: Int) throws(StreamError) -> Data {
// _sli may be a view into a larger array, so index relative to its startIndex
let base = self._sli.startIndex
let beg = min(self._idx, self._len)
let end = min(self._idx + count, self._len)
let bytes = Data(self._sli[(base + beg)..<(base + end)])
self._idx += end - beg
return bytes
}
public mutating func next() -> UInt8? {
if self._idx < self._len {
let byte = self._sli[self._sli.startIndex + self._idx] // index relative to the view's start
self._idx += 1
return byte
} else {
return nil
}
}
}
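
A short sketch of the two constructors above; init(buffer:) copies the Data into owned storage, while init(view:) wraps an existing slice without copying (example bytes are arbitrary):

import Foundation

let bytes: [UInt8] = [0x1f, 0x8b, 0x08, 0x00]
var fromData = MemoryInputStream(buffer: Data(bytes))
var fromView = MemoryInputStream(view: bytes[2...])
while let byte = fromView.next() { print(byte) }    // 8, then 0
try? fromData.seek(.end(-2))                        // position two bytes before the end
print(Array((try? fromData.read(2)) ?? Data()))     // [8, 0]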

View File

@@ -0,0 +1,30 @@
// SPDX-License-Identifier: Apache-2.0
import Foundation
public protocol Stream {
mutating func seek(_ whence: StreamWhence) throws(StreamError)
var tell: Int { get throws(StreamError) }
}
public enum StreamWhence {
case set(_ position: Int)
case current(_ offset: Int)
case end(_ offset: Int)
}
public enum StreamError: Error, LocalizedError {
case unsupported
case seekRange
case overflow
case fileHandleError(_ error: any Error)
public var errorDescription: String? {
switch self {
case .unsupported: "Unsupported operation"
case .seekRange: "Seek out of range"
case .overflow: "Stream position overflowed"
case .fileHandleError(let error): "Error from file handle: \(error.localizedDescription)"
}
}
}
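
For illustration, a sketch of how the typed StreamError surfaces to a caller (using the MemoryInputStream from this commit):

import Foundation

var stream = MemoryInputStream(buffer: Data([1, 2, 3]))
do {
    try stream.seek(.set(-1))           // negative absolute position
} catch {
    print(error.localizedDescription)   // "Seek out of range" via LocalizedError
}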

View File

@@ -0,0 +1,121 @@
// SPDX-License-Identifier: Apache-2.0
import Foundation
public struct TarReader {
private static let tarBlockSize = 512
private static let tarTypeOffset = 156
private static let tarNameOffset = 0
private static let tarNameSize = 100
private static let tarSizeOffset = 124
private static let tarSizeSize = 12
public enum Entry {
case file(name: String, data: Data)
case directory(name: String)
}
public static func read<S: InputStream>(_ stream: inout S) throws -> [Entry] {
var entries = [Entry]()
while true {
let tarBlock = try stream.read(Self.tarBlockSize)
if tarBlock.isEmpty { break }
if tarBlock.count < Self.tarBlockSize { throw TarError.unexpectedEndOfStream }
let type = UnicodeScalar(tarBlock[Self.tarTypeOffset])
switch type {
case "0": // Regular file
// Read metadata
let name = try Self.readName(tarBlock)
let size = try Self.readSize(tarBlock)
// Read file data
var data = Data()
var bytesRemaining = size, readAmount = 0
while bytesRemaining > 0 {
//FIXME: just read the whole thing at once tbh
readAmount = min(bytesRemaining, Self.tarBlockSize)
let block = try stream.read(readAmount)
if block.count < readAmount { throw TarError.unexpectedEndOfStream }
data += block
bytesRemaining -= readAmount
}
entries.append(.file(name: name, data: data))
// Skip padding to the next 512-byte boundary (none when size is 0 or a multiple of the block size)
let seekAmount = (Self.tarBlockSize - size % Self.tarBlockSize) % Self.tarBlockSize
if seekAmount > 0 {
try stream.seek(.current(seekAmount))
}
case "5":
// Directory
let name = try Self.readName(tarBlock)
entries.append(.directory(name: name))
case "\0":
// Null block, might also be a legacy regular file
break
case "x":
// Extended header block: its records are sized like file data, so skip whole blocks based on the size field
let extSize = try Self.readSize(tarBlock)
try stream.seek(.current(Self.tarBlockSize * ((extSize + Self.tarBlockSize - 1) / Self.tarBlockSize)))
// Symlink, Reserved, Character, Block, FIFO, Reserved, Global, ignore all these
case "1", "2", "3", "4", "6", "7", "g":
let size = try Self.readSize(tarBlock)
let blockCount = (size + Self.tarBlockSize - 1) / Self.tarBlockSize // Blocks to skip; 0 when the entry carries no data (e.g. symlinks)
try stream.seek(.current(Self.tarBlockSize * blockCount))
default: throw TarError.invalidType(type: type) // Not a TAR type
}
}
return entries
}
private static func readName(_ tar: Data, offset: Int = Self.tarNameOffset) throws(TarError) -> String {
var nameSize = Self.tarNameSize
for i in 0..<Self.tarNameSize {
if tar[offset + i] == 0 {
nameSize = i
break
}
}
let data = tar.subdata(in: offset..<offset + nameSize)
guard let name = String(data: data, encoding: .utf8) else { throw TarError.badNameField }
return name
}
private static func readSize(_ tar: Data, offset: Int = Self.tarSizeOffset) throws(TarError) -> Int {
let sizeData = tar.subdata(in: offset..<offset + Self.tarSizeSize)
let sizeEnd = sizeData.firstIndex(of: 0) ?? sizeData.endIndex // Find the null terminator
guard
let sizeString = String(data: sizeData[..<sizeEnd], encoding: .ascii),
// The octal field may be padded with trailing spaces as well as NULs
let result = Int(sizeString.trimmingCharacters(in: .whitespaces), radix: 8) else { throw TarError.badSizeField }
return result
}
}
public enum TarError: Error, LocalizedError {
case unexpectedEndOfStream
case invalidType(type: UnicodeScalar)
case badNameField, badSizeField
public var errorDescription: String? {
switch self {
case .unexpectedEndOfStream: "Stream unexpectedly ended early"
case .invalidType(let type): "Invalid block type \(type) found"
case .badNameField: "Bad name field"
case .badSizeField: "Bad size field"
}
}
}
public extension Array<TarReader.Entry> {
func firstFile(name firstNameMatch: String) -> Data? {
for entry in self {
if case .file(let name, let data) = entry, name == firstNameMatch {
return data
}
}
return nil
}
}
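
A rough sketch of driving TarReader together with the firstFile(name:) helper; the in-memory tarData and the APKINDEX member name are assumptions for illustration, not part of this diff:

import Foundation

func readIndex(from tarData: Data) {    // tarData: an already-decompressed tar archive (assumed)
    do {
        var archive = MemoryInputStream(buffer: tarData)
        let entries = try TarReader.read(&archive)
        if let index = entries.firstFile(name: "APKINDEX") {
            print("APKINDEX entry is \(index.count) bytes")
        }
    } catch {
        print("tar error:", error)
    }
}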

View File

@@ -0,0 +1,85 @@
// SPDX-License-Identifier: Apache-2.0
//FIXME: I don't like this, also SLOWWW
struct TextInputStream<InStream: InputStream> where InStream.Element == UInt8 {
private var _stream: InStream
public init(binaryStream: InStream) {
_stream = binaryStream
}
public var lines: LineSequence {
LineSequence(_stream: self._stream)
}
public struct LineSequence: Sequence {
public typealias Element = String
fileprivate var _stream: InStream
public struct Iterator: IteratorProtocol where InStream.Element == UInt8 {
public typealias Element = String
fileprivate init(stream: InStream) {
self._stream = stream
}
private var _stream: InStream
private var _utf8Decoder = UTF8()
private var _scalars = [Unicode.Scalar]()
private var _lastChar: UnicodeScalar = "\0"
private var _eof = false
private mutating func decodeScalarsLine() {
Decode: while true {
switch self._utf8Decoder.decode(&self._stream) {
case .scalarValue(let value):
if value == "\n" {
if self._lastChar == "\n" { break }
else { break Decode }
} else if value == "\r" {
break Decode
}
self._scalars.append(value)
self._lastChar = value
case .emptyInput:
self._eof = true
break Decode
case .error:
break Decode
//FIXME: repair like the stdlib does
//scalars.append(UTF8.encodedReplacementCharacter)
//lastChar = UTF8.encodedReplacementCharacter
}
}
}
public mutating func next() -> String? {
// Return early if we already hit the end of the stream
guard !self._eof else {
return nil
}
// Decode a line of scalars
self.decodeScalarsLine()
defer {
self._scalars.removeAll(keepingCapacity: true)
}
// Ignore the final empty newline
guard !self._eof || !self._scalars.isEmpty else {
return nil
}
// Convert to string and return
var string = String()
string.unicodeScalars.append(contentsOf: self._scalars)
return string
}
}
public func makeIterator() -> Iterator {
Iterator(stream: self._stream)
}
}
}
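
A small sketch of line iteration over an in-memory buffer; the APKINDEX-style sample text is made up:

import Foundation

let text = "P:musl\nV:1.2.5-r0\n\nP:busybox\n"
let byteStream = MemoryInputStream(buffer: Data(text.utf8))
for line in TextInputStream(binaryStream: byteStream).lines {
    print(line.isEmpty ? "<blank line>" : line)
}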