Parse version as byte array for speed

I guess :(
This commit is contained in:
2024-11-22 11:45:30 +11:00
parent 8a339d6116
commit 5d3fc9a774
2 changed files with 74 additions and 56 deletions

View File

@ -3,10 +3,15 @@
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
import Foundation
import Darwin import Darwin
public struct ApkVersionCompare { public struct ApkVersionCompare {
public static func validate(_ version: String) -> Bool { @inlinable public static func validate(_ version: String) -> Bool {
Self.validate(ContiguousArray(version.utf8))
}
public static func validate(_ version: ContiguousArray<UInt8>) -> Bool {
var reader = ApkVersionReader(version[...]) var reader = ApkVersionReader(version[...])
while true { while true {
switch try? reader.next() { switch try? reader.next() {
@ -17,7 +22,11 @@ public struct ApkVersionCompare {
} }
} }
public static func compare(_ a: String, _ b: String, mode: Mode = .normal) -> Comparison? { @inlinable public static func compare(_ a: String, _ b: String, mode: Mode = .normal) -> Comparison? {
Self.compare(ContiguousArray(a.utf8), ContiguousArray((b.utf8)), mode: mode)
}
public static func compare(_ a: ContiguousArray<UInt8>, _ b: ContiguousArray<UInt8>, mode: Mode = .normal) -> Comparison? {
if (a.isEmpty && b.isEmpty) || a == b { if (a.isEmpty && b.isEmpty) || a == b {
return .equal return .equal
} }
@ -74,16 +83,13 @@ fileprivate extension ApkVersionReader.TokenPart {
return nil return nil
} }
// If either are digit & zero prefixed & not initial then handle as string // If either are digit & zero prefixed & not initial then handle as string
return if lhsString?.first == "0" || rhsString?.first == "0" { return if lhsString?.first == UInt8(ascii: "0") || rhsString?.first == UInt8(ascii: "0") {
self.compValue(lhsString!, rhsString!) self.compValue(lhsString!, rhsString!)
} else { } else {
Self.compValue(lhsNumber, rhsNumber) Self.compValue(lhsNumber, rhsNumber)
} }
case .letter(let lhs): case .letter(let lhs):
guard case .letter(let rhs) = b else { return if case .letter(let rhs) = b { Self.compValue(lhs, rhs) } else { nil }
return nil
}
return Self.compValue(lhs.isASCII ? UInt(lhs.asciiValue!) : 0, rhs.isASCII ? UInt(rhs.asciiValue!) : 0)
case .suffixNumber(let lhs): case .suffixNumber(let lhs):
return if case .suffixNumber(let rhs) = b { Self.compValue(lhs, rhs) } else { nil } return if case .suffixNumber(let rhs) = b { Self.compValue(lhs, rhs) } else { nil }
case .revision(let lhs): case .revision(let lhs):
@ -105,17 +111,17 @@ fileprivate extension ApkVersionReader.TokenPart {
else { .greater } else { .greater }
} }
/// Orders two text values bytewise: `memcmp` runs over the first `minLength` bytes (the
/// shorter of the two lengths) and decides the result when it is non-zero; when the shared
/// prefix is identical the two lengths are compared instead.
// NOTE(review): the byte-slice form force-unwraps `baseAddress`; this presumably relies on
// neither slice being empty -- confirm against the callers.
private static func compValue<T: StringProtocol>(_ a: T, _ b: T) -> ApkVersionCompare.Comparison { private static func compValue(_ a: ArraySlice<UInt8>, _ b: ArraySlice<UInt8>) -> ApkVersionCompare.Comparison {
let minLength = min(a.utf8.count, b.utf8.count) let minLength = min(a.count, b.count)
let comparison = a.withCString { ca in let comparison = a.withUnsafeBytes { ca in
b.withCString { cb in b.withUnsafeBytes { cb in
memcmp(ca, cb, minLength) memcmp(ca.baseAddress!, cb.baseAddress!, minLength)
} }
} }
if comparison != 0 { if comparison != 0 {
// Only the sign of the memcmp result is used.
return comparison < 0 ? .less : .greater return comparison < 0 ? .less : .greater
} else { } else {
// Equal prefix: fall back to comparing the lengths.
return Self.compValue(a.utf8.count, b.utf8.count) return Self.compValue(a.count, b.count)
} }
} }
} }

View File

@ -4,10 +4,10 @@
*/ */
internal struct ApkVersionReader { internal struct ApkVersionReader {
var string: Substring var string: ArraySlice<UInt8>
private var seen: TokenFlag, last: TokenFlag private var seen: TokenFlag, last: TokenFlag
init(_ string: Substring) { init(_ string: ArraySlice<UInt8>) {
self.string = string self.string = string
self.seen = [] self.seen = []
self.last = [] self.last = []
@ -16,21 +16,21 @@ internal struct ApkVersionReader {
mutating func next() throws(Invalid) -> TokenPart { mutating func next() throws(Invalid) -> TokenPart {
self.seen.formUnion(self.last) self.seen.formUnion(self.last)
switch string.first ?? "\0" { switch string.first ?? UInt8(ascii: "0") {
case "a"..."z": // Letter suffix case UInt8(ascii: "a")...UInt8(ascii: "z"): // Letter suffix
guard self.seen.contains(.initial), guard self.seen.contains(.initial),
self.last.isDisjoint(with: [ .letter, .suffix, .suffixNumber, .commitHash, .revision ]) else { self.last.isDisjoint(with: [ .letter, .suffix, .suffixNumber, .commitHash, .revision ]) else {
throw .invalid throw .invalid
} }
self.last = .letter self.last = .letter
return .letter(self.advance()) return .letter(self.advance())
case ".": // Version separator case UInt8(ascii: "."): // Version separator
guard self.seen.contains(.initial), self.last.contains(.digit) else { guard self.seen.contains(.initial), self.last.contains(.digit) else {
throw .invalid throw .invalid
} }
self.advance() self.advance()
fallthrough fallthrough
case "0"..."9": // Numeric component case UInt8(ascii: "0")...UInt8(ascii: "9"): // Numeric component
guard self.last.isSubset(of: [ .initial, .digit, .suffix ]), guard self.last.isSubset(of: [ .initial, .digit, .suffix ]),
let (number, numString) = self.readNumber() else { let (number, numString) = self.readNumber() else {
throw .invalid throw .invalid
@ -48,7 +48,7 @@ internal struct ApkVersionReader {
return .digit(number, numString) return .digit(number, numString)
} }
} }
case "_": // Suffix case UInt8(ascii: "_"): // Suffix
guard self.seen.contains(.initial), self.seen.isDisjoint(with: [ .commitHash, .revision ]) else { guard self.seen.contains(.initial), self.seen.isDisjoint(with: [ .commitHash, .revision ]) else {
throw .invalid throw .invalid
} }
@ -58,7 +58,7 @@ internal struct ApkVersionReader {
} }
self.last = .suffix self.last = .suffix
return .suffix(suffix) return .suffix(suffix)
case "~": // Commit hash case UInt8(ascii: "~"): // Commit hash
guard self.seen.contains(.initial), self.seen.isDisjoint(with: [ .commitHash, .revision ]) else { guard self.seen.contains(.initial), self.seen.isDisjoint(with: [ .commitHash, .revision ]) else {
throw .invalid throw .invalid
} }
@ -70,15 +70,15 @@ internal struct ApkVersionReader {
} }
self.last = .commitHash self.last = .commitHash
return .commitHash(hex) return .commitHash(hex)
case "-": // Package revision case UInt8(ascii: "-"): // Package revision
guard self.seen.contains(.initial), self.seen.isDisjoint(with: .revision), guard self.seen.contains(.initial), self.seen.isDisjoint(with: .revision),
self.advance(2) == "-r", self.advance(2) == [ UInt8(ascii: "-"), UInt8(ascii: "r") ],
let (number, _) = self.readNumber() else { let (number, _) = self.readNumber() else {
throw .invalid throw .invalid
} }
self .last = .revision self .last = .revision
return .revision(number) return .revision(number)
case "\0": // End of version string case UInt8(ascii: "\0"): // End of version string
guard self.seen.contains(.initial) else { guard self.seen.contains(.initial) else {
throw .invalid throw .invalid
} }
@ -90,59 +90,59 @@ internal struct ApkVersionReader {
//MARK: - Private Implementation //MARK: - Private Implementation
/// Consumes the run of ASCII digits at the front of the remaining input.
///
/// - Returns: The parsed value together with the digit text that was consumed, or `nil`
///   when the input does not start with a digit.
private mutating func readNumber() -> (UInt, Substring)? { private mutating func readNumber() -> (UInt, ArraySlice<UInt8>)? {
// Hacky and awful but seems to be the fastest way to get numeric token length let maxLength = self.string.count
let digits = self.string.withCString { let (end, result) = self.string.withUnsafeBufferPointer {
var i = 0 var i = 0, accum: UInt = 0
while 48...57 ~= $0[i] { // isnumber(Int32($0[i])) != 0 while i < maxLength {
let c = $0[i]
if !(UInt8(ascii: "0")...UInt8(ascii: "9") ~= c) {
break
}
// NOTE(review): `&*` / `&+` wrap on overflow, so an over-long digit run produces a wrapped
// value here, whereas the `UInt(_:radix:)` guard further down returns nil for a token it
// cannot parse -- confirm the silent wrap is intended.
accum = accum &* 10 &+ UInt(c - UInt8(ascii: "0"))
i += 1 i += 1
} }
return i return (i, accum)
} }
let end = self.string.index(self.string.startIndex, offsetBy: digits) if end == 0 {
let string = self.string[..<end]
self.string = self.string[end...]
guard !string.isEmpty, let result = UInt(string, radix: 10) else {
return nil return nil
} }
return (result, string) return (result, self.advance(end))
} }
/// Consumes the leading run of lowercase letters and maps it to a `VersionSuffix`.
///
/// Only the first letter of the run selects the suffix; the one exception is `p`, where a
/// single-letter run yields `.p` and any longer run yields `.pre`.
///
/// - Returns: The matched suffix, or `nil` when the run is empty or starts with a letter
///   that has no case below.
private mutating func readVersionSuffix() -> VersionSuffix? {  private mutating func readVersionSuffix() -> VersionSuffix? {
let end = self.string.firstIndex(where: { !$0.isLowercase }) ?? self.string.endIndex let end = self.string.firstIndex(where: { !(UInt8(ascii: "a")...UInt8(ascii: "z") ~= $0) }) ?? self.string.endIndex
let suffix = self.advance(end) let suffix = self.advance(end - self.string.startIndex)
return switch suffix.first { // TODO: Should this matching be stricter? return switch suffix.first { // TODO: Should this matching be stricter?
case "a": .alpha case UInt8(ascii: "a"): .alpha
case "b": .beta case UInt8(ascii: "b"): .beta
case "c": .cvs case UInt8(ascii: "c"): .cvs
case "g": .git case UInt8(ascii: "g"): .git
case "h": .hg case UInt8(ascii: "h"): .hg
case "p": suffix.count == 1 ? .p : .pre case UInt8(ascii: "p"): suffix.count == 1 ? .p : .pre
case "r": .rc case UInt8(ascii: "r"): .rc
case "s": .svn case UInt8(ascii: "s"): .svn
default: nil default: nil
} }
} }
/// Splits the remaining input at a position: returns everything before it and keeps
/// everything from it onwards as the new remaining input (the reassignment runs in `defer`,
/// i.e. after the return value has been formed).
///
/// The `len`-based form clamps the split position to `endIndex`, so asking for more
/// elements than remain returns whatever is left.
@discardableResult @discardableResult
private mutating func advance(_ next: String.Index) -> Substring { private mutating func advance(_ len: Int) -> ArraySlice<UInt8> {
let beg = self.string.startIndex
let end = min(string.index(beg, offsetBy: len), string.endIndex)
defer {
self.string = self.string[next...] self.string = self.string[end...]
}
return self.string[..<next] return self.string[beg..<end]
}
/// Removes and returns the first element of the remaining input.
///
/// The element at `startIndex` is subscripted without an emptiness check, so this must only
/// be called when the remaining input is non-empty.
@discardableResult @discardableResult
private mutating func advance() -> Character { private mutating func advance() -> UInt8 {
defer { defer {
self.string = string[string.index(after: string.startIndex)...] self.string = string[string.index(after: string.startIndex)...]
} }
return self.string[self.string.startIndex] return self.string[self.string.startIndex]
} }
/// Consumes `len` characters by converting the offset into a `String.Index` and delegating
/// to the index-based `advance(_:)`.
private mutating func advance(_ len: Int) -> Substring {
self.advance(self.string.index(self.string.startIndex, offsetBy: len))
}
} }
extension ApkVersionReader { extension ApkVersionReader {
@ -159,11 +159,11 @@ extension ApkVersionReader {
} }
/// One lexical component of a version string, as returned by `next()`.
enum TokenPart {  enum TokenPart {
// Numeric component: the parsed value plus, optionally, the digit text it was read from.
case digit(_ number: UInt, _ string: Substring?) case digit(_ number: UInt, _ string: ArraySlice<UInt8>?)
// A single letter following the numeric components.
case letter(_ char: Character) case letter(_ char: UInt8)
case suffix(_ suffix: VersionSuffix) case suffix(_ suffix: VersionSuffix)
case suffixNumber(_ number: UInt) case suffixNumber(_ number: UInt)
// Text of the commit-hash component (introduced by "~" in `next()`).
case commitHash(_ hash: Substring) case commitHash(_ hash: ArraySlice<UInt8>)
// Number of the package-revision component (introduced by "-r" in `next()`).
case revision(_ number: UInt) case revision(_ number: UInt)
case end case end
} }
@ -198,3 +198,15 @@ extension ApkVersionReader.TokenPart: Comparable {
return lhs.order < rhs.order return lhs.order < rhs.order
} }
} }
fileprivate extension UInt8 {
    /// `true` when this byte is an ASCII hexadecimal digit: `0`-`9`, `A`-`F` or `a`-`f`.
    @inline(__always) var isHexDigit: Bool {
        let decimal = UInt8(ascii: "0")...UInt8(ascii: "9")
        let upperHex = UInt8(ascii: "A")...UInt8(ascii: "F")
        let lowerHex = UInt8(ascii: "a")...UInt8(ascii: "f")
        return decimal.contains(self) || upperHex.contains(self) || lowerHex.contains(self)
    }
}