diff --git a/Guides/Chunked.md b/Guides/Chunked.md index b8d7fca5..c92d365c 100644 --- a/Guides/Chunked.md +++ b/Guides/Chunked.md @@ -3,8 +3,8 @@ [[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/Chunked.swift) | [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/ChunkedTests.swift)] -Break a collection into subsequences where consecutive elements pass a binary -predicate, or where all elements in each chunk project to the same value. +Break a collection into subsequences where consecutive elements pass a binary or +reducing predicate, or where all elements in each chunk project to the same value. Also, includes a `chunks(ofCount:)` that breaks a collection into subsequences of a given `count`. @@ -20,6 +20,20 @@ let chunks = numbers.chunked(by: { $0 <= $1 }) // [[10, 20, 30], [10, 40, 40], [10, 20]] ``` +`chunkedByReduction(into:_:)` uses a reducing predicate to chunk the base collection +into subsequences of elements that reduce to a value for which the predicate returns `true`. +For example, you can chunk a collection of numbers into subsequences whose sum does not +exceed some maximum value; an element that fails the predicate on its own still forms a single-element chunk. + +```swift +let numbers = [16, 8, 8, 19, 12, 5] +let chunks = numbers.chunkedByReduction(into: 0) { sum, n in + sum += n + return sum <= 16 +} +// [[16], [8, 8], [19], [12], [5]] +``` + The `chunk(on:)` method, by contrast, takes a projection of each element and separates chunks where the projection of two consecutive elements is not equal. 
The result includes both the projected value and the subsequence diff --git a/Sources/Algorithms/Chunked.swift b/Sources/Algorithms/Chunked.swift index 30db1607..1d442de7 100644 --- a/Sources/Algorithms/Chunked.swift +++ b/Sources/Algorithms/Chunked.swift @@ -579,3 +579,214 @@ extension ChunkedByCount: LazySequenceProtocol where Base: LazySequenceProtocol {} extension ChunkedByCount: LazyCollectionProtocol where Base: LazyCollectionProtocol {} + +//===----------------------------------------------------------------------===// +// lazy.chunkedByReduction(into:_:) +//===----------------------------------------------------------------------===// + +/// A collection that lazily chunks a base collection into subsequences using +/// the given reducing predicate. +/// +/// - Note: This type is the result of +/// +/// x.chunkedByReduction(into:_:) +/// +/// where `x` conforms to `LazyCollectionProtocol`. +public struct ChunkedByReduction { + /// The collection that this instance provides a view onto. + @usableFromInline + internal let base: Base + + /// Initial value passed to the reducing predicate. + @usableFromInline + internal let initialValue: Accumulator + + /// The reducing predicate function. + @usableFromInline + internal let predicate: (inout Accumulator, Base.Element) -> Bool + + /// The precomputed start index. + @usableFromInline + internal var _startIndex: Index + + @inlinable + internal init( + base: Base, + initialValue: Accumulator, + predicate: @escaping (inout Accumulator, Base.Element) -> Bool + ) { + self.base = base + self.initialValue = initialValue + self.predicate = predicate + + self._startIndex = Index(baseRange: base.startIndex.. + + @inlinable + internal init(baseRange: Range) { + self.baseRange = baseRange + } + + @inlinable + public static func == (lhs: Index, rhs: Index) -> Bool { + // Since each index represents the range of a disparate chunk, no two + // unique indices will have the same lower bound. 
+ lhs.baseRange.lowerBound == rhs.baseRange.lowerBound + } + + @inlinable + public static func < (lhs: Index, rhs: Index) -> Bool { + // Only use the lower bound to test for ordering, as above. + lhs.baseRange.lowerBound < rhs.baseRange.lowerBound + } + } + + /// Returns the index in the chunked collection of the chunk starting at the given + /// base collection index. + @inlinable + internal func indexForChunk( + startingAt lowerBound: Base.Index + ) -> Index { + guard lowerBound < base.endIndex else { return endIndex } + + var accumulator = initialValue + var i = lowerBound + + while i != base.endIndex && predicate(&accumulator, base[i]) { + base.formIndex(after: &i) + } + + if i == lowerBound { base.formIndex(after: &i) } + + return Index(baseRange: lowerBound.. Index { + precondition(i != endIndex, "Can't advance past endIndex") + + return indexForChunk(startingAt: i.baseRange.upperBound) + } + + @inlinable + public subscript(position: Index) -> Base.SubSequence { + precondition(position != endIndex, "Can't subscript using endIndex") + return base[position.baseRange] + } +} + +extension ChunkedByReduction.Index: Hashable where Base.Index: Hashable {} + +extension LazyCollectionProtocol { + /// Lazily returns a collection of subsequences of this collection, chunked by + /// the given reducing predicate. + /// + /// This example shows how to lazily chunk a list of integers into + /// subsequences that sum to no more than 16. + /// + /// let chunks = [16, 8, 8, 19, 12, 5].lazy.chunkedByReduction(into: 0) { sum, n in + /// sum += n + /// return sum <= 16 + /// } + /// + /// for chunk in chunks { + /// print(chunk) + /// } + /// // Prints: + /// // [16] + /// // [8, 8] + /// // [19] + /// // [12] + /// // [5] + /// + /// Note that a single element which fails the predicate is included in the resulting collection. + /// + /// - Complexity: O(*n*), because the start index is pre-computed. 
+ public func chunkedByReduction( + into initialValue: Accumulator, + _ predicate: @escaping (inout Accumulator, Element) -> Bool + ) -> ChunkedByReduction { + ChunkedByReduction( + base: self, + initialValue: initialValue, + predicate: predicate + ) + } +} + +//===----------------------------------------------------------------------===// +// chunkedByReduction(into:_:) +//===----------------------------------------------------------------------===// + +extension Collection { + /// Eagerly returns a collection of subsequences of this collection, chunked by + /// the given reducing predicate. + /// + /// This example shows how to eagerly chunk a list of integers into + /// subsequences that sum to no more than 16. + /// + /// let chunks = [16, 8, 8, 19, 12, 5].chunkedByReduction(into: 0) { sum, n in + /// sum += n + /// return sum <= 16 + /// } + /// + /// for chunk in chunks { + /// print(chunk) + /// } + /// // Prints: + /// // [16] + /// // [8, 8] + /// // [19] + /// // [12] + /// // [5] + /// + /// - Complexity: O(*n*), where *n* is the length of this collection. + public func chunkedByReduction( + into initialValue: Accumulator, + _ predicate: @escaping (inout Accumulator, Element) throws -> Bool + ) rethrows -> [SubSequence] { + guard !isEmpty else { return [] } + + var result: [SubSequence] = [] + var accumulator = initialValue + var start = startIndex + var i = start + + while start < endIndex { + while try i != endIndex && predicate(&accumulator, self[i]) { + formIndex(after: &i) + } + + if i == start { formIndex(after: &i) } + + result.append(self[start.. 
Bool = { sum, elem in + sum += elem.width + return sum <= 16 + } + + fileprivate let intPredicate: (inout Int, Int) -> Bool = { sum, elem in + sum += elem + return sum <= 16 + } + + func testSumObjectProperty() throws { + let things = [16, 8, 8, 5, 5, 5, 19, 4, 4, 4, 4, 4].map { Thing(width: $0) } + let expectedChunks: [[Thing]] = [ + [16].map { Thing(width: $0) }, + [8, 8].map { Thing(width: $0) }, + [5, 5, 5].map { Thing(width: $0) }, + [19].map { Thing(width: $0) }, + [4, 4, 4, 4].map { Thing(width: $0) }, + [4].map { Thing(width: $0) } + ] + + validateChunkedByReduction( + base: things, + predicate: thingPredicate, + initialValue: 0, + expectedResult: expectedChunks + ) + } + + func testAveragingPredicate() throws { + let samples = [2.5, 16.2, 1.5, 3.14, 5.0, 5.75, 7.9, 10.2, 18.6] + let expectedChunks = [ + [2.5], + [16.2], + [1.5, 3.14, 5.0, 5.75, 7.9], + [10.2], + [18.6] + ] + + validateChunkedByReduction( + base: samples, + predicate: { result, elem in + result.0 += elem + result.1 += 1 + return result.0/Double(result.1) <= 5.0 + }, + initialValue: (0.0, 0), + expectedResult: expectedChunks + ) + } + + func testEmpty() throws { + let things: [Thing] = [] + validateChunkedByReduction( + base: things, + predicate: thingPredicate, + initialValue: 0, + expectedResult: [] + ) + } + + func testAllFailPredicate() throws { + validateChunkedByReduction( + base: [19, 19, 19, 19], + predicate: intPredicate, + initialValue: 0, + expectedResult: [[19], [19], [19], [19]] + ) + } + + func testNoneFailPredicate() throws { + validateChunkedByReduction( + base: [1, 1, 1, 1], + predicate: intPredicate, + initialValue: 0, + expectedResult: [[1, 1, 1, 1]] + ) + } +} + +fileprivate func validateChunkedByReduction( + base: Base, + predicate: @escaping (inout Accumulator, Base.Element) -> Bool, + initialValue: Accumulator, + expectedResult: [[Base.Element]] +) +where Base.Element: Equatable { + let eagerChunks = base.chunkedByReduction(into: initialValue, predicate) + 
XCTAssertEqual(eagerChunks.map { Array($0) }, expectedResult) + + let lazyChunks = base.lazy.chunkedByReduction(into: initialValue, predicate) + XCTAssertEqual(lazyChunks.map { Array($0) }, expectedResult) +}