mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-11-30 09:41:11 -05:00
614 lines
13 KiB
Go
614 lines
13 KiB
Go
|
package roaring
|
||
|
|
||
|
import (
|
||
|
"container/heap"
|
||
|
"fmt"
|
||
|
"runtime"
|
||
|
"sync"
|
||
|
)
|
||
|
|
||
|
var defaultWorkerCount = runtime.NumCPU()
|
||
|
|
||
|
type bitmapContainerKey struct {
|
||
|
key uint16
|
||
|
idx int
|
||
|
bitmap *Bitmap
|
||
|
}
|
||
|
|
||
|
type multipleContainers struct {
|
||
|
key uint16
|
||
|
containers []container
|
||
|
idx int
|
||
|
}
|
||
|
|
||
|
type keyedContainer struct {
|
||
|
key uint16
|
||
|
container container
|
||
|
idx int
|
||
|
}
|
||
|
|
||
|
type bitmapContainerHeap []bitmapContainerKey
|
||
|
|
||
|
func (h bitmapContainerHeap) Len() int { return len(h) }
|
||
|
func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
|
||
|
func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
|
||
|
|
||
|
func (h *bitmapContainerHeap) Push(x interface{}) {
|
||
|
// Push and Pop use pointer receivers because they modify the slice's length,
|
||
|
// not just its contents.
|
||
|
*h = append(*h, x.(bitmapContainerKey))
|
||
|
}
|
||
|
|
||
|
func (h *bitmapContainerHeap) Pop() interface{} {
|
||
|
old := *h
|
||
|
n := len(old)
|
||
|
x := old[n-1]
|
||
|
*h = old[0 : n-1]
|
||
|
return x
|
||
|
}
|
||
|
|
||
|
func (h bitmapContainerHeap) Peek() bitmapContainerKey {
|
||
|
return h[0]
|
||
|
}
|
||
|
|
||
|
func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
|
||
|
k := h.Peek()
|
||
|
key = k.key
|
||
|
container = k.bitmap.highlowcontainer.containers[k.idx]
|
||
|
|
||
|
newIdx := k.idx + 1
|
||
|
if newIdx < k.bitmap.highlowcontainer.size() {
|
||
|
k = bitmapContainerKey{
|
||
|
k.bitmap.highlowcontainer.keys[newIdx],
|
||
|
newIdx,
|
||
|
k.bitmap,
|
||
|
}
|
||
|
(*h)[0] = k
|
||
|
heap.Fix(h, 0)
|
||
|
} else {
|
||
|
heap.Pop(h)
|
||
|
}
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
|
||
|
if h.Len() == 0 {
|
||
|
return multipleContainers{}
|
||
|
}
|
||
|
|
||
|
key, container := h.popIncrementing()
|
||
|
containers = append(containers, container)
|
||
|
|
||
|
for h.Len() > 0 && key == h.Peek().key {
|
||
|
_, container = h.popIncrementing()
|
||
|
containers = append(containers, container)
|
||
|
}
|
||
|
|
||
|
return multipleContainers{
|
||
|
key,
|
||
|
containers,
|
||
|
-1,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
|
||
|
// Initialize heap
|
||
|
var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
|
||
|
for _, bitmap := range bitmaps {
|
||
|
if !bitmap.IsEmpty() {
|
||
|
key := bitmapContainerKey{
|
||
|
bitmap.highlowcontainer.keys[0],
|
||
|
0,
|
||
|
bitmap,
|
||
|
}
|
||
|
h = append(h, key)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
heap.Init(&h)
|
||
|
|
||
|
return h
|
||
|
}
|
||
|
|
||
|
func repairAfterLazy(c container) container {
|
||
|
switch t := c.(type) {
|
||
|
case *bitmapContainer:
|
||
|
if t.cardinality == invalidCardinality {
|
||
|
t.computeCardinality()
|
||
|
}
|
||
|
|
||
|
if t.getCardinality() <= arrayDefaultMaxSize {
|
||
|
return t.toArrayContainer()
|
||
|
} else if c.(*bitmapContainer).isFull() {
|
||
|
return newRunContainer16Range(0, MaxUint16)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return c
|
||
|
}
|
||
|
|
||
|
func toBitmapContainer(c container) container {
|
||
|
switch t := c.(type) {
|
||
|
case *arrayContainer:
|
||
|
return t.toBitmapContainer()
|
||
|
case *runContainer16:
|
||
|
if !t.isFull() {
|
||
|
return t.toBitmapContainer()
|
||
|
}
|
||
|
}
|
||
|
return c
|
||
|
}
|
||
|
|
||
|
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
|
||
|
expectedKeys := -1
|
||
|
appendedKeys := 0
|
||
|
keys := make([]uint16, 0)
|
||
|
containers := make([]container, 0)
|
||
|
for appendedKeys != expectedKeys {
|
||
|
select {
|
||
|
case item := <-resultChan:
|
||
|
if len(keys) <= item.idx {
|
||
|
keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
|
||
|
containers = append(containers, make([]container, item.idx-len(containers)+1)...)
|
||
|
}
|
||
|
keys[item.idx] = item.key
|
||
|
containers[item.idx] = item.container
|
||
|
|
||
|
appendedKeys++
|
||
|
case msg := <-expectedKeysChan:
|
||
|
expectedKeys = msg
|
||
|
}
|
||
|
}
|
||
|
answer := &Bitmap{
|
||
|
roaringArray{
|
||
|
make([]uint16, 0, expectedKeys),
|
||
|
make([]container, 0, expectedKeys),
|
||
|
make([]bool, 0, expectedKeys),
|
||
|
false,
|
||
|
nil,
|
||
|
},
|
||
|
}
|
||
|
for i := range keys {
|
||
|
if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
|
||
|
answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bitmapChan <- answer
|
||
|
}
|
||
|
|
||
|
// ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
|
||
|
// where the parameter "parallelism" determines how many workers are to be used
|
||
|
// (if it is set to 0, a default number of workers is chosen)
|
||
|
// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr
|
||
|
func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||
|
|
||
|
bitmapCount := len(bitmaps)
|
||
|
if bitmapCount == 0 {
|
||
|
return NewBitmap()
|
||
|
} else if bitmapCount == 1 {
|
||
|
return bitmaps[0].Clone()
|
||
|
}
|
||
|
|
||
|
if parallelism == 0 {
|
||
|
parallelism = defaultWorkerCount
|
||
|
}
|
||
|
|
||
|
h := newBitmapContainerHeap(bitmaps...)
|
||
|
|
||
|
bitmapChan := make(chan *Bitmap)
|
||
|
inputChan := make(chan multipleContainers, 128)
|
||
|
resultChan := make(chan keyedContainer, 32)
|
||
|
expectedKeysChan := make(chan int)
|
||
|
|
||
|
pool := sync.Pool{
|
||
|
New: func() interface{} {
|
||
|
return make([]container, 0, len(bitmaps))
|
||
|
},
|
||
|
}
|
||
|
|
||
|
orFunc := func() {
|
||
|
// Assumes only structs with >=2 containers are passed
|
||
|
for input := range inputChan {
|
||
|
c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
|
||
|
for _, next := range input.containers[2:] {
|
||
|
c = c.lazyIOR(next)
|
||
|
}
|
||
|
c = repairAfterLazy(c)
|
||
|
kx := keyedContainer{
|
||
|
input.key,
|
||
|
c,
|
||
|
input.idx,
|
||
|
}
|
||
|
resultChan <- kx
|
||
|
pool.Put(input.containers[:0])
|
||
|
}
|
||
|
}
|
||
|
|
||
|
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
|
||
|
|
||
|
for i := 0; i < parallelism; i++ {
|
||
|
go orFunc()
|
||
|
}
|
||
|
|
||
|
idx := 0
|
||
|
for h.Len() > 0 {
|
||
|
ck := h.Next(pool.Get().([]container))
|
||
|
if len(ck.containers) == 1 {
|
||
|
resultChan <- keyedContainer{
|
||
|
ck.key,
|
||
|
ck.containers[0],
|
||
|
idx,
|
||
|
}
|
||
|
pool.Put(ck.containers[:0])
|
||
|
} else {
|
||
|
ck.idx = idx
|
||
|
inputChan <- ck
|
||
|
}
|
||
|
idx++
|
||
|
}
|
||
|
expectedKeysChan <- idx
|
||
|
|
||
|
bitmap := <-bitmapChan
|
||
|
|
||
|
close(inputChan)
|
||
|
close(resultChan)
|
||
|
close(expectedKeysChan)
|
||
|
|
||
|
return bitmap
|
||
|
}
|
||
|
|
||
|
// ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
|
||
|
// where the parameter "parallelism" determines how many workers are to be used
|
||
|
// (if it is set to 0, a default number of workers is chosen)
|
||
|
func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||
|
bitmapCount := len(bitmaps)
|
||
|
if bitmapCount == 0 {
|
||
|
return NewBitmap()
|
||
|
} else if bitmapCount == 1 {
|
||
|
return bitmaps[0].Clone()
|
||
|
}
|
||
|
|
||
|
if parallelism == 0 {
|
||
|
parallelism = defaultWorkerCount
|
||
|
}
|
||
|
|
||
|
h := newBitmapContainerHeap(bitmaps...)
|
||
|
|
||
|
bitmapChan := make(chan *Bitmap)
|
||
|
inputChan := make(chan multipleContainers, 128)
|
||
|
resultChan := make(chan keyedContainer, 32)
|
||
|
expectedKeysChan := make(chan int)
|
||
|
|
||
|
andFunc := func() {
|
||
|
// Assumes only structs with >=2 containers are passed
|
||
|
for input := range inputChan {
|
||
|
c := input.containers[0].and(input.containers[1])
|
||
|
for _, next := range input.containers[2:] {
|
||
|
if c.getCardinality() == 0 {
|
||
|
break
|
||
|
}
|
||
|
c = c.iand(next)
|
||
|
}
|
||
|
|
||
|
// Send a nil explicitly if the result of the intersection is an empty container
|
||
|
if c.getCardinality() == 0 {
|
||
|
c = nil
|
||
|
}
|
||
|
|
||
|
kx := keyedContainer{
|
||
|
input.key,
|
||
|
c,
|
||
|
input.idx,
|
||
|
}
|
||
|
resultChan <- kx
|
||
|
}
|
||
|
}
|
||
|
|
||
|
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
|
||
|
|
||
|
for i := 0; i < parallelism; i++ {
|
||
|
go andFunc()
|
||
|
}
|
||
|
|
||
|
idx := 0
|
||
|
for h.Len() > 0 {
|
||
|
ck := h.Next(make([]container, 0, 4))
|
||
|
if len(ck.containers) == bitmapCount {
|
||
|
ck.idx = idx
|
||
|
inputChan <- ck
|
||
|
idx++
|
||
|
}
|
||
|
}
|
||
|
expectedKeysChan <- idx
|
||
|
|
||
|
bitmap := <-bitmapChan
|
||
|
|
||
|
close(inputChan)
|
||
|
close(resultChan)
|
||
|
close(expectedKeysChan)
|
||
|
|
||
|
return bitmap
|
||
|
}
|
||
|
|
||
|
// ParOr computes the union (OR) of all provided bitmaps in parallel,
|
||
|
// where the parameter "parallelism" determines how many workers are to be used
|
||
|
// (if it is set to 0, a default number of workers is chosen)
|
||
|
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||
|
var lKey uint16 = MaxUint16
|
||
|
var hKey uint16 = 0
|
||
|
|
||
|
bitmapsFiltered := bitmaps[:0]
|
||
|
for _, b := range bitmaps {
|
||
|
if !b.IsEmpty() {
|
||
|
bitmapsFiltered = append(bitmapsFiltered, b)
|
||
|
}
|
||
|
}
|
||
|
bitmaps = bitmapsFiltered
|
||
|
|
||
|
for _, b := range bitmaps {
|
||
|
lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
|
||
|
hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
|
||
|
}
|
||
|
|
||
|
if lKey == MaxUint16 && hKey == 0 {
|
||
|
return New()
|
||
|
} else if len(bitmaps) == 1 {
|
||
|
return bitmaps[0]
|
||
|
}
|
||
|
|
||
|
keyRange := hKey - lKey + 1
|
||
|
if keyRange == 1 {
|
||
|
// revert to FastOr. Since the key range is 0
|
||
|
// no container-level aggregation parallelism is achievable
|
||
|
return FastOr(bitmaps...)
|
||
|
}
|
||
|
|
||
|
if parallelism == 0 {
|
||
|
parallelism = defaultWorkerCount
|
||
|
}
|
||
|
|
||
|
var chunkSize int
|
||
|
var chunkCount int
|
||
|
if parallelism*4 > int(keyRange) {
|
||
|
chunkSize = 1
|
||
|
chunkCount = int(keyRange)
|
||
|
} else {
|
||
|
chunkCount = parallelism * 4
|
||
|
chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
|
||
|
}
|
||
|
|
||
|
if chunkCount*chunkSize < int(keyRange) {
|
||
|
// it's fine to panic to indicate an implementation error
|
||
|
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
|
||
|
}
|
||
|
|
||
|
chunks := make([]*roaringArray, chunkCount)
|
||
|
|
||
|
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
|
||
|
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
|
||
|
|
||
|
orFunc := func() {
|
||
|
for spec := range chunkSpecChan {
|
||
|
ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
|
||
|
for _, b := range bitmaps[2:] {
|
||
|
ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
|
||
|
}
|
||
|
|
||
|
for i, c := range ra.containers {
|
||
|
ra.containers[i] = repairAfterLazy(c)
|
||
|
}
|
||
|
|
||
|
chunkChan <- parChunk{ra, spec.idx}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for i := 0; i < parallelism; i++ {
|
||
|
go orFunc()
|
||
|
}
|
||
|
|
||
|
go func() {
|
||
|
for i := 0; i < chunkCount; i++ {
|
||
|
spec := parChunkSpec{
|
||
|
start: uint16(int(lKey) + i*chunkSize),
|
||
|
end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
|
||
|
idx: int(i),
|
||
|
}
|
||
|
chunkSpecChan <- spec
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
chunksRemaining := chunkCount
|
||
|
for chunk := range chunkChan {
|
||
|
chunks[chunk.idx] = chunk.ra
|
||
|
chunksRemaining--
|
||
|
if chunksRemaining == 0 {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
close(chunkChan)
|
||
|
close(chunkSpecChan)
|
||
|
|
||
|
containerCount := 0
|
||
|
for _, chunk := range chunks {
|
||
|
containerCount += chunk.size()
|
||
|
}
|
||
|
|
||
|
result := Bitmap{
|
||
|
roaringArray{
|
||
|
containers: make([]container, containerCount),
|
||
|
keys: make([]uint16, containerCount),
|
||
|
needCopyOnWrite: make([]bool, containerCount),
|
||
|
},
|
||
|
}
|
||
|
|
||
|
resultOffset := 0
|
||
|
for _, chunk := range chunks {
|
||
|
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
|
||
|
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
|
||
|
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
|
||
|
resultOffset += chunk.size()
|
||
|
}
|
||
|
|
||
|
return &result
|
||
|
}
|
||
|
|
||
|
type parChunkSpec struct {
|
||
|
start uint16
|
||
|
end uint16
|
||
|
idx int
|
||
|
}
|
||
|
|
||
|
type parChunk struct {
|
||
|
ra *roaringArray
|
||
|
idx int
|
||
|
}
|
||
|
|
||
|
func (c parChunk) size() int {
|
||
|
return c.ra.size()
|
||
|
}
|
||
|
|
||
|
func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
|
||
|
for idx, key := range ra.keys {
|
||
|
if key >= start && key <= last {
|
||
|
return idx
|
||
|
} else if key > last {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
return ra.size()
|
||
|
}
|
||
|
|
||
|
func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
|
||
|
answer := newRoaringArray()
|
||
|
length1 := ra1.size()
|
||
|
length2 := ra2.size()
|
||
|
|
||
|
idx1 := parNaiveStartAt(ra1, start, last)
|
||
|
idx2 := parNaiveStartAt(ra2, start, last)
|
||
|
|
||
|
var key1 uint16
|
||
|
var key2 uint16
|
||
|
if idx1 < length1 && idx2 < length2 {
|
||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
|
||
|
for key1 <= last && key2 <= last {
|
||
|
|
||
|
if key1 < key2 {
|
||
|
answer.appendCopy(*ra1, idx1)
|
||
|
idx1++
|
||
|
if idx1 == length1 {
|
||
|
break
|
||
|
}
|
||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||
|
} else if key1 > key2 {
|
||
|
answer.appendCopy(*ra2, idx2)
|
||
|
idx2++
|
||
|
if idx2 == length2 {
|
||
|
break
|
||
|
}
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
} else {
|
||
|
c1 := ra1.getFastContainerAtIndex(idx1, false)
|
||
|
|
||
|
answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
|
||
|
idx1++
|
||
|
idx2++
|
||
|
if idx1 == length1 || idx2 == length2 {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if idx2 < length2 {
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
for key2 <= last {
|
||
|
answer.appendCopy(*ra2, idx2)
|
||
|
idx2++
|
||
|
if idx2 == length2 {
|
||
|
break
|
||
|
}
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if idx1 < length1 {
|
||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||
|
for key1 <= last {
|
||
|
answer.appendCopy(*ra1, idx1)
|
||
|
idx1++
|
||
|
if idx1 == length1 {
|
||
|
break
|
||
|
}
|
||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||
|
}
|
||
|
}
|
||
|
return answer
|
||
|
}
|
||
|
|
||
|
func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
|
||
|
length1 := ra1.size()
|
||
|
length2 := ra2.size()
|
||
|
|
||
|
idx1 := 0
|
||
|
idx2 := parNaiveStartAt(ra2, start, last)
|
||
|
|
||
|
var key1 uint16
|
||
|
var key2 uint16
|
||
|
if idx1 < length1 && idx2 < length2 {
|
||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
|
||
|
for key1 <= last && key2 <= last {
|
||
|
if key1 < key2 {
|
||
|
idx1++
|
||
|
if idx1 >= length1 {
|
||
|
break
|
||
|
}
|
||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||
|
} else if key1 > key2 {
|
||
|
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
|
||
|
ra1.needCopyOnWrite[idx1] = true
|
||
|
idx2++
|
||
|
idx1++
|
||
|
length1++
|
||
|
if idx2 >= length2 {
|
||
|
break
|
||
|
}
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
} else {
|
||
|
c1 := ra1.getFastContainerAtIndex(idx1, true)
|
||
|
|
||
|
ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
|
||
|
ra1.needCopyOnWrite[idx1] = false
|
||
|
idx1++
|
||
|
idx2++
|
||
|
if idx1 >= length1 || idx2 >= length2 {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
key1 = ra1.getKeyAtIndex(idx1)
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if idx2 < length2 {
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
for key2 <= last {
|
||
|
ra1.appendCopy(*ra2, idx2)
|
||
|
idx2++
|
||
|
if idx2 >= length2 {
|
||
|
break
|
||
|
}
|
||
|
key2 = ra2.getKeyAtIndex(idx2)
|
||
|
}
|
||
|
}
|
||
|
return ra1
|
||
|
}
|