-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathinvertible_bloom_filter.go
161 lines (134 loc) · 3.67 KB
/
invertible_bloom_filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
package difference_digest
import (
"database/sql"
"fmt"
)
// InvertibleBloomFilter is a data structure for compactly storing a recoverable representation of a set
// See: https://www.ics.uci.edu/~eppstein/pubs/EppGooUye-SIGCOMM-11.pdf
type InvertibleBloomFilter struct {
Cells []IBFCell
Size int
}
// IBFCell represents one cell of an Invertible Bloom Filter
type IBFCell struct {
IDSum Bitmap
HashSum Bitmap
Count int64
}
// Insert inserts an element into the IBF Cell
func (b *IBFCell) Insert(s uint64) {
b.IDSum.XOR(ToBitmap(s))
b.HashSum.XOR(ToBitmap(checkSumHash(s)))
b.Count++
}
// Subtract the value of another IBF Cell
func (b *IBFCell) Subtract(b2 *IBFCell) {
b.IDSum.XOR(&b2.IDSum)
b.HashSum.XOR(&b2.HashSum)
b.Count -= b2.Count
}
// IsPure returns true when the IBFCell has a Count of 1 or -1 and the HashSum is identical to the IDSum
func (b *IBFCell) IsPure() bool {
return (b.Count == 1 || b.Count == -1) && b.HashSum.Uint64() == checkSumHash(b.IDSum.Uint64())
}
// IsZero returns true when the IBFCell is empty (all values equal to 0)
func (b *IBFCell) IsZero() bool {
return b.IDSum.IsZero() && b.HashSum.IsZero() && b.Count == 0
}
// NewIBF initalizes an empty InvertibleBloomFilter
func NewIBF(size int) *InvertibleBloomFilter {
return &InvertibleBloomFilter{
Cells: make([]IBFCell, size),
Size: size,
}
}
// EncodeIBFDB encodes an InvertibleBloomFilter from a column in a database table;
// currently, only PostgreSQL is supported and column mast have type BIGINT/INT8
func EncodeIBFDB(size int, db *sql.DB, table string, column string) (*InvertibleBloomFilter, error) {
rows, err := db.Query(fmt.Sprintf(query("ibf"), table, column, size))
if err != nil {
return nil, err
}
defer rows.Close()
b := NewIBF(size)
for rows.Next() {
var (
cell int
IDSum, HashSum uint64
Count int64
)
err := rows.Scan(&cell, &IDSum, &HashSum, &Count)
if err != nil {
return nil, err
}
idBitmap := ToBitmap(IDSum)
hashBitmap := ToBitmap(HashSum)
el := IBFCell{
IDSum: *idBitmap,
HashSum: *hashBitmap,
Count: Count,
}
b.Cells[cell] = el
}
return b, nil
}
// Add inserts an element into the InvertibleBloomFilter
func (ibf *InvertibleBloomFilter) Add(s uint64) {
for _, h := range indiciesHashes(s) {
j := h % uint64(ibf.Size)
ibf.Cells[j].Insert(s)
}
}
// Subtract computes the difference between 2 Invertible Bloom Filter
func (ibf *InvertibleBloomFilter) Subtract(ibf2 *InvertibleBloomFilter) *InvertibleBloomFilter {
difference := NewIBF(ibf.Size)
copy(difference.Cells, ibf.Cells)
for j := 0; j < ibf.Size; j++ {
difference.Cells[j].Subtract(&ibf2.Cells[j])
}
return difference
}
// Decode recovers the Cells represented in the Invertible Bloom Filter (use only after performing a Subtract())
func (ibf *InvertibleBloomFilter) Decode() (aWithoutB []uint64, bWithoutA []uint64, ok bool) {
pureList := make([]int, 0)
for {
n := len(pureList) - 1
if n == -1 {
for j := 0; j < ibf.Size; j++ {
if ibf.Cells[j].IsPure() {
pureList = append(pureList, j)
}
}
if len(pureList) == 0 {
break
}
continue
}
j := pureList[n]
pureList = pureList[:n]
if !ibf.Cells[j].IsPure() {
continue
}
s := ibf.Cells[j].IDSum
c := ibf.Cells[j].Count
if c > 0 {
aWithoutB = append(aWithoutB, s.Uint64())
} else {
bWithoutA = append(bWithoutA, s.Uint64())
}
for _, h := range indiciesHashes(s.Uint64()) {
j2 := h % uint64(ibf.Size)
ibf.Cells[j2].IDSum.XOR(&s)
ibf.Cells[j2].HashSum.XOR(ToBitmap(checkSumHash(s.Uint64())))
ibf.Cells[j2].Count -= c
}
}
for j := 0; j < ibf.Size; j++ {
if !ibf.Cells[j].IsZero() {
ok = false
return
}
}
ok = true
return
}