7 "golang.org/x/crypto/blake2b"
10// see https://en.wikipedia.org/wiki/Bloom_filter
12var errWidth = errors.New("k and width wider than 256 bits and width not more than 32")
13var errPowerOfTwo = errors.New("data not a power of two")
15// Bloom is a bloom filter.
18 k int // Number of bits we store/lookup in the bloom filter per value.
19 w int // Number of bits needed to address a single bit position.
23func bloomWidth(fileSize int) int {
25 for bits := uint32(fileSize * 8); bits > 1; bits >>= 1 {
31// BloomValid returns an error if the bloom file parameters are not correct.
32func BloomValid(fileSize int, k int) error {
33 _, err := bloomValid(fileSize, k)
37func bloomValid(fileSize, k int) (int, error) {
38 w := bloomWidth(fileSize)
39 if 1<<w != fileSize*8 {
40 return 0, errPowerOfTwo
42 if k*w > 256 || w > 32 {
48// NewBloom returns a bloom filter with given initial data.
50// The number of bits in data must be a power of 2.
51// K is the number of "hashes" (bits) to store/lookup for each value stored.
52// Width is calculated as the number of bits needed to represent a single bit/hash
53// position in the data.
55// For each value stored/looked up, a hash over the value is calculated. The hash
56// is split into "k" values that are "width" bits wide, each used to lookup a bit.
57// K * width must not exceed 256.
58func NewBloom(data []byte, k int) (*Bloom, error) {
59 w, err := bloomValid(len(data), k)
71func (b *Bloom) Add(s string) {
72 h := hash([]byte(s), b.w)
73 for i := 0; i < b.k; i++ {
78func (b *Bloom) Has(s string) bool {
79 h := hash([]byte(s), b.w)
80 for i := 0; i < b.k; i++ {
81 if !b.has(h.nextPos()) {
88func (b *Bloom) Bytes() []byte {
92func (b *Bloom) Modified() bool {
96// Ones returns the number of ones.
97func (b *Bloom) Ones() (n int) {
98 for _, d := range b.data {
99 for i := 0; i < 8; i++ {
109func (b *Bloom) Write(path string) error {
110 f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0660)
114 if _, err := f.Write(b.data); err != nil {
118 if err := f.Close(); err != nil {
125func (b *Bloom) has(p int) bool {
126 v := b.data[p>>3] >> (7 - (p & 7))
130func (b *Bloom) set(p int) {
133 var v byte = 1 << (7 - bi)
134 if b.data[by]&v == 0 {
141 width int // Number of bits for each position.
142 buf []byte // Remaining bytes to use for next position.
143 cur uint64 // Bits to read next position from. Replenished from buf.
144 ncur int // Number of bits available in cur. We consume the highest bits first.
147func hash(v []byte, width int) *bits {
148 buf := blake2b.Sum256(v)
149 return &bits{width: width, buf: buf[:]}
152// nextPos returns the next bit position.
153func (b *bits) nextPos() (v int) {
154 if b.width > b.ncur {
155 for len(b.buf) > 0 && b.ncur < 64-8 {
157 b.cur |= uint64(b.buf[0])
162 v = int((b.cur >> (b.ncur - b.width)) & ((1 << b.width) - 1))