7 "golang.org/x/crypto/blake2b"
9 "github.com/mjl-/mox/mlog"
12// see https://en.wikipedia.org/wiki/Bloom_filter
14var errWidth = errors.New("k and width wider than 256 bits and width not more than 32")
15var errPowerOfTwo = errors.New("data not a power of two")
17// Bloom is a bloom filter.
20 k int // Number of bits we store/lookup in the bloom filter per value.
21 w int // Number of bits needed to address a single bit position.
24 log mlog.Log // For cid logging.
27func bloomWidth(fileSize int) int {
29 for bits := uint32(fileSize * 8); bits > 1; bits >>= 1 {
35// BloomValid returns an error if the bloom file parameters are not correct.
36func BloomValid(fileSize int, k int) error {
37 _, err := bloomValid(fileSize, k)
41func bloomValid(fileSize, k int) (int, error) {
42 w := bloomWidth(fileSize)
43 if 1<<w != fileSize*8 {
44 return 0, errPowerOfTwo
46 if k*w > 256 || w > 32 {
52// NewBloom returns a bloom filter with given initial data.
54// The number of bits in data must be a power of 2.
55// K is the number of "hashes" (bits) to store/lookup for each value stored.
56// Width is calculated as the number of bits needed to represent a single bit/hash
57// position in the data.
59// For each value stored/looked up, a hash over the value is calculated. The hash
60// is split into "k" values that are "width" bits wide, each used to lookup a bit.
61// K * width must not exceed 256.
62func NewBloom(log mlog.Log, data []byte, k int) (*Bloom, error) {
63 w, err := bloomValid(len(data), k)
76func (b *Bloom) Add(s string) {
77 h := hash([]byte(s), b.w)
83func (b *Bloom) Has(s string) bool {
84 h := hash([]byte(s), b.w)
86 if !b.has(h.nextPos()) {
93func (b *Bloom) Bytes() []byte {
97func (b *Bloom) Modified() bool {
101// Ones returns the number of ones.
102func (b *Bloom) Ones() (n int) {
103 for _, d := range b.data {
114func (b *Bloom) Write(path string) error {
115 f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0660)
119 if _, err := f.Write(b.data); err != nil {
121 b.log.Check(xerr, "closing bloom file after write failed")
124 if err := f.Close(); err != nil {
131func (b *Bloom) has(p int) bool {
132 v := b.data[p>>3] >> (7 - (p & 7))
136func (b *Bloom) set(p int) {
139 var v byte = 1 << (7 - bi)
140 if b.data[by]&v == 0 {
147 width int // Number of bits for each position.
148 buf []byte // Remaining bytes to use for next position.
149 cur uint64 // Bits to read next position from. Replenished from buf.
150 ncur int // Number of bits available in cur. We consume the highest bits first.
153func hash(v []byte, width int) *bits {
154 buf := blake2b.Sum256(v)
155 return &bits{width: width, buf: buf[:]}
158// nextPos returns the next bit position.
159func (b *bits) nextPos() (v int) {
160 if b.width > b.ncur {
161 for len(b.buf) > 0 && b.ncur < 64-8 {
163 b.cur |= uint64(b.buf[0])
168 v = int((b.cur >> (b.ncur - b.width)) & ((1 << b.width) - 1))