Browse Source

Initial import.

Yawning Angel 1 year ago
commit
5249ee5290
3 changed files with 359 additions and 0 deletions
  1. 122 0
      LICENSE
  2. 146 0
      bloom.go
  3. 91 0
      bloom_test.go

+ 122 - 0
LICENSE

@@ -0,0 +1,122 @@
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+    INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+    HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+
+  i. the right to reproduce, adapt, distribute, perform, display,
+     communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+     likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+     subject to the limitations in paragraph 4(a), below;
+  v. rights protecting the extraction, dissemination, use and reuse of data
+     in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+     European Parliament and of the Council of 11 March 1996 on the legal
+     protection of databases, and under any national implementation
+     thereof, including any amended or successor version of such
+     directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+     world based on applicable law or treaty, and any national
+     implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+    surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+    warranties of any kind concerning the Work, express, implied,
+    statutory or otherwise, including without limitation warranties of
+    title, merchantability, fitness for a particular purpose, non
+    infringement, or the absence of latent or other defects, accuracy, or
+    the present or absence of errors, whether or not discoverable, all to
+    the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+    that may apply to the Work or any use thereof, including without
+    limitation any person's Copyright and Related Rights in the Work.
+    Further, Affirmer disclaims responsibility for obtaining any necessary
+    consents, permissions or other rights required for any use of the
+    Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+    party to this document and has no duty or obligation with respect to
+    this CC0 or use of the Work.
+

+ 146 - 0
bloom.go

@@ -0,0 +1,146 @@
+// bloom.go - Bloom filter.
+// Written in 2015 by Yawning Angel
+//
+// To the extent possible under law, the author(s) have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along
+// with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+// Package bloom implements a Bloom Filter.
+//
+// Note that the test and set operation is not constant time, and the max
+// filter size is limited to 2^31 bits (a 2^28 byte backing store).
+package bloom
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math"
+
+	"github.com/dchest/siphash"
+)
+
+// maxNrHashes is the size of the fixed hash scratch arrays used by the
+// Filter methods, and therefore the largest number of hashes per entry that
+// New will accept.
+const maxNrHashes = 32
+
+// Filter is a bloom filter.
+type Filter struct {
+	// b is the backing bit array; bit idx lives at b[idx/8], bit (idx & 7).
+	b        []byte
+	// hashMask reduces a 32 bit hash to a bit index (filter size in bits - 1).
+	hashMask uint32
+
+	// k1 and k2 are the two halves of the SipHash key read in New.
+	k1, k2 uint64
+
+	// nrHashes is the number of bit positions examined/set per entry.
+	nrHashes     int
+	// nrEntriesMax is the capacity New derived from the size and target
+	// false positive rate.
+	nrEntriesMax int
+	// nrEntries counts the entries TestAndSet has newly added.
+	nrEntries    int
+}
+
+// New constructs a new Filter with a filter set size of 2^mLn2 bits and a
+// target false positive rate p.  The SipHash key is drawn from rand.
+//
+// mLn2 must be in the range [3, 31] (the backing store holds whole bytes),
+// and p must be strictly between 0 and 1.  An error is returned if either
+// parameter is out of range, if the parameters would need more than
+// maxNrHashes hashes, or if reading the key from rand fails.
+func New(rand io.Reader, mLn2 int, p float64) (*Filter, error) {
+	const (
+		ln2     = 0.69314718055994529
+		ln2Sq   = 0.48045301391820139
+		minMln2 = 3 // Smallest size that still yields a non-empty byte slice.
+		maxMln2 = 31
+
+		maxInt = int(^uint(0) >> 1)
+	)
+
+	// Validate the parameters before consuming any entropy.
+	if mLn2 > maxMln2 {
+		return nil, fmt.Errorf("requested filter too large: %d", mLn2)
+	}
+	if mLn2 < minMln2 {
+		return nil, fmt.Errorf("requested filter too small: %d", mLn2)
+	}
+	// Written as a negated conjunction so that NaN is rejected as well.
+	if !(p > 0 && p < 1) {
+		return nil, fmt.Errorf("invalid false positive rate: %v", p)
+	}
+
+	var key [16]byte
+	if _, err := io.ReadFull(rand, key[:]); err != nil {
+		return nil, err
+	}
+
+	// Use an explicitly 64 bit size so that 1 << 31 does not overflow int on
+	// 32 bit platforms.
+	m := uint64(1) << uint(mLn2)
+
+	// Optimal capacity n = -m * ln(2)^2 / ln(p), and the matching optimal
+	// number of hashes k = (m / n) * ln(2), rounded to nearest.
+	n := -1.0 * float64(m) * ln2Sq / math.Log(p)
+	k := int((float64(m) * ln2 / n) + 0.5)
+	if k < 2 {
+		// getHashes always fills in at least two hashes.
+		k = 2
+	}
+	if k > maxNrHashes {
+		return nil, fmt.Errorf("requested parameters need too many hashes")
+	}
+
+	// For p very close to 1, n can exceed the range of int; converting such a
+	// float is implementation-defined, so clamp instead.
+	nrEntriesMax := maxInt
+	if n < float64(maxInt) {
+		nrEntriesMax = int(n)
+	}
+
+	f := new(Filter)
+	f.k1 = binary.BigEndian.Uint64(key[0:8])
+	f.k2 = binary.BigEndian.Uint64(key[8:16])
+	f.nrEntriesMax = nrEntriesMax
+	f.nrHashes = k
+	f.hashMask = uint32(m - 1)
+	f.b = make([]byte, m/8)
+	return f, nil
+}
+
+// MaxEntries returns the maximum capacity of the Filter, as computed by New
+// from the filter size and the requested false positive rate.
+func (f *Filter) MaxEntries() int {
+	return f.nrEntriesMax
+}
+
+// Entries returns the number of entries that have been inserted into the
+// Filter.  Only TestAndSet calls that returned false are counted.
+func (f *Filter) Entries() int {
+	return f.nrEntries
+}
+
+// TestAndSet checks whether b is already a member of the Filter and, if it
+// is not, inserts it.  The return value is true iff b was reported present
+// (possibly as a false positive) at the time of the call.
+func (f *Filter) TestAndSet(b []byte) bool {
+	var scratch [maxNrHashes]uint32
+	f.getHashes(b, &scratch)
+
+	present := f.test(&scratch)
+	if !present {
+		// Not seen before: set the bits and account for the new entry.
+		f.add(&scratch)
+		f.nrEntries++
+	}
+	return present
+}
+
+// Test reports whether b is a member of the Filter.  A true result may be a
+// false positive; the Filter is not modified.
+func (f *Filter) Test(b []byte) bool {
+	var scratch [maxNrHashes]uint32
+	f.getHashes(b, &scratch)
+	present := f.test(&scratch)
+	return present
+}
+
+// getHashes fills the first f.nrHashes slots of hashes with the 32 bit hash
+// values for b.
+func (f *Filter) getHashes(b []byte, hashes *[maxNrHashes]uint32) {
+	// Following "Less Hashing, Same Performance: Building a Better Bloom
+	// Filter" (Kirsch and Mitzenmacher), two base hashes from a suitably good
+	// PRF are enough to derive the rest.  SipHash-2-4 yields a 64 bit
+	// digest, which is split into the two 32 bit base hashes, so a single
+	// SipHash-2-4 invocation suffices.
+
+	digest := siphash.Hash(f.k1, f.k2, b)
+	lo, hi := uint32(digest), uint32(digest>>32)
+	hashes[0], hashes[1] = lo, hi
+
+	// Slot i (i >= 2) holds lo + i*hi (mod 2^32), built up by repeated
+	// addition.
+	derived := lo + hi
+	for i := 2; i < f.nrHashes; i++ {
+		derived += hi
+		hashes[i] = derived
+	}
+}
+
+// test reports whether every bit selected by the first f.nrHashes hashes is
+// set in the backing store.
+func (f *Filter) test(hashes *[maxNrHashes]uint32) bool {
+	for _, h := range hashes[:f.nrHashes] {
+		bit := h & f.hashMask
+		if f.b[bit>>3]&(1<<(bit&7)) == 0 {
+			// One clear bit is enough to rule out membership.
+			return false
+		}
+	}
+	return true
+}
+
+// add sets every bit selected by the first f.nrHashes hashes in the backing
+// store.
+func (f *Filter) add(hashes *[maxNrHashes]uint32) {
+	for _, h := range hashes[:f.nrHashes] {
+		bit := h & f.hashMask
+		f.b[bit>>3] |= 1 << (bit & 7)
+	}
+}

+ 91 - 0
bloom_test.go

@@ -0,0 +1,91 @@
+// bloom_test.go - Bloom filter tests.
+// Written in 2017 by Yawning Angel
+//
+// To the extent possible under law, the author(s) have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along
+// with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+package bloom
+
+import (
+	"crypto/rand"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestFilter fills a small Filter to its computed capacity with random
+// entries, checks that every inserted entry is reported present, and then
+// measures the false positive rate against a disjoint set of random entries.
+func TestFilter(t *testing.T) {
+	const (
+		entryLength       = 32
+		falsePositiveRate = 0.01
+		filterSize        = 15 // 2^15 bits = 4 KiB
+	)
+
+	assert := assert.New(t)
+	require := require.New(t)
+
+	// 4 KiB filter, 0.01 false positive rate.
+	f, err := New(rand.Reader, filterSize, falsePositiveRate)
+	require.NoError(err, "New()")
+	assert.Equal(0, f.Entries(), "Entries(), empty filter")
+	assert.Equal(4096, len(f.b), "Backing store size")
+
+	// I could assert on these since the values calculated by New() are
+	// supposed to be optimal, but I won't for now.
+	t.Logf("Hashes: %v", f.nrHashes)         // 7 hashes is "ideal".
+	t.Logf("MaxEntries: %v", f.MaxEntries()) // 3418 entries with these params.
+
+	// Generate enough entries to fully saturate the filter.
+	maxEntries := f.MaxEntries()
+	entries := make(map[[entryLength]byte]bool)
+	for count := 0; count < maxEntries; {
+		var ent [entryLength]byte
+		_, err := rand.Read(ent[:])
+		require.NoError(err, "rand.Read()")
+
+		// This needs to ignore false positives.
+		if !f.TestAndSet(ent[:]) {
+			entries[ent] = true
+			count++
+		}
+	}
+	assert.Equal(maxEntries, f.Entries(), "After populating")
+
+	// Ensure that all the entries are present in the filter.
+	idx := 0
+	for ent := range entries {
+		assert.True(f.Test(ent[:]), "Test(ent #: %v)", idx)
+		assert.True(f.TestAndSet(ent[:]), "TestAndSet(ent #: %v)", idx)
+		idx++
+	}
+
+	// Test the false positive rate, by generating another set of entries
+	// NOT in the filter, and counting the false positives.
+	//
+	// This may have spurious failures once in a blue moon because the
+	// algorithm is probabilistic, but that's *exceedingly* unlikely with
+	// the chosen delta.
+	randomEntries := make(map[[entryLength]byte]bool)
+	for count := 0; count < maxEntries; {
+		var ent [entryLength]byte
+		_, err := rand.Read(ent[:])
+		require.NoError(err, "rand.Read()")
+		if !entries[ent] && !randomEntries[ent] {
+			randomEntries[ent] = true
+			count++
+		}
+	}
+	falsePositives := 0
+	for ent := range randomEntries {
+		if f.Test(ent[:]) {
+			falsePositives++
+		}
+	}
+	observedP := float64(falsePositives) / float64(maxEntries)
+	t.Logf("Observed False Positive Rate: %v", observedP)
+	assert.InDelta(falsePositiveRate, observedP, 0.02, "False positive rate")
+
+	// Should still be = maxEntries.
+	assert.Equal(maxEntries, f.Entries(), "After tests")
+}