Browse Source

Initial import.

Yawning Angel 5 years ago
commit
072f307c51
2 changed files with 276 additions and 0 deletions
  1. 122 0
      LICENSE
  2. 154 0
      a2filter.go

+ 122 - 0
LICENSE

@@ -0,0 +1,122 @@
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+    INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+    HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+
+  i. the right to reproduce, adapt, distribute, perform, display,
+     communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+     likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+     subject to the limitations in paragraph 4(a), below;
+  v. rights protecting the extraction, dissemination, use and reuse of data
+     in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+     European Parliament and of the Council of 11 March 1996 on the legal
+     protection of databases, and under any national implementation
+     thereof, including any amended or successor version of such
+     directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+     world based on applicable law or treaty, and any national
+     implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+    surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+    warranties of any kind concerning the Work, express, implied,
+    statutory or otherwise, including without limitation warranties of
+    title, merchantability, fitness for a particular purpose, non
+    infringement, or the absence of latent or other defects, accuracy, or
+    the present or absence of errors, whether or not discoverable, all to
+    the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+    that may apply to the Work or any use thereof, including without
+    limitation any person's Copyright and Related Rights in the Work.
+    Further, Affirmer disclaims responsibility for obtaining any necessary
+    consents, permissions or other rights required for any use of the
+    Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+    party to this document and has no duty or obligation with respect to
+    this CC0 or use of the Work.
+

+ 154 - 0
a2filter.go

@@ -0,0 +1,154 @@
+// a2filter.go - A2 bloom filter
+//
+// To the extent possible under law, the Yawning Angel waived all copyright
+// and related or neighboring rights to or-ctl-filter, using the creative
+// commons "cc0" public domain dedication. See LICENSE or
+// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
+
+// Package a2filter implements a SipHash-2-4 based Active-Active Bloom Filter.
+// It is designed to be stable over time even when filled to max capacity by
+// implementing the active-active buffering (A2 buffering) scheme presented in
+// "Aging Bloom Filter with Two Active Buffers for Dynamic Sets" (MyungKeun
+// Yoon).
+//
+// Note that none of the operations on the filter are constant time, and the
+// max backing Bloom Filter size is limited to 2^31 bits.  This package is
+// threadsafe.
+package a2filter
+
+import (
+	"crypto/rand"
+	"encoding/binary"
+	"fmt"
+	"math"
+	"sync"
+
+	"github.com/dchest/siphash"
+)
+
+const (
+	ln2         = 0.69314718055994529 // ln(2); used by New to derive the hash count
+	ln2Sq       = 0.48045301391820139 // ln(2)^2; used by New to derive the per-buffer capacity
+	maxMln2     = 31                  // largest mLn2 (log2 of the filter size in bits) New accepts
+	maxNrHashes = 32                  // New returns an error if more hashes than this are needed
+)
+
+// A2Filter is an Active-Active Bloom Filter.
+type A2Filter struct {
+	sync.Mutex        // held by TestAndSet while the buffers and counters are touched
+	k1, k2     uint64 // SipHash key halves, filled from crypto/rand in New
+
+	nrEntries    int // entries added to active1 since the last buffer swap
+	nrEntriesMax int // per-buffer capacity; exceeding it makes TestAndSet swap buffers
+
+	nrHashes int    // number of bit positions derived per entry
+	hashMask uint32 // filter size in bits minus one; reduces a hash to a bit index
+	active1  []byte // buffer that receives insertions
+	active2  []byte // previous active1; only tested for membership
+}
+
+// New constructs a new A2Filter with a filter set size of 2^mLn2 bits, and
+// false positive rate p.  The actual in memory footprint of the datastructure
+// will be approximately 2^(mLn2+1) bits due to the double buffered nature of
+// the filter.
+//
+// mLn2 must be in the range [3, maxMln2] so that each buffer spans at least
+// one whole byte, and p must lie strictly between 0 and 1.  An error is
+// returned if either argument is out of range, if the parameters would need
+// more than maxNrHashes hashes per entry, or if reading the SipHash key from
+// the system entropy source fails.
+func New(mLn2 int, p float64) (*A2Filter, error) {
+	// The buffers are byte slices, so the filter must span at least 8 bits;
+	// anything smaller would yield zero length buffers that panic on use.
+	const minMln2 = 3
+
+	if mLn2 > maxMln2 {
+		return nil, fmt.Errorf("requested filter too large: %d", mLn2)
+	}
+	if mLn2 < minMln2 {
+		return nil, fmt.Errorf("requested filter too small: %d", mLn2)
+	}
+	// Written as a negated conjunction so that NaN is rejected as well.
+	if !(p > 0 && p < 1) {
+		return nil, fmt.Errorf("invalid false positive rate: %v", p)
+	}
+
+	// Use a 64 bit intermediary so that 1 << 31 can not overflow int on 32
+	// bit platforms.
+	m := uint64(1) << uint(mLn2)
+	n := -1.0 * float64(m) * ln2Sq / math.Log(p)
+	k := int((float64(m) * ln2 / n) + 0.5)
+	if k < 2 {
+		// getHashes unconditionally fills the first two hash slots.
+		k = 2
+	}
+	if k > maxNrHashes {
+		return nil, fmt.Errorf("requested parameters need too many hashes")
+	}
+
+	// n grows without bound as p approaches 1, so clamp it instead of relying
+	// on an out of range float to integer conversion.
+	const maxInt = int(^uint(0) >> 1)
+	nrEntriesMax := maxInt
+	if n < float64(maxInt) {
+		nrEntriesMax = int(n)
+	}
+
+	// Only consume entropy once the parameters are known to be usable.
+	var key [16]byte
+	if _, err := rand.Read(key[:]); err != nil {
+		return nil, err
+	}
+
+	f := new(A2Filter)
+	f.k1 = binary.BigEndian.Uint64(key[0:8])
+	f.k2 = binary.BigEndian.Uint64(key[8:16])
+	f.nrEntriesMax = nrEntriesMax
+	f.nrHashes = k
+	f.hashMask = uint32(m - 1)
+	f.active1 = make([]byte, m/8)
+	f.active2 = make([]byte, m/8)
+	return f, nil
+}
+
+// TestAndSet reports whether b was already present in the A2Filter at the
+// time of the call, and records b in the filter if it was not found in
+// Active1.
+func (f *A2Filter) TestAndSet(b []byte) bool {
+	// Hashing only reads the immutable key, so do it before taking the lock.
+	hashes := f.getHashes(b)
+
+	f.Lock()
+	defer f.Unlock()
+
+	// A hit in Active1 needs no further bookkeeping.
+	if f.testCache(f.active1, hashes) {
+		return true
+	}
+
+	// Remember the Active2 verdict now, the buffer may be discarded below.
+	seen := f.testCache(f.active2, hashes)
+
+	f.nrEntries++
+	if f.nrEntries > f.nrEntriesMax {
+		// Active1 is saturated: it becomes the new Active2, the old Active2
+		// is dropped, and a zeroed buffer takes over as Active1.  The entry
+		// being processed is the first one counted against the new Active1.
+		saturated := f.active1
+		f.active1 = make([]byte, len(f.active2))
+		f.active2 = saturated
+		f.nrEntries = 1
+	}
+
+	f.addActive1(hashes)
+	return seen
+}
+
+// MaxEntries returns the per-buffer capacity of the A2Filter, as computed by
+// New from the filter size and false positive rate.  The filter is double
+// buffered so it may remember more entries than this, however entry count
+// accounting is only done for Active1, so Active2 should be ignored.
+func (f *A2Filter) MaxEntries() int {
+	return f.nrEntriesMax
+}
+
+// testCache reports whether every bit position derived from hashes is set in
+// cache.  The first f.nrHashes hashes are consulted, each reduced to a bit
+// index with f.hashMask.
+func (f *A2Filter) testCache(cache []byte, hashes []uint32) bool {
+	for i := 0; i < f.nrHashes; i++ {
+		bit := hashes[i] & f.hashMask
+		octet, mask := cache[bit>>3], byte(1)<<(bit&7)
+		if octet&mask == 0 {
+			// A single clear bit proves absence, stop looking.
+			return false
+		}
+	}
+	return true
+}
+
+// addActive1 sets the bit in active1 for each of the first f.nrHashes hashes,
+// after reducing each hash to a bit index with f.hashMask.  TestAndSet calls
+// it with the lock held.
+func (f *A2Filter) addActive1(hashes []uint32) {
+	for i := 0; i < f.nrHashes; i++ {
+		bit := hashes[i] & f.hashMask
+		f.active1[bit>>3] |= byte(1) << (bit & 7)
+	}
+}
+
+// getHashes derives the f.nrHashes 32 bit hashes of b that are used as bit
+// positions in the filter.
+func (f *A2Filter) getHashes(b []byte) []uint32 {
+	// Per "Less Hashing, Same Performance: Building a Better Bloom Filter"
+	// (Kirsch and Mitzenmacher), two hash values from a suitably good PRF are
+	// enough to derive all the others.  A single 64 bit SipHash-2-4 digest
+	// supplies both 32 bit values, so the PRF is invoked only once.
+
+	out := make([]uint32, f.nrHashes)
+	digest := siphash.Hash(f.k1, f.k2, b)
+	lo, hi := uint32(digest), uint32(digest>>32)
+	out[0], out[1] = lo, hi
+
+	// out[i] = lo + i*hi (mod 2^32), built up by repeated addition of hi.
+	acc := lo + hi
+	for i := 2; i < f.nrHashes; i++ {
+		acc += hi
+		out[i] = acc
+	}
+	return out
+}