Skip to content

Commit

Permalink
re jaybaird#2, add changelog, union, intersection and copy to BloomFi…
Browse files Browse the repository at this point in the history
…lter, not yet available in SBF. Bumped version number to 1.1
  • Loading branch information
jaybaird committed Mar 11, 2011
1 parent e505911 commit 0688743
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Changes in 1.1
==============
Added copy, intersection and union methods to BloomFilter (union and intersection are also available through the | and & operators).
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) <2009> <Jay Baird and Bob Ippolito>
Copyright (c) <2011> <Jay Baird and Bob Ippolito>

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
Expand Down
29 changes: 28 additions & 1 deletion pybloom/pybloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
except ImportError:
raise ImportError('pybloom requires bitarray >= 0.3.4')

__version__ = '1.0.3'
__version__ = '1.1'
__author__ = "Jay Baird <[email protected]>, Bob Ippolito <[email protected]>,\
Marius Eriksen <[email protected]>, Alex Brassetvik <[email protected]>"

Expand Down Expand Up @@ -187,6 +187,33 @@ def add(self, key, skip_check=False):
self.count += 1
return False

def copy(self):
    """Return an independent copy of this bloom filter.

    The new filter is constructed with this filter's ``capacity`` and
    ``error_rate`` and is given its own duplicate of the underlying
    bitarray.  The running ``count`` of added keys is carried over too, so
    the copy reports the same number of keys as the original.
    """
    new_filter = BloomFilter(self.capacity, self.error_rate)
    new_filter.bitarray = self.bitarray.copy()
    # Previously only the bits were duplicated; keep the key count in step
    # with them so the copy's bookkeeping matches the bits it holds.
    new_filter.count = self.count
    return new_filter

def union(self, other):
    """Return a new bloom filter holding the union of both filters.

    The result starts as a copy of this filter and its bitarray is then
    replaced by the bitwise OR of this filter's bits and ``other``'s bits.
    Neither input filter is modified.

    Raises:
        ValueError: if ``other`` does not have the same ``capacity`` and
            ``error_rate`` as this filter.
    """
    # OR-ing bits is only meaningful between filters built with the same
    # parameters; reject mismatches up front with a clear message instead
    # of failing inside the bitarray operation or merging silently.
    if (self.capacity != other.capacity or
            self.error_rate != other.error_rate):
        raise ValueError(
            "Unioning filters requires both filters to have both the same "
            "capacity and error rate")
    new_bloom = self.copy()
    new_bloom.bitarray = new_bloom.bitarray | other.bitarray
    return new_bloom

def __or__(self, other):
    """Implement ``self | other`` by delegating to ``union``."""
    merged = self.union(other)
    return merged

def intersection(self, other):
    """Return a new bloom filter holding the intersection of both filters.

    The result starts as a copy of this filter and its bitarray is then
    replaced by the bitwise AND of this filter's bits and ``other``'s
    bits.  Neither input filter is modified.

    Raises:
        ValueError: if ``other`` does not have the same ``capacity`` and
            ``error_rate`` as this filter.
    """
    # AND-ing bits is only meaningful between filters built with the same
    # parameters; reject mismatches up front with a clear message instead
    # of failing inside the bitarray operation or merging silently.
    if (self.capacity != other.capacity or
            self.error_rate != other.error_rate):
        raise ValueError(
            "Intersecting filters requires both filters to have equal "
            "capacity and error rate")
    new_bloom = self.copy()
    new_bloom.bitarray = new_bloom.bitarray & other.bitarray
    return new_bloom

def __and__(self, other):
    """Implement ``self & other`` by delegating to ``intersection``."""
    common = self.intersection(other)
    return common

def tofile(self, f):
"""Write the bloom filter to file object `f'. Underlying bits
are written as machine values. This is much more space
Expand Down
27 changes: 27 additions & 0 deletions pybloom/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,33 @@ def additional_tests():
suite.addTest(doctest.DocFileSuite(readme_fn, module_relative=False))
return suite

class TestUnionIntersection(unittest.TestCase):
    """Exercise BloomFilter.union and BloomFilter.intersection."""

    def test_union(self):
        """Keys added to either filter are all reported by the union."""
        bloom_one = BloomFilter(100, 0.001)
        bloom_two = BloomFilter(100, 0.001)
        chars = [chr(i) for i in range(97, 123)]
        # Floor division keeps the slice index an integer under true
        # division as well as classic division.
        half = len(chars) // 2
        for char in chars[half:]:
            bloom_one.add(char)
        for char in chars[:half]:
            bloom_two.add(char)
        new_bloom = bloom_one.union(bloom_two)
        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run when Python is started with -O.
        for char in chars:
            self.assertTrue(char in new_bloom)

    def test_intersection(self):
        """Only keys added to both filters are reported by the intersection."""
        bloom_one = BloomFilter(100, 0.001)
        bloom_two = BloomFilter(100, 0.001)
        chars = [chr(i) for i in range(97, 123)]
        half = len(chars) // 2
        for char in chars:
            bloom_one.add(char)
        for char in chars[:half]:
            bloom_two.add(char)
        new_bloom = bloom_one.intersection(bloom_two)
        for char in chars[:half]:
            self.assertTrue(char in new_bloom)
        # NOTE(review): bloom filters can report false positives, so this
        # negative check presumably relies on the low 0.001 error rate.
        for char in chars[half:]:
            self.assertFalse(char in new_bloom)

class Serialization(unittest.TestCase):
SIZE = 12345
EXPECTED = set([random.randint(0, 10000100) for _ in xrange(SIZE)])
Expand Down

0 comments on commit 0688743

Please sign in to comment.