1using System; 2using System.IO; 3using System.Text; 4 5namespace WindowCapture.Helpers 6{ 7 /// <summary> 8 /// Bloom Filter: probabilistic set membership test. 9 /// Replaces HashSet of 1.5M strings (~60MB) with ~2MB bit array. 10 /// False positive rate < 1%. No false negatives. 11 /// </summary> 12 public class BloomFilter 13 { 14 private byte[] data; // bit array stored as bytes 15 private int bitCount; // total bits 16 private int hashCount; // number of hash functions 17 18 public int BitCount { get { return bitCount; } } 19 20 /// <summary>Create empty bloom filter. bitsPerItem=16 gives FPR < 0.5% with 3 hashes.</summary> 21 public BloomFilter(int expectedItems, int bitsPerItem = 16, int numHashes = 3) 22 { 23 bitCount = expectedItems * bitsPerItem; 24 // Round up to byte boundary 25 data = new byte[(bitCount + 7) / 8]; 26 hashCount = numHashes; 27 } 28 29 /// <summary>Load from binary file.</summary> 30 public BloomFilter(string filePath) 31 { 32 byte[] raw = File.ReadAllBytes(filePath); 33 // Header: 4 bytes bitCount + 4 bytes hashCount 34 bitCount = BitConverter.ToInt32(raw, 0); 35 hashCount = BitConverter.ToInt32(raw, 4); 36 data = new byte[raw.Length - 8]; 37 Buffer.BlockCopy(raw, 8, data, 0, data.Length); 38 } 39 40 public void Add(string item) 41 { 42 string lower = item.ToLower(); 43 uint h1 = MurmurHash(lower, 0); 44 uint h2 = MurmurHash(lower, h1); 45 for (int i = 0; i < hashCount; i++) 46 { 47 uint pos = (uint)((h1 + (ulong)i * h2) % (uint)bitCount); 48 data[pos / 8] |= (byte)(1 << (int)(pos % 8)); 49 } 50 } 51 52 public bool MayContain(string item) 53 { 54 string lower = item.ToLower(); 55 uint h1 = MurmurHash(lower, 0); 56 uint h2 = MurmurHash(lower, h1); 57 for (int i = 0; i < hashCount; i++) 58 { 59 uint pos = (uint)((h1 + (ulong)i * h2) % (uint)bitCount); 60 if ((data[pos / 8] & (1 << (int)(pos % 8))) == 0) 61 return false; 62 } 63 return true; 64 } 65 66 /// <summary>Save to binary file.</summary> 67 public void Save(string filePath) 68 { 69 using (var fs = new FileStream(filePath, FileMode.Create)) 70 using (var bw = new BinaryWriter(fs)) 71 { 72 bw.Write(bitCount); 73 bw.Write(hashCount); 74 bw.Write(data); 75 } 76 } 77 78 // MurmurHash3 - fast, good distribution 79 private static uint MurmurHash(string key, uint seed) 80 { 81 byte[] bytes = Encoding.UTF8.GetBytes(key); 82 uint h = seed; 83 uint c1 = 0xcc9e2d51, c2 = 0x1b873593; 84 int len = bytes.Length; 85 int blocks = len / 4; 86 87 for (int i = 0; i < blocks; i++) 88 { 89 uint k = BitConverter.ToUInt32(bytes, i * 4); 90 k *= c1; k = RotateLeft(k, 15); k *= c2; 91 h ^= k; h = RotateLeft(h, 13); h = h * 5 + 0xe6546b64; 92 } 93 94 uint tail = 0; 95 int tailIdx = blocks * 4; 96 switch (len & 3) 97 { 98 case 3: tail ^= (uint)bytes[tailIdx + 2] << 16; goto case 2; 99 case 2: tail ^= (uint)bytes[tailIdx + 1] << 8; goto case 1; 100 case 1: tail ^= bytes[tailIdx]; tail *= c1; tail = RotateLeft(tail, 15); tail *= c2; h ^= tail; break; 101 } 102 103 h ^= (uint)len; 104 h ^= h >> 16; h *= 0x85ebca6b; 105 h ^= h >> 13; h *= 0xc2b2ae35; 106 h ^= h >> 16; 107 return h; 108 } 109 110 private static uint RotateLeft(uint x, int r) { return (x << r) | (x >> (32 - r)); } 111 } 112}