windowcapture
исходный код / Helpers/BloomFilter.cs

BloomFilter.cs

112 строк · 3,896 байт · модуль Helpers
  1using System;
  2using System.IO;
  3using System.Text;
  4
  5namespace WindowCapture.Helpers
  6{
  7    /// <summary>
  8    /// Bloom Filter: probabilistic set membership test.
  9    /// Replaces HashSet of 1.5M strings (~60MB) with ~2MB bit array.
 10    /// False positive rate < 1%. No false negatives.
 11    /// </summary>
 12    public class BloomFilter
 13    {
 14        private byte[] data;  // bit array stored as bytes
 15        private int bitCount; // total bits
 16        private int hashCount; // number of hash functions
 17
 18        public int BitCount { get { return bitCount; } }
 19
 20        /// <summary>Create empty bloom filter. bitsPerItem=16 gives FPR < 0.5% with 3 hashes.</summary>
 21        public BloomFilter(int expectedItems, int bitsPerItem = 16, int numHashes = 3)
 22        {
 23            bitCount = expectedItems * bitsPerItem;
 24            // Round up to byte boundary
 25            data = new byte[(bitCount + 7) / 8];
 26            hashCount = numHashes;
 27        }
 28
 29        /// <summary>Load from binary file.</summary>
 30        public BloomFilter(string filePath)
 31        {
 32            byte[] raw = File.ReadAllBytes(filePath);
 33            // Header: 4 bytes bitCount + 4 bytes hashCount
 34            bitCount = BitConverter.ToInt32(raw, 0);
 35            hashCount = BitConverter.ToInt32(raw, 4);
 36            data = new byte[raw.Length - 8];
 37            Buffer.BlockCopy(raw, 8, data, 0, data.Length);
 38        }
 39
 40        public void Add(string item)
 41        {
 42            string lower = item.ToLower();
 43            uint h1 = MurmurHash(lower, 0);
 44            uint h2 = MurmurHash(lower, h1);
 45            for (int i = 0; i < hashCount; i++)
 46            {
 47                uint pos = (uint)((h1 + (ulong)i * h2) % (uint)bitCount);
 48                data[pos / 8] |= (byte)(1 << (int)(pos % 8));
 49            }
 50        }
 51
 52        public bool MayContain(string item)
 53        {
 54            string lower = item.ToLower();
 55            uint h1 = MurmurHash(lower, 0);
 56            uint h2 = MurmurHash(lower, h1);
 57            for (int i = 0; i < hashCount; i++)
 58            {
 59                uint pos = (uint)((h1 + (ulong)i * h2) % (uint)bitCount);
 60                if ((data[pos / 8] & (1 << (int)(pos % 8))) == 0)
 61                    return false;
 62            }
 63            return true;
 64        }
 65
 66        /// <summary>Save to binary file.</summary>
 67        public void Save(string filePath)
 68        {
 69            using (var fs = new FileStream(filePath, FileMode.Create))
 70            using (var bw = new BinaryWriter(fs))
 71            {
 72                bw.Write(bitCount);
 73                bw.Write(hashCount);
 74                bw.Write(data);
 75            }
 76        }
 77
 78        // MurmurHash3 - fast, good distribution
 79        private static uint MurmurHash(string key, uint seed)
 80        {
 81            byte[] bytes = Encoding.UTF8.GetBytes(key);
 82            uint h = seed;
 83            uint c1 = 0xcc9e2d51, c2 = 0x1b873593;
 84            int len = bytes.Length;
 85            int blocks = len / 4;
 86
 87            for (int i = 0; i < blocks; i++)
 88            {
 89                uint k = BitConverter.ToUInt32(bytes, i * 4);
 90                k *= c1; k = RotateLeft(k, 15); k *= c2;
 91                h ^= k; h = RotateLeft(h, 13); h = h * 5 + 0xe6546b64;
 92            }
 93
 94            uint tail = 0;
 95            int tailIdx = blocks * 4;
 96            switch (len & 3)
 97            {
 98                case 3: tail ^= (uint)bytes[tailIdx + 2] << 16; goto case 2;
 99                case 2: tail ^= (uint)bytes[tailIdx + 1] << 8; goto case 1;
100                case 1: tail ^= bytes[tailIdx]; tail *= c1; tail = RotateLeft(tail, 15); tail *= c2; h ^= tail; break;
101            }
102
103            h ^= (uint)len;
104            h ^= h >> 16; h *= 0x85ebca6b;
105            h ^= h >> 13; h *= 0xc2b2ae35;
106            h ^= h >> 16;
107            return h;
108        }
109
110        private static uint RotateLeft(uint x, int r) { return (x << r) | (x >> (32 - r)); }
111    }
112}