budd/budd-common/src/main/java/io/github/ehlxr/util/MyBloomFilter.java

127 lines
3.7 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*
* The MIT License (MIT)
*
* Copyright © 2021 xrv <xrv@live.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package io.github.ehlxr.util;
import java.util.BitSet;
/**
* @author ehlxr
* @since 2021-12-16 09:36.
*/
public class MyBloomFilter {
/**
* 一个长度为10 亿的比特位
*/
private static final int DEFAULT_SIZE = 256 << 22;
/**
* 为了降低错误率使用加法hash算法所以定义一个8个元素的质数数组
*/
private static final int[] SEEDS = {3, 5, 7, 11, 13, 31, 37, 61};
/**
* 相当于构建 8 个不同的hash算法
*/
private static final HashFunction[] HASH_FUNCS = new HashFunction[SEEDS.length];
/**
* 初始化布隆过滤器的 bitmap
*/
private static final BitSet BIT_SET = new BitSet(DEFAULT_SIZE);
/**
* 添加数据
*
* @param value 需要加入的值
*/
public static void add(String value) {
if (value != null) {
for (HashFunction f : HASH_FUNCS) {
//计算 hash 值并修改 bitmap 中相应位置为 true
BIT_SET.set(f.hash(value), true);
}
}
}
/**
* 判断相应元素是否存在
*
* @param value 需要判断的元素
* @return 结果
*/
public static boolean contains(String value) {
if (value == null) {
return false;
}
boolean ret = true;
for (HashFunction f : HASH_FUNCS) {
ret = BIT_SET.get(f.hash(value));
//一个 hash 函数返回 false 则跳出循环
if (!ret) {
break;
}
}
return ret;
}
/**
* 模拟用户是不是会员,或用户在不在线。。。
*/
public static void main(String[] args) {
for (int i = 0; i < SEEDS.length; i++) {
HASH_FUNCS[i] = new HashFunction(DEFAULT_SIZE, SEEDS[i]);
}
// 添加1亿数据
for (int i = 0; i < 1_0000_0000; i++) {
add(String.valueOf(i));
}
String id = "123456789";
add(id);
System.out.println(contains(id)); // true
System.out.println("" + contains("234567890")); //false
}
}
class HashFunction {
private final int size;
private final int seed;
public HashFunction(int size, int seed) {
this.size = size;
this.seed = seed;
}
public int hash(String value) {
int result = 0;
int len = value.length();
for (int i = 0; i < len; i++) {
result = seed * result + value.charAt(i);
}
int r = (size - 1) & result;
return (size - 1) & result;
}
}