127 lines
3.7 KiB
Java
127 lines
3.7 KiB
Java
/*
 * The MIT License (MIT)
 *
 * Copyright © 2021 xrv <xrv@live.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
|
||
|
||
package io.github.ehlxr.util;
|
||
|
||
import java.util.BitSet;
|
||
|
||
/**
|
||
* @author ehlxr
|
||
* @since 2021-12-16 09:36.
|
||
*/
|
||
public class MyBloomFilter {
|
||
/**
|
||
* 一个长度为10 亿的比特位
|
||
*/
|
||
private static final int DEFAULT_SIZE = 256 << 22;
|
||
|
||
/**
|
||
* 为了降低错误率,使用加法hash算法,所以定义一个8个元素的质数数组
|
||
*/
|
||
private static final int[] SEEDS = {3, 5, 7, 11, 13, 31, 37, 61};
|
||
|
||
/**
|
||
* 相当于构建 8 个不同的hash算法
|
||
*/
|
||
private static final HashFunction[] HASH_FUNCS = new HashFunction[SEEDS.length];
|
||
|
||
/**
|
||
* 初始化布隆过滤器的 bitmap
|
||
*/
|
||
private static final BitSet BIT_SET = new BitSet(DEFAULT_SIZE);
|
||
|
||
/**
|
||
* 添加数据
|
||
*
|
||
* @param value 需要加入的值
|
||
*/
|
||
public static void add(String value) {
|
||
if (value != null) {
|
||
for (HashFunction f : HASH_FUNCS) {
|
||
//计算 hash 值并修改 bitmap 中相应位置为 true
|
||
BIT_SET.set(f.hash(value), true);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 判断相应元素是否存在
|
||
*
|
||
* @param value 需要判断的元素
|
||
* @return 结果
|
||
*/
|
||
public static boolean contains(String value) {
|
||
if (value == null) {
|
||
return false;
|
||
}
|
||
boolean ret = true;
|
||
for (HashFunction f : HASH_FUNCS) {
|
||
ret = BIT_SET.get(f.hash(value));
|
||
//一个 hash 函数返回 false 则跳出循环
|
||
if (!ret) {
|
||
break;
|
||
}
|
||
}
|
||
return ret;
|
||
}
|
||
|
||
/**
|
||
* 模拟用户是不是会员,或用户在不在线。。。
|
||
*/
|
||
public static void main(String[] args) {
|
||
for (int i = 0; i < SEEDS.length; i++) {
|
||
HASH_FUNCS[i] = new HashFunction(DEFAULT_SIZE, SEEDS[i]);
|
||
}
|
||
|
||
// 添加1亿数据
|
||
for (int i = 0; i < 1_0000_0000; i++) {
|
||
add(String.valueOf(i));
|
||
}
|
||
String id = "123456789";
|
||
add(id);
|
||
|
||
System.out.println(contains(id)); // true
|
||
System.out.println("" + contains("234567890")); //false
|
||
}
|
||
}
|
||
|
||
/**
 * A seeded polynomial string hash whose result is reduced to an index in
 * {@code [0, size)}.
 */
class HashFunction {
    /** Number of slots the hash is reduced to; always positive. */
    private final int size;

    /** Multiplier applied to the running hash before each character is added. */
    private final int seed;

    /**
     * Creates a hash function for a table of {@code size} slots.
     *
     * @param size number of slots; must be positive
     * @param seed multiplier used by the polynomial hash
     * @throws IllegalArgumentException if {@code size} is not positive
     */
    public HashFunction(int size, int seed) {
        if (size <= 0) {
            throw new IllegalArgumentException("size must be positive: " + size);
        }
        this.size = size;
        this.seed = seed;
    }

    /**
     * Hashes {@code value} to a slot index.
     *
     * <p>The hash is {@code result = seed * result + c} over every character {@code c},
     * with normal {@code int} overflow, then reduced into {@code [0, size)}.
     *
     * @param value the string to hash; must not be {@code null}
     * @return an index in the range {@code [0, size)}
     * @throws NullPointerException if {@code value} is {@code null}
     */
    public int hash(String value) {
        int result = 0;
        int len = value.length();
        for (int i = 0; i < len; i++) {
            result = seed * result + value.charAt(i);
        }
        // floorMod equals the bit mask (size - 1) & result when size is a power of two,
        // and stays non-negative and able to reach every slot for any other positive size.
        return Math.floorMod(result, size);
    }
}