哪个散列函数更适合表示小散列表中的128位随机id

Question

在我的课程中，我有以下练习：

我有128位的GUID（全球唯一标识符）。

哪个哈希函数更好地表示hashID为000到899的桶中的值，每个桶有100个空闲位置来存储哈希冲突？

我想比较以下哈希函数：

a) h(a) = a mod 900
b) h(a) = a mod 887
c) h(a) = a^2 mod 887
d) there are not enough information to answer this question

我得到了什么：

我认为使用^ 2并不是更好，因为它只会在前几千个ID中给我们带来好处，它们应该更好地分布但是之后，我可能需要进行更多的碰撞探测以将这些值存储在其他桶。

我试图完成上面描述的行为：在下面的代码片段中，我生成90000个“随机”唯一数字，这些数字存储在地图中，其中散列函数遵循mod 900.我知道由于某些原因，素数是首选使用用于散列函数。

随机性仅实现最大32位。但我认为这不应该太重要，因为我没有使用128位最大值。

m = null;
uniqueMap = new Map();
hash = (z, p) => z % p ;

function getRandomInt(max) {
  guid = Math.floor(Math.random() * Math.floor(max));
  if (uniqueMap.has(guid)) return getRandomInt(max);
  return guid;
}


map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(getRandomInt(2147483647), 900);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

具有相同功能但使用mod 887的下一个代码段：

m = null;
uniqueMap = new Map();
hash = (z, p) => z % p ;

function getRandomInt(max) {
  guid = Math.floor(Math.random() * Math.floor(max));
  if (uniqueMap.has(guid)) return getRandomInt(max);
  return guid;
}


map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(getRandomInt(2147483647), 887);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

和^ 2：

m = null;
uniqueMap = new Map();
hash = (z, p) => z % p ;

function getRandomInt(max) {
  guid = Math.floor(Math.random() * Math.floor(max));
  if (uniqueMap.has(guid)) return getRandomInt(max);
  return guid;
}


map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(Math.pow(getRandomInt(2147483647),2), 887);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

一切都在里面：

m = null;
uniqueMap = new Map();
hash = (z, p) => z % p ;

function getRandomInt(max) {
  guid = Math.floor(Math.random() * Math.floor(max));
  if (uniqueMap.has(guid)) return getRandomInt(max);
  return guid;
}


map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(getRandomInt(2147483647), 900);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

m = null;
uniqueMap = new Map();
map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(getRandomInt(2147483647), 887);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

m = null;
uniqueMap = new Map();
map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(Math.pow(getRandomInt(2147483647),2), 887);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

如果我正在比较这3个方法，他们告诉我，mod a ^ 2的最高碰撞次数高于887和900，而没有为guid供电。所以我认为这不是正确的答案。

但是我应该如何比较其他两个呢？他们向我展示了类似的峰值，只有很小的差异。

Answer 1

您可以通过简单地检查具有较少数量的因子来比较其他两个，因为素数具有较少因素用于散列。

两者之间的差异可以忽略的原因主要是由于您使用的哈希函数。您的散列函数已经提供了良好的分布值。但问题是关于直接比较。最好的方法是选择素数为887的素数

在cs.stackexchange中有一个非常好的解释

请访问此链接了解更多信息https://cs.stackexchange.com/questions/11029/why-is-it-best-to-use-a-prime-number-as-a-mod-in-a-hashing-function

这是关于模块化哈希https://algs4.cs.princeton.edu/34hash/的更多细节

哪个散列函数更适合表示小散列表中的128位随机id

问题描述投票：3回答：1

1个回答

最新问题

哪个散列函数更适合表示小散列表中的128位随机id

问题描述 投票：3回答：1

1个回答

最新问题

问题描述投票：3回答：1