数据结构与算法-哈希表快速入门

tech2025-10-11 7

生活中常见的简单哈希表

leetcode案例

如下所示，可以看出哈希表是利用空间来换时间效率的，键值是通过自我定义的hash方式来计算。

/** * 第387号问题 https://leetcode-cn.com/problems/first-unique-character-in-a-string/submissions/ */ public class Solution { /** * 传入一个字符串 * @param s * @return 个数为1的字符在字符串中的索引位置 */ public int firstUniqChar(String s) { int[] freq = new int[26]; for (int i = 0; i < s.length(); i++) freq[s.charAt(i) - 'a']++; for (int i = 0; i < s.length(); i++) if (freq[s.charAt(i) - 'a'] == 1) return i; return -1; } }

hash函数的设计

对于整型

尽可能用趋近于元素个数的素数

对于浮点型

如下图所示，浮点数也都是32位或64位的二进制表示，同样可以转成整型进行处理

字符串和自定义类型

同样可以转成整型进行处理，具体实现如下图所示，即以进制的方式存储即可

对于上述的式子，我们可以做出如下的优化

自定义hash的示例

package com.study.hashTableDemo; import java.util.Objects; public class Student { int grade; int cls; String firstName; String lastName; Student(int grade, int cls, String firstName, String lastName) { this.grade = grade; this.cls = cls; this.firstName = firstName; this.lastName = lastName; } @Override public int hashCode() { int hash=0; int B=31; hash=hash*B+((Integer)grade).hashCode(); hash=hash*B+((Integer)cls).hashCode(); hash=hash*B+firstName.toLowerCase().hashCode(); hash=hash*B+ lastName.toLowerCase().hashCode(); return hash; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Student student = (Student) o; return grade == student.grade && cls == student.cls && Objects.equals(firstName, student.firstName) && Objects.equals(lastName, student.lastName); } }

小结

对于hash函数的设计我们可以遵守一下原则：

一致性：对于同样一个键值key算出来的hash值必须一致高效性：计算高效且简便均匀性哈希必须均与分布

hash的冲突处理

链地址法

关于java中的hashMap

本质就是一个treemap数组，treeset也是hashset数组，在java 8以前数组的每一个位置都为链表，而java 8之当冲突达到一个程度之后就会转成一颗红黑树

hash链地址法具体剖析

假设我们有M个地址，N个元素，当我们用数组内部元素为链表时，复杂度为O(N/M),而平衡树则是O(log(N/M)),由该复杂度我们可以看出，若需要平均复杂度达到O(1)级别，必须合理增加M的大小，这时候我们就需要在存放元素的时候考虑缩容和扩容。具体的实现如下完整代码所示，扩容操作复杂度虽然为O(n),但是经过均摊复杂度分析之后只需要O(1)级别的复杂度即可

package com.study.hashTableDemo; import java.util.TreeMap; public class HashTable<K, V> { //treepmap底层使用红黑树实现的 private TreeMap<K, V>[] hashtable; //扩容后的容量参考 private final int[] capacity = {53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741}; // 平均每个map中元素个数上限 private static final int upperTol = 10; // 平均每个map中元素个数下限 private static final int lowerTol = 2; // 平均每个map中元素个数初始化的索引 private int capacityIndex = 0; private int size; //计算hash值所需要的hash码 private int M; public HashTable() { this.M = capacity[capacityIndex]; hashtable = new TreeMap[capacity[0]]; for (int i = 0; i < M; i++) { hashtable[i] = new TreeMap<>(); } } private int hash(K key) { //整型为4个字节这样做是去掉高位的符号 return (key.hashCode() & 0x7fffffff) % M; } public int getSize() { return this.size; } /** * 添加节点 * * @param key * @param value */ public void add(K key, V value) { TreeMap map = hashtable[hash(key)]; if (map.containsKey(key)) { map.put(key, value); } else { map.put(key, value); size++; } //由公式N/M =>(size)>=upperTol 得size >= upperTol,即每个位置挂的元素超过上线后就需要进行扩容 if (size >= upperTol * M && capacityIndex + 1 < capacity.length) resize(capacity[++capacityIndex]); } public V remove(K key) { V ret = null; TreeMap<K, V> map = hashtable[hash(key)]; if (map.containsKey(key)) { ret = map.remove(key); size--; } //由公式N/M =>(size)<lowerTol 得size < upperTol,即每个位置挂的元素超过上线后就需要进行缩容 if (size < lowerTol * M && capacityIndex - 1 >= 0) resize(capacity[--capacityIndex]); return ret; } private void resize(int newM) { TreeMap<K, V>[] newHashTable = new TreeMap[newM]; for (int i = 0; i < newM; i++) { newHashTable[i] = new TreeMap<K, V>(); } //记录旧的M进行遍历 int oldM = M; //修改为新的m进行新的hash this.M = newM; for (int i = 0; i < oldM; i++) { TreeMap<K, V> map = hashtable[i]; for (K key : map.keySet()) { newHashTable[hash(key)].put(key, map.get(key)); } } this.hashtable = newHashTable; } public void set(K key, V value) { TreeMap<K, V> map = hashtable[hash(key)]; if (!map.containsKey(key)) throw new IllegalArgumentException(" value does't exsis!"); map.put(key, value); } public boolean contains(K key) { return hashtable[hash(key)].containsKey(key); } public V get(K key) { return hashtable[hash(key)].get(key); } }

最新回复(0)