Hashtables and Hashmaps

Blooverh · Blooverh · commit 49f326d83a91 · 2023-07-29T21:18:23.000-05:00
diff --git a/LeetCode/ArraysAndHashing/ValidSudoku.py b/LeetCode/ArraysAndHashing/ValidSudoku.py
@@ -37,6 +37,7 @@ def isValidSudoku(self, board: List[List[str]]) -> bool:
 - After processing all the cells, the method checks if the length of res is equal to the 
     length of the set of res
 
+    !Note: SETS CANNOT HAVE DUPLICATE ELEMENTS HENCE WHY WE CHECK LEN(RES) AND LEN(SET(RES))
     Because different length on res and set(res) means that there are the same tuples in res.
     Moreover, we need to notice that tuples representing different groups are never equal 
     (since tuple for row is Tuple[int, str] type, tuple for column is Tuple[str, int] 
diff --git a/Maps_HashTables_SkipLists/ChainHashMap.py b/Maps_HashTables_SkipLists/ChainHashMap.py
@@ -0,0 +1,36 @@
+import HashMapBase
+import UnsortedTableMap
+class ChainHashMap(HashMapBase):
+    """Hash map implementation using Separate Chaining Collision handling"""
+
+    def _bucket_getitem(self, j, k):
+        bucket= self._table[j] #bucket is a list at position j 
+        if bucket is None: #if bucket does not exist 
+            raise KeyError("Key Error" + repr(k)) #no match found because bucket does not exist 
+        
+    def _bucket_setitem(self, j, k, v):
+        if self._table[j] is None: 
+            self._table[j] = UnsortedTableMap() # creates a new bucket (unsorted table) to the table 
+        
+        oldsize = len(self._table[j])
+
+        self._table[j][k] = v #assign value to the key k on bucket j
+
+        if len(self._table[j]) > oldsize: # if bucket at j is > then oldsize new element was inserted so increase n
+            self._n +=1
+    
+    def _bucket_delitem(self, j, k):
+        bucket = self._table[j]
+
+        if bucket is None: 
+            """if bucket does not exist raise error"""
+            raise KeyError("Key error" + repr(k))
+        
+        del bucket[k] # delete item with key k if key k exists in bucket j 
+    
+    def __iter__(self):
+        """Iterator to retrieve keys in hashmap without deleting them"""
+        for bucket in self._table:
+            if bucket is not None:
+                for key in bucket:
+                    yield key 
diff --git a/Maps_HashTables_SkipLists/HashMapBase.py b/Maps_HashTables_SkipLists/HashMapBase.py
@@ -0,0 +1,53 @@
+from random import randrange
+import MapBase
+
+class HashMapBase(MapBase):
+    """ADT class for map using Hash-Tables with MAD compression"""
+
+    def __init__(self, cap=11, p=109345121):
+        """Create an empty hashtable map
+        cap - capacity 
+        p - positive prime numbver used for MAD by default """
+
+        self._table=cap * [None] # creates an empty list containing 11 entries of value None
+        self._n=0 # no items present in the list; number of entries is 0 by default
+        self._prime= p
+        self._scale = 1 + randrange(p-1) #scale from 1 to p-1 for MAD picks a random number
+        self._shift = randrange(p) #shift from 0 to p-1 for MAD
+
+    def _hash_function(self, k): 
+        """Performs Python's built in calaulation for creating the hashcode of a key 
+        hash_function(k) """
+
+        return (hash(k) * self._scale + self._shift) % self._prime % len(self._table)
+    
+    def __length__(self):
+        return self._n #returns the number of distinct items present in the table at the time of method call. 
+    
+    def __getitem__(self, k):
+        j = self._hash_function(k) # j holds hash code of the key k 
+
+        return self,self._bucket_getitem(j, k) # may raise key error if hashcode does not exist for a key k
+    
+    def __Setitem__(self, k,v):
+        j = self._hash_function(k)
+        self._bucket_setitem(j,k,v) #sub routine maintains self._n
+        # increase n if item is added to hashtable 
+        if len(self._table) // 2 < self._n: # keep load factor of hash table under 0.5, if surpasses 
+            self._resize(2 * len(self._table) -1 ) #resise table by creating a new table double the size
+            # 2 * x - 1 is often a prime number
+    def __delitem__(self, k):
+        j= self._hash_function(k) 
+        self._bucket_delitem(j, k) #may raise key error if hashcode does not match to the key k
+        self._n -=1 # decrease number of items
+    
+    def _resise(self, c):
+        """if load factor passes 0.5 create a new table with capacity c size as long as that capacity c 
+        will have the load factor under 0.5 and copy all items to the new table
+        This is done to keep the load factor under or equal to 0.5 for better collision handling."""
+
+        old=list(self.items()) # use iteration to record existing items 
+        self._table = c * [None] # reset table to desirable capacity 
+        self._n = 0 # n recomputed during subsequent adds
+        for (k,v) in old:
+            self[k] = v #reinsert old key-value pair into resized table
diff --git a/Maps_HashTables_SkipLists/ProbeHashMapping.py b/Maps_HashTables_SkipLists/ProbeHashMapping.py
@@ -0,0 +1,68 @@
+"""
+Open addressing with linear probing. 
+In order to support deletions, we place a special marker in a table location at 
+which an item has been deleted, so that we can distinguish between it and a location 
+that has always been empty
+
+OPEN ADDRESSING CHALLENGE:
+    - properly trace the series of probes when collisions occur during an insertion or
+    search for an item."""
+import HashMapBase
+class ProbeHashMap(HashMapBase):
+    """HashMap implemented with linear probing for collision resolutions"""
+
+    _AVAIL = object() #sentinel marks location of previous deletion
+
+    def _is_available(self, j):
+        """Return true if index j is available in the table"""
+        return self._table[j] is None or self._table[j] is ProbeHashMap._AVAIL
+    
+    def _find_slot(self, j ,k):
+        """search for key k in bucket at index j
+        
+        Return (success, index) tuple, described as follow:
+        If match was found, success is True and index denotes its location 
+        If no match found, success is False and index denotes first available slots """
+
+        firstAvail= None
+
+        while True:
+            if firstAvail is None:
+                firstAvail = j #mark this as first available 
+            
+            if self._table[j] is None:
+                return (False, firstAvail) #search has failed 
+            elif k == self._table[j]._key:
+                return (True , j) #found a match 
+            
+            j = (j+1) % len(self._table) #keep looking cyclically
+
+    def _bucket_getitem(self, j ,k):
+        found, s = self._find_slot(j, k) #find slot at bucket j the key k to retrieve the Item
+
+        if not found:
+            raise KeyError("Key Error" + repr(k)) #no match found
+        
+        return self._table[s]._value #return the element at position s of the hashtable 
+
+    def _bucket_setitem(self, j, k, v):
+        found, s =  self._find_slot(j,k) #find at bucket j the key k 
+
+        if not found:
+            self._table[s] = self._Item(k,v) #insert new item
+            self._n +=1 # increase size 
+        else:
+            self._table[s]._value= v #overwrite existing
+
+    def _bucket_delitem(self, j , k):
+        found, s = self._find_slot(j,k) # find slot in j with key k to delete 
+
+        if not found:
+            raise KeyError("Key error" + repr(k))
+
+        self._table[s] = ProbeHashMap._AVAIL #mark slot as vacated after deletion
+
+    def __iter__(self):
+        for j in range(len(self._table)): # Scan the entire table
+            if not self._is_available(j):
+                yield self._table[j]._key # yields every key in hashtable when iterating