C語言資料結構與演算法 - 散列表

概述

散列表是一種資料結構，其插入和搜尋操作速度非常快，與散列表的大小無關。其時間複雜度接近常數或O(1)。散列表使用陣列作為儲存介質，並使用雜湊技術生成元素插入或查詢的索引。

雜湊

雜湊是一種將一系列鍵值轉換為陣列索引範圍的技術。我們將使用模運算子（%）把鍵值對應到陣列索引的範圍內。考慮一個大小為20的散列表示例，以及要儲存的以下專案。專案採用(鍵，值)格式。

(1,20)
(2,70)
(42,80)
(4,25)
(12,44)
(14,32)
(17,11)
(13,78)
(37,97)

序號	鍵	雜湊值	陣列索引
1	1	1 % 20 = 1	1
2	2	2 % 20 = 2	2
3	42	42 % 20 = 2	2
4	4	4 % 20 = 4	4
5	12	12 % 20 = 12	12
6	14	14 % 20 = 14	14
7	17	17 % 20 = 17	17
8	13	13 % 20 = 13	13
9	37	37 % 20 = 17	17

線性探測

我們可以看到，所使用的雜湊技術可能會產生已被佔用的陣列索引（例如鍵2與鍵42都對應到索引2）。在這種情況下，我們可以從該索引開始逐一檢查後面的單元格，直到找到一個空單元格為止。此技術稱為線性探測。

序號	鍵	雜湊值	陣列索引	線性探測後，陣列索引
1	1	1 % 20 = 1	1	1
2	2	2 % 20 = 2	2	2
3	42	42 % 20 = 2	2	3
4	4	4 % 20 = 4	4	4
5	12	12 % 20 = 12	12	12
6	14	14 % 20 = 14	14	14
7	17	17 % 20 = 17	17	17
8	13	13 % 20 = 13	13	13
9	37	37 % 20 = 17	17	18

基本操作

以下是散列表的基本主要操作。

搜尋 - 在散列表中搜尋元素。
插入 - 將元素插入到散列表中。
刪除 - 從散列表中刪除元素。

資料項

定義一個資料項，其中包含一些資料和鍵，根據該鍵在散列表中進行搜尋。

// One entry of the hash table: a value together with the key it is stored under.
struct DataItem {
   int data;   // value associated with the key
   int key;    // key that is hashed to locate the entry
};

雜湊方法

定義一個雜湊方法來計算資料項鍵的雜湊碼。

/*
 * Maps a key to a slot index in the range [0, SIZE).
 *
 * In C the result of % takes the sign of the dividend, so a negative key
 * would give a negative remainder. That remainder is shifted back into
 * range so the result is always usable as an array index.
 */
int hashCode(int key){
   int index = key % SIZE;
   if(index < 0){
      index += SIZE;
   }
   return index;
}

搜尋操作

每當要搜尋元素時，計算傳遞的鍵的雜湊碼，並使用該雜湊碼作為陣列中的索引來定位元素。如果在計算出的雜湊碼處找不到元素，則使用線性探測依序檢查後面的單元格，直到找到該元素或遇到空單元格為止。

/*
 * Looks up the item stored under `key`.
 *
 * Probing starts at the key's hash index and moves forward one cell at a
 * time, wrapping around at the end of the array, until the key is found or
 * an empty (NULL) cell is reached. At most SIZE cells are examined, so a
 * table without any empty cell cannot make the loop run forever.
 *
 * Returns a pointer to the matching item, or NULL when the key is absent.
 */
struct DataItem *search(int key){
   int hashIndex = hashCode(key);

   for(int probes = 0; probes < SIZE && hashArray[hashIndex] != NULL; probes++){
      if(hashArray[hashIndex]->key == key){
         return hashArray[hashIndex];
      }
      // go to next cell, wrapping around the table
      hashIndex = (hashIndex + 1) % SIZE;
   }
   return NULL;
}

插入操作

每當要插入元素時，計算傳遞的鍵的雜湊碼，並使用該雜湊碼作為陣列中的索引。如果該索引處已經有元素，則使用線性探測尋找下一個空位置（或已刪除的位置）。

/*
 * Stores a new (key, data) item in the table.
 *
 * Starting at the key's hash index, occupied cells are skipped one by one
 * (wrapping around) until an empty cell or a deleted cell is found; a deleted
 * cell is recognised by its key being -1. Because -1 marks deleted cells, an
 * item inserted with key -1 would itself look deleted and may be overwritten.
 *
 * The slot is located before any memory is allocated. If every cell is
 * occupied, or the allocation fails, a message is written to stderr and the
 * table is left unchanged.
 */
void insert(int key,int data){
   int hashIndex = hashCode(key);
   int probes = 0;

   // move in array until an empty or deleted cell
   while(hashArray[hashIndex] != NULL && hashArray[hashIndex]->key != -1){
      // every cell has been examined and none is free
      if(++probes == SIZE){
         fprintf(stderr, "insert: hash table is full, key %d not stored\n", key);
         return;
      }
      // go to next cell, wrapping around the table
      hashIndex = (hashIndex + 1) % SIZE;
   }

   struct DataItem *item = malloc(sizeof *item);
   if(item == NULL){
      fprintf(stderr, "insert: out of memory, key %d not stored\n", key);
      return;
   }
   item->data = data;
   item->key = key;

   hashArray[hashIndex] = item;
}

刪除操作

每當要刪除元素時，計算傳遞的鍵的雜湊碼，並使用該雜湊碼作為陣列中的索引來定位元素。如果在計算出的雜湊碼處找不到元素，則使用線性探測依序檢查後面的單元格。找到後，在該位置存入一個虛擬項，而不是將其清空，這樣探測序列不會中斷，之後仍能搜尋到位於它後面的元素。

/*
 * Removes the entry whose key equals item->key.
 *
 * The entry is located by key using the same linear probing as a lookup.
 * Its cell is not cleared; it is replaced by dummyItem so that later probes
 * do not stop early at this position. At most SIZE cells are examined.
 *
 * Returns the item that was stored in the table (the table no longer refers
 * to it), or NULL when `item` is NULL or no entry with that key exists.
 */
struct DataItem* delete(struct DataItem* item){
   // nothing to delete; also avoids dereferencing a NULL pointer below
   if(item == NULL){
      return NULL;
   }

   int key = item->key;
   int hashIndex = hashCode(key);

   for(int probes = 0; probes < SIZE && hashArray[hashIndex] != NULL; probes++){
      if(hashArray[hashIndex]->key == key){
         struct DataItem* removed = hashArray[hashIndex];

         // assign a dummy item at deleted position
         hashArray[hashIndex] = dummyItem;
         return removed;
      }
      // go to next cell, wrapping around the table
      hashIndex = (hashIndex + 1) % SIZE;
   }
   return NULL;
}

示例

線上演示

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>

// Number of cells in hashArray; also the modulus applied to keys in hashCode().
#define SIZE 20

// One entry of the hash table: a value together with the key it is stored under.
struct DataItem {
   int data;   // value associated with the key
   int key;    // key that is hashed to locate the entry
};

// The table itself: SIZE cells, each NULL (empty) or pointing to an item.
// As a file-scope object it starts out with every cell NULL.
struct DataItem* hashArray[SIZE]; 
// Marker placed by delete() in cells whose item was removed; set up in main()
// with key and data both -1.
struct DataItem* dummyItem;
// Holds the result of search() in main().
struct DataItem* item;

/*
 * Maps a key to a slot index in the range [0, SIZE).
 *
 * In C the result of % takes the sign of the dividend, so a negative key
 * would give a negative remainder. That remainder is shifted back into
 * range so the result is always usable as an index into hashArray.
 */
int hashCode(int key){
   int index = key % SIZE;
   if(index < 0){
      index += SIZE;
   }
   return index;
}

/*
 * Looks up the item stored under `key`.
 *
 * Probing starts at the key's hash index and moves forward one cell at a
 * time, wrapping around at the end of the array, until the key is found or
 * an empty (NULL) cell is reached. Cells holding dummyItem are not NULL, so
 * probing continues past deleted positions. At most SIZE cells are examined,
 * so a table without any empty cell cannot make the loop run forever.
 *
 * Returns a pointer to the matching item, or NULL when the key is absent.
 */
struct DataItem *search(int key){
   int hashIndex = hashCode(key);

   for(int probes = 0; probes < SIZE && hashArray[hashIndex] != NULL; probes++){
      if(hashArray[hashIndex]->key == key){
         return hashArray[hashIndex];
      }
      // go to next cell, wrapping around the table
      hashIndex = (hashIndex + 1) % SIZE;
   }
   return NULL;
}

/*
 * Stores a new (key, data) item in the table.
 *
 * Starting at the key's hash index, occupied cells are skipped one by one
 * (wrapping around) until an empty cell or a deleted cell is found; a deleted
 * cell is recognised by its key being -1. Because -1 marks deleted cells, an
 * item inserted with key -1 would itself look deleted and may be overwritten.
 *
 * The slot is located before any memory is allocated. If every cell is
 * occupied, or the allocation fails, a message is written to stderr and the
 * table is left unchanged.
 */
void insert(int key,int data){
   int hashIndex = hashCode(key);
   int probes = 0;

   // move in array until an empty or deleted cell
   while(hashArray[hashIndex] != NULL && hashArray[hashIndex]->key != -1){
      // every cell has been examined and none is free
      if(++probes == SIZE){
         fprintf(stderr, "insert: hash table is full, key %d not stored\n", key);
         return;
      }
      // go to next cell, wrapping around the table
      hashIndex = (hashIndex + 1) % SIZE;
   }

   struct DataItem *item = malloc(sizeof *item);
   if(item == NULL){
      fprintf(stderr, "insert: out of memory, key %d not stored\n", key);
      return;
   }
   item->data = data;
   item->key = key;

   hashArray[hashIndex] = item;
}
/*
 * Removes the entry whose key equals item->key.
 *
 * The entry is located by key using the same linear probing as search().
 * Its cell is not cleared; it is replaced by dummyItem so that later probes
 * do not stop early at this position. At most SIZE cells are examined.
 *
 * Returns the item that was stored in the table (the table no longer refers
 * to it), or NULL when `item` is NULL or no entry with that key exists.
 */
struct DataItem* delete(struct DataItem* item){
   // nothing to delete; also avoids dereferencing a NULL pointer below
   if(item == NULL){
      return NULL;
   }

   int key = item->key;
   int hashIndex = hashCode(key);

   for(int probes = 0; probes < SIZE && hashArray[hashIndex] != NULL; probes++){
      if(hashArray[hashIndex]->key == key){
         struct DataItem* removed = hashArray[hashIndex];

         // assign a dummy item at deleted position
         hashArray[hashIndex] = dummyItem;
         return removed;
      }
      // go to next cell, wrapping around the table
      hashIndex = (hashIndex + 1) % SIZE;
   }
   return NULL;
}
void display(){
   int i=0;
   for(i=0; i<SIZE; i++) {
      if(hashArray[i] != NULL)
         printf(" (%d,%d)",hashArray[i]->key,hashArray[i]->data);
      else
         printf(" ~~ ");
   }
   printf("\n");
}
int main(){
   dummyItem = (struct DataItem*) malloc(sizeof(struct DataItem));
   dummyItem->data = -1;  
   dummyItem->key = -1; 

   insert(1, 20);
   insert(2, 70);
   insert(42, 80);
   insert(4, 25);
   insert(12, 44);
   insert(14, 32);
   insert(17, 11);
   insert(13, 78);
   insert(37, 97);

   display();
   item = search(37);

   if(item != NULL){
      printf("Element found: %d\n", item->data);
   } else {
      printf("Element not found\n");
   }

   delete(item);
   item = search(37);

   if(item != NULL){
      printf("Element found: %d\n", item->data);
   } else {
      printf("Element not found\n");
   }
}

如果我們編譯並執行上面的程式，它將產生以下結果：

 ~~  (1,20)  (2,70)  (42,80)  (4,25)  ~~  ~~  ~~  ~~  ~~  ~~  ~~ (12,44)  (13,78)  (14,32)  ~~  ~~  (17,11)  (37,97)  ~~ 
Element found: 97
Element not found

列印頁面