
- 資料結構與演算法
- DSA - 首頁
- DSA - 概述
- DSA - 環境搭建
- DSA - 演算法基礎
- DSA - 漸進分析
- 資料結構
- DSA - 資料結構基礎
- DSA - 資料結構和型別
- DSA - 陣列資料結構
- 連結串列
- DSA - 連結串列資料結構
- DSA - 雙向連結串列資料結構
- DSA - 迴圈連結串列資料結構
- 棧與佇列
- DSA - 棧資料結構
- DSA - 表示式解析
- DSA - 佇列資料結構
- 搜尋演算法
- DSA - 搜尋演算法
- DSA - 線性搜尋演算法
- DSA - 二分搜尋演算法
- DSA - 插值搜尋
- DSA - 跳躍搜尋演算法
- DSA - 指數搜尋
- DSA - 斐波那契搜尋
- DSA - 子列表搜尋
- DSA - 雜湊表
- 排序演算法
- DSA - 排序演算法
- DSA - 氣泡排序演算法
- DSA - 插入排序演算法
- DSA - 選擇排序演算法
- DSA - 歸併排序演算法
- DSA - 希爾排序演算法
- DSA - 堆排序
- DSA - 桶排序演算法
- DSA - 計數排序演算法
- DSA - 基數排序演算法
- DSA - 快速排序演算法
- 圖資料結構
- DSA - 圖資料結構
- DSA - 深度優先遍歷
- DSA - 廣度優先遍歷
- DSA - 生成樹
- 樹資料結構
- DSA - 樹資料結構
- DSA - 樹的遍歷
- DSA - 二叉搜尋樹
- DSA - AVL 樹
- DSA - 紅黑樹
- DSA - B 樹
- DSA - B+ 樹
- DSA - 伸展樹
- DSA - 字典樹
- DSA - 堆資料結構
- 遞迴
- DSA - 遞迴演算法
- DSA - 使用遞迴實現漢諾塔
- DSA - 使用遞迴實現斐波那契數列
- 分治法
- DSA - 分治法
- DSA - 最大最小問題
- DSA - Strassen 矩陣乘法
- DSA - Karatsuba 演算法
- 貪心演算法
- DSA - 貪心演算法
- DSA - 旅行商問題(貪心演算法)
- DSA - Prim 最小生成樹
- DSA - Kruskal 最小生成樹
- DSA - Dijkstra 最短路徑演算法
- DSA - 地圖著色演算法
- DSA - 分數揹包問題
- DSA - 帶截止日期的作業排序
- DSA - 最優合併模式演算法
- 動態規劃
- DSA - 動態規劃
- DSA - 矩陣鏈乘法
- DSA - Floyd-Warshall 演算法
- DSA - 0-1 揹包問題
- DSA - 最長公共子序列演算法
- DSA - 旅行商問題(動態規劃)
- 近似演算法
- DSA - 近似演算法
- DSA - 頂點覆蓋演算法
- DSA - 集合覆蓋問題
- DSA - 旅行商問題(近似演算法)
- 隨機化演算法
- DSA - 隨機化演算法
- DSA - 隨機化快速排序演算法
- DSA - Karger 最小割演算法
- DSA - Fisher-Yates 洗牌演算法
- DSA 有用資源
- DSA - 問答
- DSA - 快速指南
- DSA - 有用資源
- DSA - 討論
Kasai 演算法
Kasai 演算法 用於根據給定的字尾陣列和文字構建最長公共字首 (LCP) 陣列。構建 LCP 陣列後,可以有效地在給定文字中搜索模式。我們已經討論了幾種可以有效解決模式匹配問題的演算法,包括 KMP 演算法、Boyer-Moore 演算法和 Rabin-Karp 演算法。在本教程中,我們將探討 Kasai 演算法。
Kasai 演算法的工作原理?
要理解Kasai 演算法,我們首先需要學習該演算法的兩個核心概念:
字尾陣列 - 這是一個數組,按字典順序儲存給定文字中所有後綴的起始索引。
LCP 陣列 - 顧名思義,兩個字串的最長公共字首 (簡稱 LCP) 是最長的既是兩個字串字首的字串。
Kasai 演算法基於以下觀察結果:
如果從位置i 和j 開始的兩個字尾的 LCP 為k,則從i+1 和j+1 開始的字尾的 LCP 至少為k-1,除非其中一個是字尾陣列中的最後一個字尾。這是因為在移除第一個字元後,字尾中字元的相對順序保持不變,除非它們到達文字的結尾。因此,我們可以利用此屬性在後綴陣列的線性掃描中計算 LCP 值,從第一個字尾開始,並將當前 LCP 值儲存在變數k 中。
每當我們移動到下一個字尾對時,我們將k減 1,然後只要位置i+k 和j+k 處的字元匹配,就遞增k。為了找到下一個字尾對,我們使用一個反向陣列,它將每個字尾索引對映到其在後綴陣列中的位置。
讓我們考慮 Kasai 演算法的輸入輸出場景:
Input: string: "AABAAABCEDBABCDDEBC" Output: Suffix Array: 0 1 9 3 8 2 14 10 4 11 5 15 7 12 13 6 Common Prefix Array: 1 2 3 0 4 1 2 2 0 1 1 1 1 0 1 0
示例
以下示例在不同的程式語言中實際演示了 Kasai 演算法。
#include<stdio.h> #include<string.h> #include<stdlib.h> // Defining a structure to represent suffix struct suffixes { int index; int rank[2]; }; // function to compare two suffixes int compare(const void* a, const void* b) { struct suffixes* suf1 = (struct suffixes*)a; struct suffixes* suf2 = (struct suffixes*)b; // If first rank is same if(suf1->rank[0] == suf2->rank[0]) { // Compare second rank return (suf1->rank[1] < suf2->rank[1]) ? -1 : 1; }else { return (suf1->rank[0] < suf2->rank[0]) ? -1 : 1; } } // function to build suffix array int* createSuffArray(char* orgnlString, int n) { struct suffixes suffArray[n]; for (int i = 0; i < n; i++) { suffArray[i].index = i; // Rank based on character itself suffArray[i].rank[0] = orgnlString[i] - 'a'; // Next rank is next character suffArray[i].rank[1] = ((i+1)<n)?(orgnlString[i+1]-'a'):-1; } // Sorting the suffixes qsort(suffArray, n, sizeof(struct suffixes), compare); int index[n]; for (int k = 4; k < 2*n; k = k*2) { int currRank = 0; int prevRank = suffArray[0].rank[0]; suffArray[0].rank[0] = currRank; index[suffArray[0].index] = 0; // to assign rank and index values to first suffix for (int i = 1; i < n; i++) { if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i-1].rank[1]) { prevRank = suffArray[i].rank[0]; // If same as previous rank, assign the same new rank suffArray[i].rank[0] = currRank; } else{ prevRank = suffArray[i].rank[0]; // increment rank and assign suffArray[i].rank[0] = ++currRank; } index[suffArray[i].index] = i; } for (int i = 0; i < n; i++) { int nextIndex = suffArray[i].index + k/2; suffArray[i].rank[1] = (nextIndex < n)? suffArray[index[nextIndex]].rank[0]: -1; } qsort(suffArray, n, sizeof(struct suffixes), compare); } // to store indexes of all sorted suffixes int* suffixVector = (int*)malloc(n * sizeof(int)); for (int i = 0; i < n; i++) suffixVector[i] = suffArray[i].index; return suffixVector; } // applying Kasai's algorithm to build LCP array int* kasaiAlgorithm(char* orgnlString, int* suffixVector, int n) { // To store lcp array int* longPrefix = (int*)malloc(n * sizeof(int)); // To store inverse of suffix array elements int* suffixInverse = (int*)malloc(n * sizeof(int)); // to fill values in suffixInverse[] array for (int i=0; i < n; i++) suffixInverse[suffixVector[i]] = i; int k = 0; for (int i=0; i<n; i++) { if (suffixInverse[i] == n-1) { k = 0; continue; } int j = suffixVector[suffixInverse[i]+1]; while (i+k<n && j+k<n && orgnlString[i+k]==orgnlString[j+k]) k++; longPrefix[suffixInverse[i]] = k; if (k>0) k--; } return longPrefix; } void displayArray(int* vec, int n) { for (int i = 0; i < n; i++) printf("%d ", vec[i]); printf("\n"); } int main() { char orgnlString[] = "AAABCAEAAABCBDDAAAABC"; int n = strlen(orgnlString); int* suffArray = createSuffArray(orgnlString, n); printf("Suffix Array is: \n"); displayArray(suffArray, n); // calling function to build LCP array int* prefixCommn = kasaiAlgorithm(orgnlString, suffArray, n); // Print the LCP array printf("Common Prefix Array is: \n"); displayArray(prefixCommn, n); return 0; }
#include<iostream> #include<vector> #include<algorithm> using namespace std; // Defining a structure to represent suffix struct suffixes { int index; int rank[2]; }; // function to compare two suffixes bool compare(suffixes suf1, suffixes suf2) { // If first rank is same if(suf1.rank[0] == suf2.rank[0]) { // Compare second rank if(suf1.rank[1] < suf2.rank[1]) return true; else return false; }else { if(suf1.rank[0] < suf2.rank[0]) return true; else return false; } } // function to build suffix array vector<int> createSuffArray(string orgnlString) { int n = orgnlString.size(); suffixes suffArray[n]; for (int i = 0; i < n; i++) { suffArray[i].index = i; // Rank based on character itself suffArray[i].rank[0] = orgnlString[i] - 'a'; // Next rank is next character suffArray[i].rank[1] = ((i+1)<n)?(orgnlString[i+1]-'a'):-1; } // Sorting the suffixes sort(suffArray, suffArray+n, compare); int index[n]; for (int k = 4; k < 2*n; k = k*2) { int currRank = 0; int prevRank = suffArray[0].rank[0]; suffArray[0].rank[0] = currRank; index[suffArray[0].index] = 0; // to assign rank and index values to first suffix for (int i = 1; i < n; i++) { if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i-1].rank[1]) { prevRank = suffArray[i].rank[0]; // If same as previous rank, assign the same new rank suffArray[i].rank[0] = currRank; } else{ prevRank = suffArray[i].rank[0]; // increment rank and assign suffArray[i].rank[0] = ++currRank; } index[suffArray[i].index] = i; } for (int i = 0; i < n; i++) { int nextIndex = suffArray[i].index + k/2; suffArray[i].rank[1] = (nextIndex < n)? suffArray[index[nextIndex]].rank[0]: -1; } sort(suffArray, suffArray+n, compare); } // to store indexes of all sorted suffixes vector<int>suffixVector; for (int i = 0; i < n; i++) suffixVector.push_back(suffArray[i].index); return suffixVector; } // applying Kasai's algorithm to build LCP array vector<int> kasaiAlgorithm(string orgnlString, vector<int> suffixVector) { int n = suffixVector.size(); // To store lcp array vector<int> longPrefix(n, 0); // To store inverse of suffix array elements vector<int> suffixInverse(n, 0); // to fill values in suffixInverse[] array for (int i=0; i < n; i++) suffixInverse[suffixVector[i]] = i; int k = 0; for (int i=0; i<n; i++) { if (suffixInverse[i] == n-1) { k = 0; continue; } int j = suffixVector[suffixInverse[i]+1]; while (i+k<n && j+k<n && orgnlString[i+k]==orgnlString[j+k]) k++; longPrefix[suffixInverse[i]] = k; if (k>0) k--; } return longPrefix; } void displayArray(vector<int> vec) { vector<int>::iterator it; for (it = vec.begin(); it < vec.end() ; it++) cout << *it << " "; cout << endl; } int main() { string orgnlString = "AAABCAEAAABCBDDAAAABC"; vector<int>suffArray = createSuffArray(orgnlString); int n = suffArray.size(); cout<< "Suffix Array is: "<<endl; displayArray(suffArray); // calling function to build LCP array vector<int>prefixCommn = kasaiAlgorithm(orgnlString, suffArray); // Print the LCP array cout<< "Common Prefix Array is: "<<endl; displayArray(prefixCommn); }
import java.util.Arrays; public class Main { // Defining a class to represent suffix static class suffixes { int index; int[] rank = new int[2]; } // method to compare two suffixes static int compare(suffixes suf1, suffixes suf2) { // If first rank is same if (suf1.rank[0] == suf2.rank[0]) { // Compare second rank if (suf1.rank[1] < suf2.rank[1]) return -1; else return 1; } else { if (suf1.rank[0] < suf2.rank[0]) return -1; else return 1; } } // method to build suffix array static int[] createSuffArray(String orgnlString) { int n = orgnlString.length(); suffixes[] suffArray = new suffixes[n]; for (int i = 0; i < n; i++) { suffArray[i] = new suffixes(); suffArray[i].index = i; // Rank based on character itself suffArray[i].rank[0] = orgnlString.charAt(i) - 'a'; // Next rank is next character suffArray[i].rank[1] = ((i + 1) < n) ? (orgnlString.charAt(i + 1) - 'a') : -1; } // Sorting the suffixes Arrays.sort(suffArray, Main::compare); int[] index = new int[n]; for (int k = 4; k < 2 * n; k = k * 2) { int currRank = 0; int prevRank = suffArray[0].rank[0]; suffArray[0].rank[0] = currRank; index[suffArray[0].index] = 0; // to assign rank and index values to first suffix for (int i = 1; i < n; i++) { if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i - 1].rank[1]) { prevRank = suffArray[i].rank[0]; // If same as previous rank, assign the same new rank suffArray[i].rank[0] = currRank; } else { prevRank = suffArray[i].rank[0]; // increment rank and assign suffArray[i].rank[0] = ++currRank; } index[suffArray[i].index] = i; } for (int i = 0; i < n; i++) { int nextIndex = suffArray[i].index + k / 2; suffArray[i].rank[1] = (nextIndex < n) ? suffArray[index[nextIndex]].rank[0] : -1; } Arrays.sort(suffArray, Main::compare); } // to store indexes of all sorted suffixes int[] suffixVector = new int[n]; for (int i = 0; i < n; i++) suffixVector[i] = suffArray[i].index; return suffixVector; } // applying Kasai's algorithm to build LCP array static int[] kasaiAlgorithm(String orgnlString, int[] suffixVector) { int n = suffixVector.length; // To store lcp array int[] longPrefix = new int[n]; // To store inverse of suffix array elements int[] suffixInverse = new int[n]; // to fill values in suffixInverse[] array for (int i = 0; i < n; i++) suffixInverse[suffixVector[i]] = i; int k = 0; for (int i = 0; i < n; i++) { if (suffixInverse[i] == n - 1) { k = 0; continue; } int j = suffixVector[suffixInverse[i] + 1]; while (i + k < n && j + k < n && orgnlString.charAt(i + k) == orgnlString.charAt(j + k)) k++; longPrefix[suffixInverse[i]] = k; if (k > 0) k--; } return longPrefix; } static void displayArray(int[] vec) { for (int i : vec) System.out.print(i + " "); System.out.println(); } public static void main(String[] args) { String orgnlString = "AAABCAEAAABCBDDAAAABC"; int[] suffArray = createSuffArray(orgnlString); System.out.println("Suffix Array is: "); displayArray(suffArray); // calling method to build LCP array int[] prefixCommn = kasaiAlgorithm(orgnlString, suffArray); // Print the LCP array System.out.println("Common Prefix Array is: "); displayArray(prefixCommn); } }
# Defining a class to represent suffix class Suffix: def __init__(self): self.index = 0 self.rank = [0, 0] # function to compare two suffixes def compare(a, b): if a.rank[0] == b.rank[0]: if a.rank[1] < b.rank[1]: return -1 else: return 1 else: if a.rank[0] < b.rank[0]: return -1 else: return 1 # function to build suffix array def createSuffArray(orgnlString): n = len(orgnlString) suffArray = [Suffix() for _ in range(n)] for i in range(n): suffArray[i].index = i suffArray[i].rank[0] = ord(orgnlString[i]) - ord('a') suffArray[i].rank[1] = ord(orgnlString[i + 1]) - ord('a') if ((i + 1) < n) else -1 suffArray = sorted(suffArray, key=lambda x: (x.rank[0], x.rank[1])) ind = [0]*n k = 4 while k < 2*n: rank = 0 prev_rank = suffArray[0].rank[0] suffArray[0].rank[0] = rank ind[suffArray[0].index] = 0 for i in range(1, n): if suffArray[i].rank[0] == prev_rank and suffArray[i].rank[1] == suffArray[i - 1].rank[1]: prev_rank = suffArray[i].rank[0] suffArray[i].rank[0] = rank else: prev_rank = suffArray[i].rank[0] rank += 1 suffArray[i].rank[0] = rank ind[suffArray[i].index] = i for i in range(n): nextIndex = suffArray[i].index + k//2 suffArray[i].rank[1] = suffArray[ind[nextIndex]].rank[0] if (nextIndex < n) else -1 suffArray = sorted(suffArray, key=lambda x: (x.rank[0], x.rank[1])) k *= 2 suffixVector = [0]*n for i in range(n): suffixVector[i] = suffArray[i].index return suffixVector # applying Kasai's algorithm to build LCP array def kasaiAlgorithm(orgnlString, suffixVector): n = len(suffixVector) longPrefix = [0]*n suffixInverse = [0]*n for i in range(n): suffixInverse[suffixVector[i]] = i k = 0 for i in range(n): if suffixInverse[i] == n - 1: k = 0 continue j = suffixVector[suffixInverse[i] + 1] while i + k < n and j + k < n and orgnlString[i + k] == orgnlString[j + k]: k += 1 longPrefix[suffixInverse[i]] = k if k > 0: k -= 1 return longPrefix # Function to print an array def displayArray(vec): for i in vec: print(i, end=' ') print() def main(): orgnlString = "AAABCAEAAABCBDDAAAABC" suffArray = createSuffArray(orgnlString) print("Suffix Array is: ") displayArray(suffArray) prefixCommn = kasaiAlgorithm(orgnlString, suffArray) print("Common Prefix Array is: ") displayArray(prefixCommn) if __name__ == "__main__": main()
輸出
Suffix Array is: 15 0 7 16 17 1 8 2 9 18 5 19 3 10 12 4 11 20 14 13 6 Common Prefix Array is: 3 5 5 2 4 4 4 3 3 3 0 2 2 2 0 1 1 1 1 0 0
廣告