- 資料結構與演算法
- DSA - 首頁
- DSA - 概述
- DSA - 環境搭建
- DSA - 演算法基礎
- DSA - 漸進分析
- 資料結構
- DSA - 資料結構基礎
- DSA - 資料結構和型別
- DSA - 陣列資料結構
- 連結串列
- DSA - 連結串列資料結構
- DSA - 雙向連結串列資料結構
- DSA - 迴圈連結串列資料結構
- 棧與佇列
- DSA - 棧資料結構
- DSA - 表示式解析
- DSA - 佇列資料結構
- 搜尋演算法
- DSA - 搜尋演算法
- DSA - 線性搜尋演算法
- DSA - 二分搜尋演算法
- DSA - 插值搜尋
- DSA - 跳躍搜尋演算法
- DSA - 指數搜尋
- DSA - 斐波那契搜尋
- DSA - 子列表搜尋
- DSA - 雜湊表
- 排序演算法
- DSA - 排序演算法
- DSA - 氣泡排序演算法
- DSA - 插入排序演算法
- DSA - 選擇排序演算法
- DSA - 歸併排序演算法
- DSA - 希爾排序演算法
- DSA - 堆排序
- DSA - 桶排序演算法
- DSA - 計數排序演算法
- DSA - 基數排序演算法
- DSA - 快速排序演算法
- 圖資料結構
- DSA - 圖資料結構
- DSA - 深度優先遍歷
- DSA - 廣度優先遍歷
- DSA - 生成樹
- 樹資料結構
- DSA - 樹資料結構
- DSA - 樹的遍歷
- DSA - 二叉搜尋樹
- DSA - AVL 樹
- DSA - 紅黑樹
- DSA - B 樹
- DSA - B+ 樹
- DSA - 伸展樹
- DSA - 字典樹
- DSA - 堆資料結構
- 遞迴
- DSA - 遞迴演算法
- DSA - 使用遞迴實現漢諾塔
- DSA - 使用遞迴實現斐波那契數列
- 分治法
- DSA - 分治法
- DSA - 最大最小問題
- DSA - Strassen 矩陣乘法
- DSA - Karatsuba 演算法
- 貪心演算法
- DSA - 貪心演算法
- DSA - 旅行商問題(貪心演算法)
- DSA - Prim 最小生成樹
- DSA - Kruskal 最小生成樹
- DSA - Dijkstra 最短路徑演算法
- DSA - 地圖著色演算法
- DSA - 分數揹包問題
- DSA - 帶截止日期的作業排序
- DSA - 最優合併模式演算法
- 動態規劃
- DSA - 動態規劃
- DSA - 矩陣鏈乘法
- DSA - Floyd-Warshall 演算法
- DSA - 0-1 揹包問題
- DSA - 最長公共子序列演算法
- DSA - 旅行商問題(動態規劃)
- 近似演算法
- DSA - 近似演算法
- DSA - 頂點覆蓋演算法
- DSA - 集合覆蓋問題
- DSA - 旅行商問題(近似演算法)
- 隨機化演算法
- DSA - 隨機化演算法
- DSA - 隨機化快速排序演算法
- DSA - Karger 最小割演算法
- DSA - Fisher-Yates 洗牌演算法
- DSA 有用資源
- DSA - 問答
- DSA - 快速指南
- DSA - 有用資源
- DSA - 討論
Kasai 演算法
Kasai 演算法 用於根據給定的字尾陣列和文字構建最長公共字首 (LCP) 陣列。構建 LCP 陣列後,可以有效地在給定文字中搜索模式。我們已經討論了幾種可以有效解決模式匹配問題的演算法,包括 KMP 演算法、Boyer-Moore 演算法和 Rabin-Karp 演算法。在本教程中,我們將探討 Kasai 演算法。
Kasai 演算法的工作原理?
要理解Kasai 演算法,我們首先需要學習該演算法的兩個核心概念:
字尾陣列 - 這是一個數組,按字典順序儲存給定文字中所有後綴的起始索引。
LCP 陣列 - 顧名思義,兩個字串的最長公共字首 (簡稱 LCP) 是最長的既是兩個字串字首的字串。
Kasai 演算法基於以下觀察結果:
如果從位置i 和j 開始的兩個字尾的 LCP 為k,則從i+1 和j+1 開始的字尾的 LCP 至少為k-1,除非其中一個是字尾陣列中的最後一個字尾。這是因為在移除第一個字元後,字尾中字元的相對順序保持不變,除非它們到達文字的結尾。因此,我們可以利用此屬性在後綴陣列的線性掃描中計算 LCP 值,從第一個字尾開始,並將當前 LCP 值儲存在變數k 中。
每當我們移動到下一個字尾對時,我們將k減 1,然後只要位置i+k 和j+k 處的字元匹配,就遞增k。為了找到下一個字尾對,我們使用一個反向陣列,它將每個字尾索引對映到其在後綴陣列中的位置。
讓我們考慮 Kasai 演算法的輸入輸出場景:
Input: string: "AABAAABCEDBABCDDEBC" Output: Suffix Array: 0 1 9 3 8 2 14 10 4 11 5 15 7 12 13 6 Common Prefix Array: 1 2 3 0 4 1 2 2 0 1 1 1 1 0 1 0
示例
以下示例在不同的程式語言中實際演示了 Kasai 演算法。
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
// Defining a structure to represent suffix
struct suffixes {
int index;
int rank[2];
};
// function to compare two suffixes
int compare(const void* a, const void* b) {
struct suffixes* suf1 = (struct suffixes*)a;
struct suffixes* suf2 = (struct suffixes*)b;
// If first rank is same
if(suf1->rank[0] == suf2->rank[0]) {
// Compare second rank
return (suf1->rank[1] < suf2->rank[1]) ? -1 : 1;
}else {
return (suf1->rank[0] < suf2->rank[0]) ? -1 : 1;
}
}
// function to build suffix array
int* createSuffArray(char* orgnlString, int n) {
struct suffixes suffArray[n];
for (int i = 0; i < n; i++) {
suffArray[i].index = i;
// Rank based on character itself
suffArray[i].rank[0] = orgnlString[i] - 'a';
// Next rank is next character
suffArray[i].rank[1] = ((i+1)<n)?(orgnlString[i+1]-'a'):-1;
}
// Sorting the suffixes
qsort(suffArray, n, sizeof(struct suffixes), compare);
int index[n];
for (int k = 4; k < 2*n; k = k*2) {
int currRank = 0;
int prevRank = suffArray[0].rank[0];
suffArray[0].rank[0] = currRank;
index[suffArray[0].index] = 0;
// to assign rank and index values to first suffix
for (int i = 1; i < n; i++) {
if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i-1].rank[1]) {
prevRank = suffArray[i].rank[0];
// If same as previous rank, assign the same new rank
suffArray[i].rank[0] = currRank;
} else{
prevRank = suffArray[i].rank[0];
// increment rank and assign
suffArray[i].rank[0] = ++currRank;
}
index[suffArray[i].index] = i;
}
for (int i = 0; i < n; i++) {
int nextIndex = suffArray[i].index + k/2;
suffArray[i].rank[1] = (nextIndex < n)? suffArray[index[nextIndex]].rank[0]: -1;
}
qsort(suffArray, n, sizeof(struct suffixes), compare);
}
// to store indexes of all sorted suffixes
int* suffixVector = (int*)malloc(n * sizeof(int));
for (int i = 0; i < n; i++)
suffixVector[i] = suffArray[i].index;
return suffixVector;
}
// applying Kasai's algorithm to build LCP array
int* kasaiAlgorithm(char* orgnlString, int* suffixVector, int n) {
// To store lcp array
int* longPrefix = (int*)malloc(n * sizeof(int));
// To store inverse of suffix array elements
int* suffixInverse = (int*)malloc(n * sizeof(int));
// to fill values in suffixInverse[] array
for (int i=0; i < n; i++)
suffixInverse[suffixVector[i]] = i;
int k = 0;
for (int i=0; i<n; i++) {
if (suffixInverse[i] == n-1) {
k = 0;
continue;
}
int j = suffixVector[suffixInverse[i]+1];
while (i+k<n && j+k<n && orgnlString[i+k]==orgnlString[j+k])
k++;
longPrefix[suffixInverse[i]] = k;
if (k>0)
k--;
}
return longPrefix;
}
void displayArray(int* vec, int n) {
for (int i = 0; i < n; i++)
printf("%d ", vec[i]);
printf("\n");
}
int main() {
char orgnlString[] = "AAABCAEAAABCBDDAAAABC";
int n = strlen(orgnlString);
int* suffArray = createSuffArray(orgnlString, n);
printf("Suffix Array is: \n");
displayArray(suffArray, n);
// calling function to build LCP array
int* prefixCommn = kasaiAlgorithm(orgnlString, suffArray, n);
// Print the LCP array
printf("Common Prefix Array is: \n");
displayArray(prefixCommn, n);
return 0;
}
#include<iostream>
#include<vector>
#include<algorithm>
using namespace std;
// Defining a structure to represent suffix
struct suffixes {
int index;
int rank[2];
};
// function to compare two suffixes
bool compare(suffixes suf1, suffixes suf2) {
// If first rank is same
if(suf1.rank[0] == suf2.rank[0]) {
// Compare second rank
if(suf1.rank[1] < suf2.rank[1])
return true;
else
return false;
}else {
if(suf1.rank[0] < suf2.rank[0])
return true;
else
return false;
}
}
// function to build suffix array
vector<int> createSuffArray(string orgnlString) {
int n = orgnlString.size();
suffixes suffArray[n];
for (int i = 0; i < n; i++) {
suffArray[i].index = i;
// Rank based on character itself
suffArray[i].rank[0] = orgnlString[i] - 'a';
// Next rank is next character
suffArray[i].rank[1] = ((i+1)<n)?(orgnlString[i+1]-'a'):-1;
}
// Sorting the suffixes
sort(suffArray, suffArray+n, compare);
int index[n];
for (int k = 4; k < 2*n; k = k*2) {
int currRank = 0;
int prevRank = suffArray[0].rank[0];
suffArray[0].rank[0] = currRank;
index[suffArray[0].index] = 0;
// to assign rank and index values to first suffix
for (int i = 1; i < n; i++) {
if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i-1].rank[1]) {
prevRank = suffArray[i].rank[0];
// If same as previous rank, assign the same new rank
suffArray[i].rank[0] = currRank;
} else{
prevRank = suffArray[i].rank[0];
// increment rank and assign
suffArray[i].rank[0] = ++currRank;
}
index[suffArray[i].index] = i;
}
for (int i = 0; i < n; i++) {
int nextIndex = suffArray[i].index + k/2;
suffArray[i].rank[1] = (nextIndex < n)? suffArray[index[nextIndex]].rank[0]: -1;
}
sort(suffArray, suffArray+n, compare);
}
// to store indexes of all sorted suffixes
vector<int>suffixVector;
for (int i = 0; i < n; i++)
suffixVector.push_back(suffArray[i].index);
return suffixVector;
}
// applying Kasai's algorithm to build LCP array
vector<int> kasaiAlgorithm(string orgnlString, vector<int> suffixVector) {
int n = suffixVector.size();
// To store lcp array
vector<int> longPrefix(n, 0);
// To store inverse of suffix array elements
vector<int> suffixInverse(n, 0);
// to fill values in suffixInverse[] array
for (int i=0; i < n; i++)
suffixInverse[suffixVector[i]] = i;
int k = 0;
for (int i=0; i<n; i++) {
if (suffixInverse[i] == n-1) {
k = 0;
continue;
}
int j = suffixVector[suffixInverse[i]+1];
while (i+k<n && j+k<n && orgnlString[i+k]==orgnlString[j+k])
k++;
longPrefix[suffixInverse[i]] = k;
if (k>0)
k--;
}
return longPrefix;
}
void displayArray(vector<int> vec) {
vector<int>::iterator it;
for (it = vec.begin(); it < vec.end() ; it++)
cout << *it << " ";
cout << endl;
}
int main() {
string orgnlString = "AAABCAEAAABCBDDAAAABC";
vector<int>suffArray = createSuffArray(orgnlString);
int n = suffArray.size();
cout<< "Suffix Array is: "<<endl;
displayArray(suffArray);
// calling function to build LCP array
vector<int>prefixCommn = kasaiAlgorithm(orgnlString, suffArray);
// Print the LCP array
cout<< "Common Prefix Array is: "<<endl;
displayArray(prefixCommn);
}
import java.util.Arrays;
public class Main {
// Defining a class to represent suffix
static class suffixes {
int index;
int[] rank = new int[2];
}
// method to compare two suffixes
static int compare(suffixes suf1, suffixes suf2) {
// If first rank is same
if (suf1.rank[0] == suf2.rank[0]) {
// Compare second rank
if (suf1.rank[1] < suf2.rank[1])
return -1;
else
return 1;
} else {
if (suf1.rank[0] < suf2.rank[0])
return -1;
else
return 1;
}
}
// method to build suffix array
static int[] createSuffArray(String orgnlString) {
int n = orgnlString.length();
suffixes[] suffArray = new suffixes[n];
for (int i = 0; i < n; i++) {
suffArray[i] = new suffixes();
suffArray[i].index = i;
// Rank based on character itself
suffArray[i].rank[0] = orgnlString.charAt(i) - 'a';
// Next rank is next character
suffArray[i].rank[1] = ((i + 1) < n) ? (orgnlString.charAt(i + 1) - 'a') : -1;
}
// Sorting the suffixes
Arrays.sort(suffArray, Main::compare);
int[] index = new int[n];
for (int k = 4; k < 2 * n; k = k * 2) {
int currRank = 0;
int prevRank = suffArray[0].rank[0];
suffArray[0].rank[0] = currRank;
index[suffArray[0].index] = 0;
// to assign rank and index values to first suffix
for (int i = 1; i < n; i++) {
if (suffArray[i].rank[0] == prevRank && suffArray[i].rank[1] == suffArray[i - 1].rank[1]) {
prevRank = suffArray[i].rank[0];
// If same as previous rank, assign the same new rank
suffArray[i].rank[0] = currRank;
} else {
prevRank = suffArray[i].rank[0];
// increment rank and assign
suffArray[i].rank[0] = ++currRank;
}
index[suffArray[i].index] = i;
}
for (int i = 0; i < n; i++) {
int nextIndex = suffArray[i].index + k / 2;
suffArray[i].rank[1] = (nextIndex < n) ? suffArray[index[nextIndex]].rank[0] : -1;
}
Arrays.sort(suffArray, Main::compare);
}
// to store indexes of all sorted suffixes
int[] suffixVector = new int[n];
for (int i = 0; i < n; i++)
suffixVector[i] = suffArray[i].index;
return suffixVector;
}
// applying Kasai's algorithm to build LCP array
static int[] kasaiAlgorithm(String orgnlString, int[] suffixVector) {
int n = suffixVector.length;
// To store lcp array
int[] longPrefix = new int[n];
// To store inverse of suffix array elements
int[] suffixInverse = new int[n];
// to fill values in suffixInverse[] array
for (int i = 0; i < n; i++)
suffixInverse[suffixVector[i]] = i;
int k = 0;
for (int i = 0; i < n; i++) {
if (suffixInverse[i] == n - 1) {
k = 0;
continue;
}
int j = suffixVector[suffixInverse[i] + 1];
while (i + k < n && j + k < n && orgnlString.charAt(i + k) == orgnlString.charAt(j + k))
k++;
longPrefix[suffixInverse[i]] = k;
if (k > 0)
k--;
}
return longPrefix;
}
static void displayArray(int[] vec) {
for (int i : vec)
System.out.print(i + " ");
System.out.println();
}
public static void main(String[] args) {
String orgnlString = "AAABCAEAAABCBDDAAAABC";
int[] suffArray = createSuffArray(orgnlString);
System.out.println("Suffix Array is: ");
displayArray(suffArray);
// calling method to build LCP array
int[] prefixCommn = kasaiAlgorithm(orgnlString, suffArray);
// Print the LCP array
System.out.println("Common Prefix Array is: ");
displayArray(prefixCommn);
}
}
# Defining a class to represent suffix
class Suffix:
def __init__(self):
self.index = 0
self.rank = [0, 0]
# function to compare two suffixes
def compare(a, b):
if a.rank[0] == b.rank[0]:
if a.rank[1] < b.rank[1]:
return -1
else:
return 1
else:
if a.rank[0] < b.rank[0]:
return -1
else:
return 1
# function to build suffix array
def createSuffArray(orgnlString):
n = len(orgnlString)
suffArray = [Suffix() for _ in range(n)]
for i in range(n):
suffArray[i].index = i
suffArray[i].rank[0] = ord(orgnlString[i]) - ord('a')
suffArray[i].rank[1] = ord(orgnlString[i + 1]) - ord('a') if ((i + 1) < n) else -1
suffArray = sorted(suffArray, key=lambda x: (x.rank[0], x.rank[1]))
ind = [0]*n
k = 4
while k < 2*n:
rank = 0
prev_rank = suffArray[0].rank[0]
suffArray[0].rank[0] = rank
ind[suffArray[0].index] = 0
for i in range(1, n):
if suffArray[i].rank[0] == prev_rank and suffArray[i].rank[1] == suffArray[i - 1].rank[1]:
prev_rank = suffArray[i].rank[0]
suffArray[i].rank[0] = rank
else:
prev_rank = suffArray[i].rank[0]
rank += 1
suffArray[i].rank[0] = rank
ind[suffArray[i].index] = i
for i in range(n):
nextIndex = suffArray[i].index + k//2
suffArray[i].rank[1] = suffArray[ind[nextIndex]].rank[0] if (nextIndex < n) else -1
suffArray = sorted(suffArray, key=lambda x: (x.rank[0], x.rank[1]))
k *= 2
suffixVector = [0]*n
for i in range(n):
suffixVector[i] = suffArray[i].index
return suffixVector
# applying Kasai's algorithm to build LCP array
def kasaiAlgorithm(orgnlString, suffixVector):
n = len(suffixVector)
longPrefix = [0]*n
suffixInverse = [0]*n
for i in range(n):
suffixInverse[suffixVector[i]] = i
k = 0
for i in range(n):
if suffixInverse[i] == n - 1:
k = 0
continue
j = suffixVector[suffixInverse[i] + 1]
while i + k < n and j + k < n and orgnlString[i + k] == orgnlString[j + k]:
k += 1
longPrefix[suffixInverse[i]] = k
if k > 0:
k -= 1
return longPrefix
# Function to print an array
def displayArray(vec):
for i in vec:
print(i, end=' ')
print()
def main():
orgnlString = "AAABCAEAAABCBDDAAAABC"
suffArray = createSuffArray(orgnlString)
print("Suffix Array is: ")
displayArray(suffArray)
prefixCommn = kasaiAlgorithm(orgnlString, suffArray)
print("Common Prefix Array is: ")
displayArray(prefixCommn)
if __name__ == "__main__":
main()
輸出
Suffix Array is: 15 0 7 16 17 1 8 2 9 18 5 19 3 10 12 4 11 20 14 13 6 Common Prefix Array is: 3 5 5 2 4 4 4 3 3 3 0 2 2 2 0 1 1 1 1 0 0
廣告