Knuth-Morris-Pratt 演算法



用於模式匹配的 KMP 演算法

KMP 演算法用於解決模式匹配問題,模式匹配問題是在文字中查詢給定模式的所有出現位置的任務。在查詢多個模式時非常有用。例如,如果文字是“aabbaaccaabbaadde”,模式是“aabaa”,則模式在文字中出現了兩次,分別位於索引 0 和 8 處。

此問題的樸素解決方案是從最左邊的位置開始,向右移動,將模式與文字的每個可能的子字串進行比較。這需要 O(n*m) 時間,其中 'n' 是文字的長度,'m' 是模式的長度。

當我們處理長文字文件時,蠻力法和樸素方法可能會導致冗餘比較。為了避免這種冗餘,Knuth、Morris 和 Pratt 開發了一種線性序列匹配演算法,稱為 KMP 模式匹配演算法。它也稱為 Knuth Morris Pratt 模式匹配演算法。

KMP 演算法如何工作?

KMP 演算法從左到右開始搜尋操作。它使用字首函式來避免在搜尋模式時進行不必要的比較。此函式儲存迄今為止匹配的字元數,稱為LPS 值。KMP 演算法涉及以下步驟:

  • 定義字首函式。

  • 滑動模式以進行比較。

  • 如果所有字元都匹配,則表示已找到匹配項。

  • 如果沒有匹配,則使用字首函式跳過不必要的比較。如果與不匹配字元之前的字元的 LPS 值為“0”,則從模式的索引 0 開始與文字中的下一個字元進行比較。但是,如果 LPS 值大於“0”,則從等於之前不匹配字元的 LPS 值的索引值開始比較。

KMP Solution

KMP 演算法需要 O(n + m) 時間和 O(m) 空間。它比樸素解決方案更快,因為它跳過了冗餘比較,並且每個文字字元最多隻比較一次。

讓我們透過一個示例瞭解模式匹配問題的輸入輸出場景:

Input:
main String: "AAAABCAAAABCBAAAABC" 
pattern: "AAABC"
Output:
Pattern found at position: 1
Pattern found at position: 7
Pattern found at position: 14

示例

以下示例從實踐上說明了用於模式匹配的 KMP 演算法。

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
// function to find prefix
void prefixSearch(char* pat, int m, int* pps) {
   int length = 0; 
   // array to store prefix
   pps[0] = 0;      
   int i = 1; 
   while(i < m) { 
      // to check if the current character matches the previous character
      if(pat[i] == pat[length]) { 
          // increment the length
         length++; 
         // store the length in the prefix array  
         pps[i] = length;  
      }else { 
         if(length != 0) { 
            // to update length of previous prefix length
            length = pps[length - 1]; 
            i--; 
         } else 
            // if the length is 0, store 0 in the prefix array
            pps[i] = 0; 
      }
      i++; // incrementing i 
   }
}
// function to search for pattern
void patrnSearch(char* orgnString, char* patt, int m, int *locArray, int *loc) {
   int n, i = 0, j = 0; 
   n = strlen(orgnString); 
   // array to store the prefix values  
   int* prefixArray = (int*)malloc(m * sizeof(int)); // allocate memory for the prefix array
   // calling prefix function to fill the prefix array
   prefixSearch(patt, m, prefixArray); 
   *loc = 0; // initialize the location index
   while(i < n) { 
       // checking if main string character matches pattern string character
      if(orgnString[i] == patt[j]) { 
         // increment both i and j
         i++; 
         j++; 
      }
      // if j and m are equal pattern is found
      if(j == m) { 
         // store the location of the pattern
         locArray[*loc] = i-j; 
         (*loc)++; // increment the location index
          // update j to the previous prefix value
         j = prefixArray[j-1];
      // checking if i is less than n and the current characters do not match
      }else if(i < n && patt[j] != orgnString[i]) { 
         if(j != 0) 
            // update j to the previous prefix value
            j = prefixArray[j-1]; 
         // if j is zero    
         else 
            i++; // increment i
      }
   }
   free(prefixArray); // free the memory of the prefix array
}
int main() {
    // declare the original text
   char* orgnStr = "AAAABCAEAAABCBDDAAAABC"; 
   // pattern to be found
   char* patrn = "AAABC"; 
   // get the size of the pattern
   int m = strlen(patrn);
   // array to store the locations of the pattern
   int locationArray[strlen(orgnStr)]; 
   // to store the number of locations
   int index; 
   // calling pattern search function
   patrnSearch(orgnStr, patrn, m, locationArray, &index); 
   // to loop through location array
   for(int i = 0; i<index; i++) { 
      // print the location of the pattern
      printf("Pattern found at location: %d\n", locationArray[i]); 
   }
}
#include<iostream>
using namespace std; 
// function to find prefix
void prefixSearch(string pattern, int m, int storePrefx[]) {
   int length = 0; 
   // array to store prefix
   storePrefx[0] = 0;      
   int i = 1; 
   while(i < m) { 
      // to check if the current character matches the previous character
      if(pattern[i] == pattern[length]) { 
         // increment the length
         length++; 
         // store the length in the prefix array  
         storePrefx[i] = length;  
      }else { 
         if(length != 0) { 
            // to update length of previous prefix length
            length = storePrefx[length - 1]; 
            i--; 
         } else 
            // if the length is 0, store 0 in the prefix array
            storePrefx[i] = 0; 
      }
      i++; // incrementing i 
   }
}
// function to search for pattern
void patrnSearch(string orgnString, string patt, int *locArray, int &loc) {
   int n, m, i = 0, j = 0; 
   n = orgnString.size(); 
   m = patt.size(); 
   // array to store the prefix values  
   int prefixArray[m];  
   // calling prefix function to fill the prefix array
   prefixSearch(patt, m, prefixArray); 
   loc = 0; // initialize the location index
   while(i < n) { 
      // checking if main string character matches pattern string character
      if(orgnString[i] == patt[j]) { 
         // increment both i and j
         i++; 
         j++; 
      }
      // if j and m are equal pattern is found
      if(j == m) { 
         // store the location of the pattern
         locArray[loc] = i-j; 
         loc++; // increment the location index
         // update j to the previous prefix value
         j = prefixArray[j-1];
      // checking if i is less than n and the current characters do not match
      }else if(i < n && patt[j] != orgnString[i]) { 
         if(j != 0) 
            // update j to the previous prefix value
            j = prefixArray[j-1]; 
         // if j is zero    
         else 
            i++; // increment i
      }
   }
}
int main() {
   // declare the original text
   string orgnStr = "AAAABCAEAAABCBDDAAAABC"; 
   // pattern to be found
   string patrn = "AAABC"; 
   // array to store the locations of the pattern
   int locationArray[orgnStr.size()]; 
   // to store the number of locations
   int index; 
   // calling pattern search function
   patrnSearch(orgnStr, patrn, locationArray, index); 
   // to loop through location array
   for(int i = 0; i<index; i++) { 
      // print the location of the pattern
      cout << "Pattern found at location: " <<locationArray[i] << endl; 
   }
}
import java.io.*; 
// class to implement the KMP algorithm
public class KMPalgo {
   // function to find prefix
   public static void prefixSearch(String pat, int m, int[] storePrefx) {
      int length = 0;
      // array to store prefix
      storePrefx[0] = 0;
      int i = 1;
      while (i < m) {
         // to check if the current character matches the previous character
         if (pat.charAt(i) == pat.charAt(length)) {
            // increment the length
            length++;
            // store the length in the prefix array
            storePrefx[i] = length;
         } else {
            if (length != 0) {
               // to update length of previous prefix length
               length = storePrefx[length - 1];
               i--;
            } else
               // if the length is 0, store 0 in the prefix array
               storePrefx[i] = 0;
            }
         i++; // incrementing i
      }
   }
   // function to search for pattern
   public static int patrnSearch(String orgnString, String patt, int[] locArray) {
      int n, m, i = 0, j = 0;
      n = orgnString.length();
      m = patt.length();
      // array to store the prefix values
      int[] prefixArray = new int[m]; // allocate memory for the prefix array
      // calling prefix function to fill the prefix array
      prefixSearch(patt, m, prefixArray);
      int loc = 0; // initialize the location index
      while (i < n) {
         // checking if main string character matches pattern string character
         if (orgnString.charAt(i) == patt.charAt(j)) {
            // increment both i and j
            i++;
            j++;
         }
         // if j and m are equal pattern is found
         if (j == m) {
            // store the location of the pattern
            locArray[loc] = i - j;
            loc++; // increment the location index
            // update j to the previous prefix value
            j = prefixArray[j - 1];
            // checking if i is less than n and the current characters do not match
         } else if (i < n && patt.charAt(j) != orgnString.charAt(i)) {
            if (j != 0)
               // update j to the previous prefix value
               j = prefixArray[j - 1];
               // if j is zero
            else
               i++; // increment i
         }
      }
      return loc;
   }
   public static void main(String[] args) throws IOException {
      // declare the original text
      String orgnStr = "AAAABCAEAAABCBDDAAAABC";
      // pattern to be found
      String patrn = "AAABC";
      // array to store the locations of the pattern
      int[] locationArray = new int[orgnStr.length()];
      // calling pattern search function
      int index = patrnSearch(orgnStr, patrn, locationArray);
      // to loop through location array
      for (int i = 0; i < index; i++) {
         // print the location of pattern
         System.out.println("Pattern found at location: " + locationArray[i]);
      }
   }
}
# function to find prefix
def prefix_search(pattern, m, store_prefx):
   length = 0
   # array to store prefix
   store_prefx[0] = 0
   i = 1
   while i < m:
      # to check if the current character matches the previous character
      if pattern[i] == pattern[length]:
         # increment the length
         length += 1
         # store the length in the prefix array
         store_prefx[i] = length
      else:
         if length != 0:
            # to update length of previous prefix length
            length = store_prefx[length - 1]
            i -= 1
         else:
            # if the length is 0, store 0 in the prefix array
            store_prefx[i] = 0
      i += 1  # incrementing i
# function to search for pattern
def pattern_search(orgn_string, patt, loc_array):
   n = len(orgn_string)
   m = len(patt)
   i = j = loc = 0
   # array to store the prefix values
   prefix_array = [0] * m
   # calling prefix function to fill the prefix array
   prefix_search(patt, m, prefix_array)
   while i < n:
      # checking if main string character matches pattern string character
      if orgn_string[i] == patt[j]:
         # increment both i and j
         i += 1
         j += 1
      # if j and m are equal pattern is found
      if j == m:
         # store the location of the pattern
         loc_array[loc] = i - j
         loc += 1  # increment the location index
         # update j to the previous prefix value
         j = prefix_array[j - 1]
      # checking if i is less than n and the current characters do not match
      elif i < n and patt[j] != orgn_string[i]:
         if j != 0:
            # update j to the previous prefix value
            j = prefix_array[j - 1]
         else:
            i += 1  # increment i
   return loc
# main function
def main():
   # declare the original text
   orgn_str = "AAAABCAEAAABCBDDAAAABC"
   # pattern to be found
   patrn = "AAABC"
   # array to store the locations of the pattern
   location_array = [0] * len(orgn_str)
   # calling pattern search function
   index = pattern_search(orgn_str, patrn, location_array)
   # to loop through location array
   for i in range(index):
      # print the location of the pattern
      print("Pattern found at location:", location_array[i])
# call the main function
if __name__ == "__main__":
   main()

輸出

Pattern found at location: 1
Pattern found at location: 8
Pattern found at location: 17
廣告