Open In App

CSES Solution - Finding Patterns

Last Updated : 09 Apr, 2024
Suggest changes
Share
Like Article
Like
Report

Prerequisite: Aho-Corasick algorithm

Given a string and patterns, check for each pattern if it appears in the string.

Example:

Input: s = "aybabtu", patterns = {"bab", "abc", "ayba"}
Output: YES
NO
YES

Input: s = "bybabtu", patterns = {"bat", "tu"}
Output:
NO
YES

Approach:

The solution uses a method called the Aho-Corasick algorithm. Let's see how it works:

  • Building a Trie: We first create a special kind of tree called a ‘trie’ from the patterns we’re looking for. Each pattern is a path in the trie from the root to a node.
  • Failure Links: We then add "failure links" to the trie. These links help us jump back efficiently in the trie when a character doesn’t match.
  • Searching: We go through the given string, character by character, moving along the paths in the trie. If we reach the end of a path (a pattern), we know that pattern exists in the string.
  • Counting: While searching, we keep track of how many times we reach each node (pattern). This tells us how many times each pattern appears in the string.

This method is fast because it checks all patterns at the same time, instead of one by one.

Steps-by-step approach:

  • Implement a insertPattern() function to insert each pattern into the trie structure.
  • Build failure links in the trie using the buildFailureLinks() function to prepare for pattern matching.
  • Run the Aho-Corasick algorithm on the main string s using the runAhoCorasick() function to identify occurrences of patterns.
  • Perform a Depth-First Search (DFS) on the trie structure to propagate occurrences down and update pattern matches using the dfs function.
  • In the main function, input and insert patterns into the trie, build failure links, run the Aho-Corasick algorithm, and propagate occurrences using DFS.
  • Output the pattern matches by checking the patternFound array for each pattern index and print "YES" if a match is found, otherwise print "NO".

Below is the implementation of the above approach:

C++
#include <algorithm> #include <iostream> #include <queue> using namespace std; #define pb push_back // The main string and the number of patterns string s; int n, nodeIndex = 1, patternFound[500005]; // Adjacency list for the trie nodes vector<int> trieAdjList[500005]; // Structure for each node in the trie struct TrieNode {  int failureLink, child[26] = {}, occurrences = 0;  vector<int> patternIndices; } trie[500005]; // Function to insert a pattern into the trie void insertPattern(string pattern, int index) {  // Start at the root node  int node = 1;  // Traverse the pattern character by character  for (int i = 0; i < pattern.size(); i++) {  // If the current character's child node doesn't  // exist, create it  if (trie[node].child[pattern[i] - 'a'] == 0)  trie[node].child[pattern[i] - 'a']  = ++nodeIndex;  // Move to the child node  node = trie[node].child[pattern[i] - 'a'];  }  // After traversing the pattern, add the pattern index  // to the current node  trie[node].patternIndices.pb(index); } // Function to build the failure links for the Aho-Corasick // algorithm void buildFailureLinks() {  // Initialize a queue for BFS traversal  queue<int> nodeQueue;  // Start from the root node  int node = 1;  // Set failure link of root node to itself  trie[1].failureLink = 1;  // Push all child nodes of the root to the queue  for (int i = 0; i < 26; i++) {  if (trie[node].child[i])  trie[trie[node].child[i]].failureLink = node,  nodeQueue.push(trie[node].child[i]);  else  trie[node].child[i]  = 1; // If child node doesn't exist, point  // to root  }  // Perform BFS traversal to set failure links  while (!nodeQueue.empty()) {  node = nodeQueue.front();  nodeQueue.pop();  for (int i = 0; i < 26; i++) {  if (trie[node].child[i])  trie[trie[node].child[i]].failureLink  = trie[trie[node].failureLink].child[i],  nodeQueue.push(trie[node].child[i]);  else  trie[node].child[i]  = trie[trie[node].failureLink].child[i];  }  }  // Create adjacency list based on failure links  for (int i = 2; i <= nodeIndex; i++)  trieAdjList[trie[i].failureLink].pb(i); } // Function to run the Aho-Corasick algorithm on the main // string void runAhoCorasick(string s) {  // Start from the root node  for (int i = 0, node = 1; i < s.size(); i++) {  // Traverse the trie based on characters in the main  // string  node = trie[node].child[s[i] - 'a'];  // Increment occurrences at each node  trie[node].occurrences++;  } } // DFS function to propagate the occurrences down the trie int dfs(int u) {  // Initialize total occurrences at current node  int totalOccurrences = trie[u].occurrences;  // Traverse child nodes recursively  for (int v : trieAdjList[u])  totalOccurrences += dfs(v);  // Propagate occurrences to pattern indices  for (int patternIndex : trie[u].patternIndices)  patternFound[patternIndex] = totalOccurrences;  // Return total occurrences  return totalOccurrences; } // Main function int main() {  string s = "aybabtu";  int n = 3;  vector<string> patterns = { "bab", "abc", "ayba" };  // Input and insert patterns into trie  for (int i = 0; i < n; i++) {  insertPattern(patterns[i], i);  }  // Build failure links for Aho-Corasick algorithm  buildFailureLinks();  // Run Aho-Corasick algorithm on main string  runAhoCorasick(s);  // Propagate occurrences down the trie using DFS  dfs(1);  // Output pattern matches  for (int i = 0; i < n; i++)  cout << (patternFound[i] ? "YES" : "NO") << '\n'; } 
Java
/*code by flutterfly */ import java.util.*; public class AhoCorasick {  static String s;  static int n, nodeIndex = 1;  static int[] patternFound = new int[500005];  static List<Integer>[] trieAdjList = new List[500005];  // Structure for each node in the trie  static class TrieNode {  int failureLink;  int[] child = new int[26];  int occurrences = 0;  List<Integer> patternIndices = new ArrayList<>();  }  static TrieNode[] trie = new TrieNode[500005];  // Function to insert a pattern into the trie  static void insertPattern(String pattern, int index) {  // Start at the root node  int node = 1;  // Traverse the pattern character by character  for (int i = 0; i < pattern.length(); i++) {  // If the current character's child node doesn't exist, create it  if (trie[node].child[pattern.charAt(i) - 'a'] == 0)  trie[node].child[pattern.charAt(i) - 'a'] = ++nodeIndex;  // Move to the child node  node = trie[node].child[pattern.charAt(i) - 'a'];  }  // After traversing the pattern, add the pattern index to the current node  trie[node].patternIndices.add(index);  }  // Function to build the failure links for the Aho-Corasick algorithm  static void buildFailureLinks() {  // Initialize a queue for BFS traversal  Queue<Integer> nodeQueue = new LinkedList<>();  // Start from the root node  int node = 1;  // Set failure link of root node to itself  trie[1].failureLink = 1;  // Push all child nodes of the root to the queue  for (int i = 0; i < 26; i++) {  if (trie[node].child[i] != 0) {  trie[trie[node].child[i]].failureLink = node;  nodeQueue.add(trie[node].child[i]);  } else {  trie[node].child[i] = 1; // If child node doesn't exist, point to root  }  }  // Perform BFS traversal to set failure links  while (!nodeQueue.isEmpty()) {  node = nodeQueue.poll();  for (int i = 0; i < 26; i++) {  if (trie[node].child[i] != 0) {  trie[trie[node].child[i]].failureLink = trie[trie[node].failureLink].child[i];  nodeQueue.add(trie[node].child[i]);  } else {  trie[node].child[i] = trie[trie[node].failureLink].child[i];  }  }  }  // Create adjacency list based on failure links  for (int i = 2; i <= nodeIndex; i++)  trieAdjList[trie[i].failureLink].add(i);  }  // Function to run the Aho-Corasick algorithm on the main string  static void runAhoCorasick(String s) {  // Start from the root node  for (int i = 0, node = 1; i < s.length(); i++) {  // Traverse the trie based on characters in the main string  node = trie[node].child[s.charAt(i) - 'a'];  // Increment occurrences at each node  trie[node].occurrences++;  }  }  // DFS function to propagate the occurrences down the trie  static int dfs(int u) {  // Initialize total occurrences at current node  int totalOccurrences = trie[u].occurrences;  // Traverse child nodes recursively  for (int v : trieAdjList[u])  totalOccurrences += dfs(v);  // Propagate occurrences to pattern indices  for (int patternIndex : trie[u].patternIndices)  patternFound[patternIndex] = totalOccurrences;  // Return total occurrences  return totalOccurrences;  }  // Main function  public static void main(String[] args) {  s = "aybabtu";  n = 3;  String[] patterns = { "bab", "abc", "ayba" };  // Initialize trie nodes  for (int i = 0; i <= 500004; i++) {  trie[i] = new TrieNode();  trieAdjList[i] = new ArrayList<>();  }  // Input and insert patterns into trie  for (int i = 0; i < n; i++) {  insertPattern(patterns[i], i);  }  // Build failure links for Aho-Corasick algorithm  buildFailureLinks();  // Run Aho-Corasick algorithm on main string  runAhoCorasick(s);  // Propagate occurrences down the trie using DFS  dfs(1);  // Output pattern matches  for (int i = 0; i < n; i++)  System.out.println((patternFound[i] != 0 ? "YES" : "NO"));  } } 
Python3
from collections import deque class TrieNode: def __init__(self): self.failureLink = 0 self.child = [0] * 26 self.occurrences = 0 self.patternIndices = [] def insert_pattern(pattern, index):  """Inserts a pattern into the trie.""" global nodeIndex node = 1 # Traverse the trie to insert the pattern character by character for char in pattern: char_index = ord(char) - ord('a') if trie[node].child[char_index] == 0: nodeIndex += 1 trie[node].child[char_index] = nodeIndex node = trie[node].child[char_index] # Add the pattern index to the current node trie[node].patternIndices.append(index) def build_failure_links():  """Builds failure links for the Aho-Corasick algorithm.""" nodeQueue = deque() node = 1 trie[1].failureLink = 1 # Initialize failure links for child nodes of the root for i in range(26): if trie[node].child[i] != 0: trie[trie[node].child[i]].failureLink = node nodeQueue.append(trie[node].child[i]) else: trie[node].child[i] = 1 # Perform BFS traversal to set failure links while nodeQueue: node = nodeQueue.popleft() for i in range(26): if trie[node].child[i] != 0: trie[trie[node].child[i]].failureLink = trie[trie[node].failureLink].child[i] nodeQueue.append(trie[node].child[i]) else: trie[node].child[i] = trie[trie[node].failureLink].child[i] # Create adjacency list based on failure links for i in range(2, nodeIndex + 1): trie_adj_list[trie[i].failureLink].append(i) def run_aho_corasick(s):  """Runs the Aho-Corasick algorithm on the main string.""" node = 1 for char in s: # Traverse the trie based on characters in the main string node = trie[node].child[ord(char) - ord('a')] # Increment occurrences at each node trie[node].occurrences += 1 def dfs(u):  """DFS function to propagate the occurrences down the trie.""" total_occurrences = trie[u].occurrences # Traverse child nodes recursively for v in trie_adj_list[u]: total_occurrences += dfs(v) # Propagate occurrences to pattern indices for pattern_index in trie[u].patternIndices: pattern_found[pattern_index] = total_occurrences # Return total occurrences return total_occurrences # Sample Input s = "aybabtu" n = 3 patterns = ["bab", "abc", "ayba"] # Initialize variables nodeIndex = 1 trie = [TrieNode() for _ in range(500005)] trie_adj_list = [[] for _ in range(500005)] pattern_found = [0] * 500005 # Insert patterns into the trie for i in range(n): insert_pattern(patterns[i], i) # Build failure links for Aho-Corasick algorithm build_failure_links() # Run Aho-Corasick algorithm on the main string run_aho_corasick(s) # Propagate occurrences down the trie using DFS dfs(1) # Output pattern matches for i in range(n): print("YES" if pattern_found[i] != 0 else "NO") 
C#
//code by flutterfly using System; using System.Collections.Generic; public class Program {  // Structure for each node in the trie  public class TrieNode  {  public int failureLink;  public int[] child = new int[26];  public int occurrences = 0;  public List<int> patternIndices = new List<int>();  }  static string s;  static int n, nodeIndex = 1;  static int[] patternFound = new int[500005];  static List<int>[] trieAdjList = new List<int>[500005];  static TrieNode[] trie = new TrieNode[500005];  // Function to insert a pattern into the trie  static void InsertPattern(string pattern, int index)  {  // Start at the root node  int node = 1;  // Traverse the pattern character by character  foreach (char c in pattern)  {  int idx = c - 'a';  // If the current character's child node doesn't exist, create it  if (trie[node].child[idx] == 0)  trie[node].child[idx] = ++nodeIndex;  // Move to the child node  node = trie[node].child[idx];  }  // After traversing the pattern, add the pattern index to the current node  trie[node].patternIndices.Add(index);  }  // Function to build the failure links for the Aho-Corasick algorithm  static void BuildFailureLinks()  {  // Initialize a queue for BFS traversal  Queue<int> nodeQueue = new Queue<int>();  // Start from the root node  int node = 1;  // Set failure link of root node to itself  trie[1].failureLink = 1;  // Push all child nodes of the root to the queue  for (int i = 0; i < 26; i++)  {  if (trie[node].child[i] != 0)  {  trie[trie[node].child[i]].failureLink = node;  nodeQueue.Enqueue(trie[node].child[i]);  }  else  {  trie[node].child[i] = 1; // If child node doesn't exist, point to root  }  }  // Perform BFS traversal to set failure links  while (nodeQueue.Count > 0)  {  node = nodeQueue.Dequeue();  for (int i = 0; i < 26; i++)  {  if (trie[node].child[i] != 0)  {  trie[trie[node].child[i]].failureLink = trie[trie[node].failureLink].child[i];  nodeQueue.Enqueue(trie[node].child[i]);  }  else  {  trie[node].child[i] = trie[trie[node].failureLink].child[i];  }  }  }  // Create adjacency list based on failure links  for (int i = 0; i <= nodeIndex; i++)  trieAdjList[i] = new List<int>();  for (int i = 2; i <= nodeIndex; i++)  trieAdjList[trie[i].failureLink].Add(i);  }  // Function to run the Aho-Corasick algorithm on the main string  static void RunAhoCorasick(string s)  {  // Start from the root node  int node = 1;  // Traverse the main string character by character  foreach (char c in s)  {  int idx = c - 'a';  // Traverse the trie based on characters in the main string  node = trie[node].child[idx];  // Increment occurrences at each node  trie[node].occurrences++;  }  }  // DFS function to propagate the occurrences down the trie  static int DFS(int u)  {  // Initialize total occurrences at current node  int totalOccurrences = trie[u].occurrences;  // Traverse child nodes recursively  foreach (int v in trieAdjList[u])  totalOccurrences += DFS(v);  // Propagate occurrences to pattern indices  foreach (int patternIndex in trie[u].patternIndices)  patternFound[patternIndex] = totalOccurrences;  // Return total occurrences  return totalOccurrences;  }  // Main function  public static void Main(string[] args)  {  s = "aybabtu";  n = 3;  string[] patterns = { "bab", "abc", "ayba" };  // Input and insert patterns into trie  for (int i = 0; i < 500005; i++)  trie[i] = new TrieNode();  for (int i = 0; i < n; i++)  InsertPattern(patterns[i], i);  // Build failure links for Aho-Corasick algorithm  BuildFailureLinks();  // Run Aho-Corasick algorithm on main string  RunAhoCorasick(s);  // Propagate occurrences down the trie using DFS  DFS(1);  // Output pattern matches  for (int i = 0; i < n; i++)  Console.WriteLine((patternFound[i] != 0 ? "YES" : "NO"));  } } 
JavaScript
//code by flutterfly // The main string and the number of patterns let s; let n; let nodeIndex = 1; let patternFound = new Array(500005).fill(0); // Adjacency list for the trie nodes let trieAdjList = new Array(500005).fill().map(() => []); // Structure for each node in the trie class TrieNode {  constructor() {  this.failureLink = 0;  this.child = new Array(26).fill(0);  this.occurrences = 0;  this.patternIndices = [];  } } let trie = new Array(500005).fill().map(() => new TrieNode()); // Function to insert a pattern into the trie function insertPattern(pattern, index) {  // Start at the root node  let node = 1;  // Traverse the pattern character by character  for (let i = 0; i < pattern.length; i++) {  // If the current character's child node doesn't exist, create it  if (trie[node].child[pattern.charCodeAt(i) - 'a'.charCodeAt(0)] === 0) {  trie[node].child[pattern.charCodeAt(i) - 'a'.charCodeAt(0)] = ++nodeIndex;  }  // Move to the child node  node = trie[node].child[pattern.charCodeAt(i) - 'a'.charCodeAt(0)];  }  // After traversing the pattern, add the pattern index to the current node  trie[node].patternIndices.push(index); } // Function to build the failure links for the Aho-Corasick algorithm function buildFailureLinks() {  // Initialize a queue for BFS traversal  let nodeQueue = [];  // Start from the root node  let node = 1;  // Set failure link of root node to itself  trie[1].failureLink = 1;  // Push all child nodes of the root to the queue  for (let i = 0; i < 26; i++) {  if (trie[node].child[i] !== 0) {  trie[trie[node].child[i]].failureLink = node;  nodeQueue.push(trie[node].child[i]);  } else {  trie[node].child[i] = 1; // If child node doesn't exist, point to root  }  }  // Perform BFS traversal to set failure links  while (nodeQueue.length > 0) {  node = nodeQueue.shift();  for (let i = 0; i < 26; i++) {  if (trie[node].child[i] !== 0) {  trie[trie[node].child[i]].failureLink = trie[trie[node].failureLink].child[i];  nodeQueue.push(trie[node].child[i]);  } else {  trie[node].child[i] = trie[trie[node].failureLink].child[i];  }  }  }  // Create adjacency list based on failure links  for (let i = 2; i <= nodeIndex; i++) {  trieAdjList[trie[i].failureLink].push(i);  } } // Function to run the Aho-Corasick algorithm on the main string function runAhoCorasick(s) {  // Start from the root node  let node = 1;  // Traverse the main string character by character  for (let i = 0; i < s.length; i++) {  // Traverse the trie based on characters in the main string  node = trie[node].child[s.charCodeAt(i) - 'a'.charCodeAt(0)];  // Increment occurrences at each node  trie[node].occurrences++;  } } // DFS function to propagate the occurrences down the trie function dfs(u) {  // Initialize total occurrences at current node  let totalOccurrences = trie[u].occurrences;  // Traverse child nodes recursively  for (let v of trieAdjList[u]) {  totalOccurrences += dfs(v);  }  // Propagate occurrences to pattern indices  for (let patternIndex of trie[u].patternIndices) {  patternFound[patternIndex] = totalOccurrences;  }  // Return total occurrences  return totalOccurrences; } // Main function function main() {  s = "aybabtu";  n = 3;  let patterns = ["bab", "abc", "ayba"];  // Input and insert patterns into trie  for (let i = 0; i < n; i++) {  insertPattern(patterns[i], i);  }  // Build failure links for Aho-Corasick algorithm  buildFailureLinks();  // Run Aho-Corasick algorithm on main string  runAhoCorasick(s);  // Propagate occurrences down the trie using DFS  dfs(1);  // Output pattern matches  for (let i = 0; i < n; i++) {  console.log(patternFound[i] ? "YES" : "NO");  } } main(); 

Output
YES NO YES

Time complexity: O(numPatterns * m + n + s), where: numPatterns is the number of patterns, m is the average length of the patterns, n is the total number of nodes in the trie, s is the length of the main string.
Auxiliary Space: O(n + numPatterns)



Similar Reads