[Work@] Substring Search - II

Substring Search - II

Given two strings s1 and s2, find the index of the first occurrence of s2 in s1 as a substring.

If no such occurrence exists, return -1.

This problem is also known as finding a needle in a haystack.

Use the Rabin-Karp algorithm to solve this problem.

  • Rabin-Karp rolling hash solution
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * Finds the first occurrence of s2 inside s1 using a Rabin-Karp rolling hash.
 *
 * The hash is a polynomial hash with base 1010101 modulo 1e9+7. Every hash
 * hit is confirmed with a direct character comparison, so hash collisions
 * cannot produce a wrong answer.
 *
 * @param s1 The haystack to search in.
 * @param s2 The needle to search for.
 * @return The zero-based index of the first occurrence of s2 in s1, or -1 if
 *         s2 does not occur. An empty needle matches at index 0.
 */
int findStartIndexOfSubstring(std::string s1, std::string s2) {
    const int n = static_cast<int>(s1.length());
    const int m = static_cast<int>(s2.length());

    // An empty needle occurs at the very start of any haystack.
    if (m == 0) return 0;
    if (n < m) return -1;
    if (n == m) return s1 == s2 ? 0 : -1;

    const long long base = 1010101;
    const long long mod = 1000000007;

    // Read bytes as unsigned: with a signed `char`, bytes >= 0x80 would be
    // negative and could leave one hash negative while the other is
    // normalised into [0, mod), so equal windows would never compare equal.
    const auto byteAt = [](const std::string& s, int i) -> long long {
        return static_cast<unsigned char>(s[static_cast<std::size_t>(i)]);
    };

    long long target = 0;     // hash of the needle
    long long highPower = 1;  // base^m mod mod, weight of the byte leaving the window
    for (int i = 0; i < m; ++i) {
        target = (target * base + byteAt(s2, i)) % mod;
        highPower = highPower * base % mod;
    }

    long long window = 0;  // hash of the last min(i + 1, m) bytes of s1
    for (int i = 0; i < n; ++i) {
        window = (window * base + byteAt(s1, i)) % mod;
        if (i >= m) {
            // Drop the byte that just slid out; adding mod keeps the value non-negative.
            window = (window + mod - highPower * byteAt(s1, i - m) % mod) % mod;
        }
        if (i >= m - 1 && window == target) {
            const int start = i - m + 1;
            // Confirm in place (no temporary substring) to rule out a collision.
            if (s1.compare(static_cast<std::size_t>(start),
                           static_cast<std::size_t>(m), s2) == 0) {
                return start;
            }
        }
    }

    return -1;
}
  • KMP solution
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Builds the KMP prefix table of s: entry k holds the length of the longest
// proper prefix of s[0..k] that is also a suffix of s[0..k].
vector<int> PI(string& s) {
    const int len = s.length();
    vector<int> table(len);  // zero-initialised; table[0] stays 0

    int matched = 0;  // length of the border currently being extended
    for (int pos = 1; pos < len; ++pos) {
        // On a mismatch, fall back to the next shorter border.
        while (matched > 0 && s[pos] != s[matched]) {
            matched = table[matched - 1];
        }
        if (s[pos] == s[matched]) {
            ++matched;
            table[pos] = matched;
        }
    }
    return table;
}

/**
 * Finds the first occurrence of s2 inside s1 using the Knuth-Morris-Pratt
 * algorithm, driven by the prefix table that PI() builds for the needle.
 *
 * @param s1 The haystack to search in.
 * @param s2 The needle to search for.
 * @return The zero-based index of the first occurrence of s2 in s1, or -1 if
 *         s2 does not occur. An empty needle matches at index 0.
 */
int findStartIndexOfSubstring(std::string s1, std::string s2) {
    const int n = static_cast<int>(s1.length());
    const int m = static_cast<int>(s2.length());

    // An empty needle occurs at the very start of any haystack. Handling it
    // here also keeps the scan below from indexing past the end of s2.
    if (m == 0) return 0;
    if (n < m) return -1;
    if (n == m) return s1 == s2 ? 0 : -1;

    const auto pi = PI(s2);

    int matched = 0;  // number of needle characters matched so far
    for (int i = 0; i < n; ++i) {
        // On a mismatch, reuse the longest border instead of rescanning s1.
        while (matched > 0 && s1[i] != s2[matched]) {
            matched = pi[matched - 1];
        }
        if (s1[i] == s2[matched] && ++matched == m) {
            return i - m + 1;
        }
    }

    return -1;
}
Author: Song Hayoung
Link: https://songhayoung.github.io/2022/06/04/PS/WorkAt/substring-search-rabin-karp/
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stating additionally.