|
| 1 | +''' |
| 2 | +Given 2 strings we have to find the longest common subsequence. |
| 3 | +
|
| 4 | +First we find the length of the longest common subsequence. In order to do so we divide the problem into simpler sub-problems. |
| 5 | +We find the length of the longest subsequence of "b" and "a", then of "b" and "ab" and so on. |
| 6 | +So dp[i][j] = length of the LCS of s1[0:i] and s2[0:j], which depends on dp[i-1][j] and dp[i][j-1]. |
| 7 | +A dp table for s1 = "abcd" and s2 = "baca" is shown below. |
| 8 | +
|
| 9 | + | 0 a b c d |
| 10 | +--|---------- |
| 11 | +0 | 0 0 0 0 0 |
| 12 | +b | 0 0 1 1 1 |
| 13 | +a | 0 1 1 1 1 |
| 14 | +c | 0 1 1 2 2 |
| 15 | +a | 0 1 1 2 2 |
| 16 | +
|
| 17 | +Here the bottom-right value is the length of the longest common subsequence of s1 and s2. |
| 18 | +The space complexity for this is O(m*n), m being the length of s1 and n being the length of s2. |
| 19 | +Populating the dp matrix is O(mn) time complexity. |
| 20 | +
|
| 21 | +Now to find one of the LCSs (there can be more than one) we use the algorithm explained in the link below. |
| 22 | + (https://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Reading_out_a_LCS) |
| 23 | + This algorithm traverses the dp matrix, hence the time complexity of the whole process is still O(mn). |
| 24 | +
|
| 25 | +''' |
| 26 | + |
| 27 | + |
# Longest common subsequence (LCS) of two strings s1 and s2.
# The comparison is case-insensitive: both inputs are lowercased first, so the
# returned subsequence is always lowercase.
# A full (m+1) x (n+1) table of sub-problem results is kept, so time and space
# are both O(m*n).
def longestCommonSubsequence(s1, s2):
    """Return one longest common subsequence of s1 and s2, ignoring case.

    Args:
        s1: first string.
        s2: second string.

    Returns:
        A lowercase string that is a longest common subsequence of the two
        inputs, or "" when they have no character in common. When several
        subsequences of maximal length exist, the one chosen by the
        tie-breaking in getLCS is returned.
    """
    # Case is irrelevant to the comparison, so normalise both inputs once.
    s1 = s1.lower()
    s2 = s2.lower()
    m = len(s1)
    n = len(s2)

    # dp[i][j] = length of the LCS of s1[:i] and s2[:j].
    # Row 0 and column 0 stand for an empty prefix and therefore stay 0.
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if s1[i - 1] == s2[j - 1]:
                # A matching last character extends the LCS of the two
                # shorter prefixes, i.e. the diagonal cell. Taking
                # min(dp[i-1][j], dp[i][j-1]) + 1 instead over-counts whenever
                # both neighbours already exceed the diagonal
                # (e.g. "acbc" vs "bcac").
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])

    return getLCS(dp, s1, s2)


def getLCS(dp, s1, s2):
    """Read one longest common subsequence back out of a filled dp table.

    Args:
        dp: table where dp[i][j] is the LCS length of s1[:i] and s2[:j];
            dp[-1][-1] is taken as the length of the result.
        s1: first string (the rows of dp).
        s2: second string (the columns of dp).

    Returns:
        The subsequence found by walking from the bottom-right cell towards
        the top-left one, as a string.
    """
    i = len(s1)
    j = len(s2)
    remaining = dp[-1][-1]  # characters still to be collected
    picked = []  # collected back-to-front, reversed before joining

    while remaining > 0 and i > 0 and j > 0:
        if s1[i - 1] == s2[j - 1]:
            # Equal characters are part of the subsequence: step diagonally.
            picked.append(s1[i - 1])
            remaining -= 1
            i -= 1
            j -= 1
        elif dp[i - 1][j] > dp[i][j - 1]:
            # Otherwise follow the neighbour holding the larger LCS length;
            # ties move left (j decreases).
            i -= 1
        else:
            j -= 1

    picked.reverse()
    return "".join(picked)
| 76 | + |
| 77 | + |
# driver code: run a few sample pairs through the LCS routine and print the results
if __name__ == "__main__":
    first_inputs = ["abcd", "baca", "aa", ""]
    second_inputs = ["baca", "babca", "aa", ""]

    # The two lists are parallel: the k-th entries form one test pair.
    for left, right in zip(first_inputs, second_inputs):
        print("For s1 = %s, s2 = %s, " % (left, right))
        print("the longest common subsequence is:", longestCommonSubsequence(left, right))
        print()
| 87 | + |
0 commit comments