Saturday, June 6, 2020

Needleman–Wunsch algorithm & Smith–Waterman algorithm (by Eugen Schülter)

BSA - Behind Sequence Alignment

This little program shows in 'real time' the effect of some parameters in sequence alignment functions. Both usual algorithms, Needleman-Wunsch (global alignment) and Smith-Waterman (local alignment) are supported. Not meaningful values for e.g. gap penalty etc. are wittingly accepted.

Useful to demonstrate principles of dynamic programming such as trace back.

If you have an old PC, you might need the visual basic 6.0 run time environment (available at http://support.microsoft.com/kb/192461/en).

Download from ME

Download from SOURCE




Needleman–Wunsch algorithm


Private Sub Needle()
Dim i, j, leA, leB
Dim iMatch, iGap, iExtend, iG
Dim ds, us, ls As Long
Dim sA, sB

   cmRun.Enabled = False
   cmClear.Enabled = False
   If tbMatch = "" Then tbMatch = 0
   iMatch = tbMatch.Text
   If tbMismatch = "" Then tbMismatch = 0
   iMis = tbMismatch.Text
   If tbGap = "" Then tbGap = 0
   iGap = tbGap.Text
   If tbExtend = "" Then tbExtend = 0
   iExtend = tbExtend.Text
   leA = Len(tbA)
   leB = Len(tbB)
   If tbDelay = "" Then tbDelay = 0
   tm1.Interval = CLng(tbDelay) + 1
   
   
   
'--- init ---
   ns(0, 0) = 0
   For j = 1 To leA
      ns(0, j) = j * iMis
      np(0, j) = LE
      F(map(0, j)) = ns(0, j)
      F(map(0, j)).BackColor = &HF0F4F4
      For i = leB + 1 To 24
         F(map(i, j)).BackColor = cc
         F(map(i, j)).Caption = ""
      Next i
   Next j
   For j = leA + 1 To 26
      F(map(0, j)) = ""
   Next j
   For i = 1 To leB
      ns(i, 0) = i * iMis
      np(i, 0) = UP
      F(map(i, 0)) = ns(i, 0)
      F(map(i, 0)).BackColor = &HF0F4F4
      For j = leA + 1 To 26
         F(map(i, j)).BackColor = cc
         F(map(i, j)).Caption = ""
      Next j
   Next i
   For i = leB + 1 To 24
      F(map(i, 0)) = ""
      For j = leA + 1 To 26
         F(map(i, j)).BackColor = cc
         F(map(i, j)).Caption = ""
      Next j
   Next i

'--- fill ---
   For i = 1 To leB
      For j = 1 To leA
         If tm1.Interval > 1 Then
            tm1.Enabled = True
            While tm1.Enabled = True
               DoEvents
            Wend
         End If
         iG = iGap
         If np(i - 1, j) = UP Or np(i, j - 1) = LE Then iG = iExtend
         If gRedGapA = 1 And (j = 1 Or j = leA) Then iG = iMis
         If gRedGapB = 1 And (i = 1 Or i = leB) Then iG = iMis
         ds = ns(i - 1, j - 1) + IIf(Mid$(tbA, j, 1) = Mid$(tbB, i, 1), iMatch, iMis)
         us = ns(i - 1, j) + iG
         ls = ns(i, j - 1) + iG
         If ds <= us Then
            If ds <= ls Then
               ns(i, j) = ds
               np(i, j) = DIAG
            Else
               ns(i, j) = ls
               np(i, j) = LE
            End If
         Else
            If us < ls Then
               ns(i, j) = us
               np(i, j) = UP
            Else
               ns(i, j) = ls
               np(i, j) = LE
            End If
         End If
         Call Display(i, j)
         F(map(i, j)).FontBold = fClear And IIf(ckPointers.Value, (np(i, j) <> op(i, j)), (ns(i, j) <> os(i, j)))
      Next j
   Next i
   
'--- traceback ---
   j = leA
   i = leB
   sA = ""
   sB = ""
   iG = gSearchMin
   While i > 0 Or j > 0
      F(map(i, j)).BackColor = &H40C080
      If np(i, j) = DIAG Then
         If iG = 1 Then
            While (ns(i, j - 1) <= ns(i, j)) And (np(i, j - 1) <> UP)
               sB = "-" & sB
               sA = lsa(j) & sA
               j = j - 1
               F(map(i, j)).BackColor = &H40C080
            Wend
            iG = 0
         Else
            sB = lsb(i) & sB
            sA = lsa(j) & sA
            j = j - 1
            i = i - 1
         End If
      ElseIf np(i, j) = UP Then
         sA = "-" & sA
         sB = lsb(i) & sB
         i = i - 1
      Else
         sB = "-" & sB
         sA = lsa(j) & sA
         j = j - 1
      End If
   Wend
   laRes = sA & Chr$(10) & sB

   For i = 0 To leB
      For j = 0 To leA
         os(i, j) = ns(i, j)
         op(i, j) = np(i, j)
   Next j, i
   cmRun.Enabled = True
   cmClear.Enabled = True
   fClear = True
End Sub


Smith–Waterman algorithm


Private Sub Watermann()
Dim i, j, leA, leB, iMax, jMax, maxScore
Dim iMatch, iGap, iExtend, iG
Dim ds, us, ls As Long
Dim sA, sB

   cmRun.Enabled = False
   cmClear.Enabled = False
   iMatch = tbMatch.Text
   iMis = tbMismatch.Text
   iGap = tbGap.Text
   iExtend = tbExtend.Text
   leA = Len(tbA)
   leB = Len(tbB)
   tm1.Interval = CLng(tbDelay) + 1
   
   
   
'--- init ---
   ns(0, 0) = 0
   iMax = 0: jMax = 0: maxScore = 0
   For j = 1 To leA
      ns(0, j) = 0
      np(0, j) = LE
      F(map(0, j)) = ns(0, j)
      F(map(0, j)).BackColor = &HF0F4F4
      For i = leB + 1 To 24
         F(map(i, j)).BackColor = cc
         F(map(i, j)).Caption = ""
      Next i
   Next j
   For j = leA + 1 To 26
      F(map(0, j)) = ""
   Next j
   For i = 1 To leB
      ns(i, 0) = 0
      np(i, 0) = UP
      F(map(i, 0)) = ns(i, 0)
      F(map(i, 0)).BackColor = &HF0F4F4
      For j = leA + 1 To 26
         F(map(i, j)).BackColor = cc
         F(map(i, j)).Caption = ""
      Next j
   Next i
   For i = leB + 1 To 24
      F(map(i, 0)) = ""
      For j = leA + 1 To 26
         F(map(i, j)).BackColor = cc
         F(map(i, j)).Caption = ""
      Next j
   Next i

'--- fill ---
   For i = 1 To leB
      For j = 1 To leA
         If tm1.Interval > 1 Then
            tm1.Enabled = True
            While tm1.Enabled = True
               DoEvents
            Wend
         End If
         iG = iGap
         If np(i - 1, j) = UP Or np(i, j - 1) = LE Then iG = iExtend
         If gRedGapA = 1 And (j = 1 Or j = leA) Then iG = iMis
         If gRedGapB = 1 And (i = 1 Or i = leB) Then iG = iMis
         ds = ns(i - 1, j - 1) + IIf(Mid$(tbA, j, 1) = Mid$(tbB, i, 1), iMatch, iMis)
         us = ns(i - 1, j) + iG
         ls = ns(i, j - 1) + iG
         If ds >= us Then
            If ds >= ls Then
               ns(i, j) = ds
               np(i, j) = DIAG
            Else
               ns(i, j) = ls
               np(i, j) = LE
            End If
         Else
            If us > ls Then
               ns(i, j) = us
               np(i, j) = UP
            Else
               ns(i, j) = ls
               np(i, j) = LE
            End If
         End If
         If ns(i, j) >= maxScore Then
            maxScore = ns(i, j)
            iMax = i: jMax = j
         End If
         Call Display(i, j)
         F(map(i, j)).FontBold = fClear And IIf(ckPointers.Value, (np(i, j) <> op(i, j)), (ns(i, j) <> os(i, j)))
      Next j
   Next i
   
'--- traceback ---
   j = jMax
   i = iMax
   sA = ""
   sB = ""
   While ns(i, j) > 0
      F(map(i, j)).BackColor = &H40C080
      If np(i, j) = DIAG Then
         sB = lsb(i) & sB
         sA = lsa(j) & sA
         j = j - 1
         i = i - 1
      ElseIf np(i, j) = UP Then
         sA = "-" & sA
         sB = lsb(i) & sB
         i = i - 1
      Else
         sB = "-" & sB
         sA = lsa(j) & sA
         j = j - 1
      End If
   Wend
   laRes = sA & Chr$(10) & sB

   For i = 0 To leB
      For j = 0 To leA
         os(i, j) = ns(i, j)
         op(i, j) = np(i, j)
   Next j, i
   cmRun.Enabled = True
   cmClear.Enabled = True
   fClear = True
End Sub


Source: http://www.schuelter-gm.de/download.html#bsa