DNA双序列滑动比对是一种比较简单的比对方式,其算法思想大致如下:
假设有两条DNA序列:ATCGCAG 和 ATC,那么进行滑动比对的过程如下:
1. 在第二条序列的前面填充空位标识符‘-’,填充的个数等于第一条序列的长度(此处为7),即填充后的两条序列为 ATCGCAG 和 -------ATC。
2. 每次从第二条序列的开头去掉一个字符(碱基或空位标识符),再将剩余部分与第一条序列从头逐位比对并计算得分;填充后的第二条序列长度为10,因此一共有10种比对情况,最后得出最优分数。
下图是我运行C语言代码的结果,可能有助于理解
代码如下(注意:函数声明中使用了默认参数 gap = -7,这是C++的特性,需要用C++编译器编译):
#include <stdio.h>
#include<string.h>
/*
 * Substitution score table (5 x 5).
 *
 * cla_score() indexes this table with the positions of the two compared
 * characters inside the alphabet string it receives; main() passes "ATCGN",
 * so rows and columns follow the order A, T, C, G, N.
 */
int score_array[5][5] = {
    /*          A   T   C   G   N */
    /* A */ {   5, -4, -4, -4, -2 },
    /* T */ {  -4,  5, -4, -4, -2 },
    /* C */ {  -4, -4,  5, -4, -2 },
    /* G */ {  -4, -4, -4,  5, -2 },
    /* N */ {  -2, -2, -2, -2, -1 }
};
/*
 * Program entry point.
 *
 * Reads two sequences from standard input, builds the padded second sequence
 * (one '-' per character of the first sequence, followed by the second
 * sequence itself) and passes both to cla_score(), which prints the score of
 * every sliding offset.
 *
 * Returns 0 on success, 1 when a sequence cannot be read or when the padded
 * sequence would not fit into the fixed-size buffer.
 */
int main()
{
    /* Declared locally because cla_score() is defined after main(). */
    void cla_score(char seq1[100], char seq2[100], char normol[5], int gap = -7);
    char normol[] = "ATCGN";
    char seq1[100] = "\0", seq2[100] = "\0", temp[100] = "\0";

    printf("Please input the sequence 1:");
    /* The field width keeps scanf from writing past the 100-byte buffer. */
    if (scanf("%99s", seq1) != 1) {
        fprintf(stderr, "Failed to read sequence 1\n");
        return 1;
    }

    printf("Please input the sequence 2:");
    if (scanf("%99s", temp) != 1) {
        fprintf(stderr, "Failed to read sequence 2\n");
        return 1;
    }

    /* Measure once: the lengths must not be re-read from seq2 while it is
       being filled, otherwise the loop bound moves with every write. */
    size_t len1 = strlen(seq1);
    size_t len2 = strlen(temp);

    /* Padded sequence = len1 gaps + len2 bases + terminating NUL. */
    if (len1 + len2 >= sizeof seq2) {
        fprintf(stderr,
                "Sequences too long: combined length must not exceed %u\n",
                (unsigned)(sizeof seq2 - 1));
        return 1;
    }

    memset(seq2, '-', len1);
    memcpy(seq2 + len1, temp, len2 + 1); /* +1 copies the terminator too */

    cla_score(seq1, seq2, normol);
    return 0;
}
/*
 * Prints the alignment score of seq1 against every suffix of seq2.
 *
 * For each offset i in [0, strlen(seq2)) the suffix seq2 + i (the "sliding
 * sequence") is compared with seq1 position by position over the shorter of
 * the two lengths:
 *   - a gap ('-') facing a non-gap character adds `gap` to the score;
 *   - two characters that both occur in `normol` add
 *     score_array[pos in normol of seq1 char][pos in normol of sliding char];
 *   - any other pair (a gap facing a gap, or a character that is not in
 *     `normol`) adds nothing, so the table is never indexed with an
 *     undefined value.
 *
 * Parameters:
 *   seq1   - NUL-terminated reference sequence.
 *   seq2   - NUL-terminated padded sequence that is slid along seq1.
 *   normol - NUL-terminated alphabet; only as many leading characters as
 *            score_array has rows are used.
 *   gap    - penalty added for a gap facing a non-gap character.
 *
 * Output goes to stdout; nothing is returned.
 */
void cla_score(char seq1[100], char seq2[100], char normol[5], int gap = -7)
{
    const size_t table_dim = sizeof score_array / sizeof score_array[0];
    const size_t len1 = strlen(seq1);
    const size_t len2 = strlen(seq2);

    /* Never look up more symbols than the score table has rows/columns. */
    size_t alphabet_len = strlen(normol);
    if (alphabet_len > table_dim) {
        alphabet_len = table_dim;
    }

    for (size_t offset = 0; offset < len2; offset++) {
        /* The suffix is used in place; no fixed-size copy that could overflow. */
        const char *window = seq2 + offset;
        const size_t window_len = len2 - offset;
        const size_t aln_len = (len1 > window_len) ? window_len : len1;
        int total_score = 0;

        printf("the sequence 1 is :%s\n", seq1);
        printf("The sliding sequence is:%s\t", window);

        for (size_t x = 0; x < aln_len; x++) {
            const char a = seq1[x];
            const char b = window[x];

            if ((a == '-' || b == '-') && a != b) {
                total_score += gap;
                continue;
            }

            /* -1 marks "not in the alphabet"; the last match wins, as before. */
            int index1 = -1;
            int index2 = -1;
            for (size_t j = 0; j < alphabet_len; j++) {
                if (a == normol[j]) {
                    index1 = (int)j;
                }
                if (b == normol[j]) {
                    index2 = (int)j;
                }
            }

            if (index1 >= 0 && index2 >= 0) {
                total_score += score_array[index1][index2];
            }
        }

        printf("It's score is :%d\n", total_score);
    }
}