Time Limit: 1000MS | Memory Limit: 65536K
Total Submissions: 20163 | Accepted: 8948
Description
As an IBM researcher, you have been tasked with writing a program that will find commonalities amongst given snippets of DNA that can be correlated with individual survey information to identify new genetic markers.
A DNA base sequence is noted by listing the nitrogen bases in the order in which they are found in the molecule. There are four bases: adenine (A), thymine (T), guanine (G), and cytosine (C). A 6-base DNA sequence could be represented as TAGACC.
Given a set of DNA base sequences, determine the longest series of bases that occurs in all of the sequences.
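Input
Input to this problem begins with a line containing a single integer n indicating the number of datasets. Each dataset consists of the following components:
- A single positive integer m (2 <= m <= 10) indicating the number of base sequences in this dataset.
- m lines each containing a single base sequence consisting of 60 bases.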
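Output
For each dataset in the input, output the longest series of bases common to all of the given base sequences. If the longest common series is less than three bases in length, display the string "no significant commonalities" instead. If multiple series of the same longest length exist, output only the one that comes first in alphabetical order.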
Sample Input
3
2
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
3
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
GATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
GATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
3
CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
Sample Output
no significant commonalities
AGATAC
CATCATCAT
Source
no significant commonalities
。
#include<stdio.h>
#include<string.h>
/* s: the sequences of the current dataset, one per row (main fills rows 0..N-1).
 * p: the pattern currently being matched; main copies a suffix of s[0] into it. */
char s[12][62],p[62];
/* Best common substring found so far for the current dataset (written by main). */
char ans[62];
/* KMP failure table for p, filled by getnext() and consumed by kmp(). */
int next[62];
/* Number of sequences in the current dataset (read by main, used by kmp). */
int N;
/*
 * Build the KMP failure table for the first n characters of the global
 * pattern p.
 *
 * On return, next[j] is the index of the last character of the longest
 * proper prefix of p[0..j] that is also a suffix of p[0..j], or -1 when no
 * such prefix exists (so the border length is next[j] + 1).
 *
 * n: number of pattern characters to process.
 * Returns 0 unconditionally.
 */
int getnext(int n)
{
    int pos;
    int border = -1;

    next[0] = -1;
    for (pos = 1; pos < n; pos++) {
        /* Fall back to shorter borders until one can be extended by p[pos]. */
        while (border >= 0 && p[pos] != p[border + 1]) {
            border = next[border];
        }
        if (p[pos] == p[border + 1]) {
            border++;
        }
        next[pos] = border;
    }
    return 0;
}
/*
 * Longest prefix of the global pattern p that occurs in every other sequence.
 *
 * Builds the failure table for p[0..n-1] via getnext(), then runs a KMP scan
 * over each of s[1] .. s[N-1], recording for each one the greatest number of
 * leading pattern characters matched at any position.
 *
 * n: length of the pattern stored in p.
 * Returns the smallest of those per-sequence values, i.e. the length of the
 * longest prefix of p that is present in all of s[1..N-1]. The result never
 * exceeds n: when there is no other sequence to compare against (N < 2) it
 * is n itself, so callers can always use it as a copy length.
 */
int kmp(int n)
{
    int i, j, k, mx;
    /* Start from the pattern length instead of an arbitrary sentinel so the
     * result stays a valid prefix length even if the loop below never runs. */
    int max = n > 0 ? n : 0;

    getnext(n);
    for (i = 1; i < N; i++) { /* match against each of the remaining N-1 sequences */
        j = 0;
        k = 0;
        mx = 0;
        /* Stop at the terminator as well as at 60 characters so a short line
         * never causes stale bytes behind it to be matched. */
        while (j < 60 && s[i][j] != '\0' && k < n) {
            if (p[k] == s[i][j]) { /* characters match: advance both cursors */
                k++;
                j++;
            } else if (k == 0) { /* already at the start of the pattern */
                j++;
            } else {
                /* Reuse the longest border of the part matched so far. */
                k = next[k - 1] + 1;
            }
            if (mx < k) {
                mx = k;
            }
        }
        if (max > mx) {
            max = mx;
        }
    }
    return max;
}
/*
 * Read the datasets from standard input and, for each one, print the longest
 * substring shared by all of its sequences, or "no significant commonalities"
 * when that substring is shorter than three characters. Among candidates of
 * equal length the lexicographically smallest one is printed.
 *
 * Every suffix of s[0] that is at least three characters long is tried as a
 * pattern; kmp() reports how long a prefix of it appears in all the other
 * sequences.
 *
 * Processing stops quietly on malformed input (missing numbers, a sequence
 * count that does not fit the s array, or a missing sequence line).
 * Returns 0.
 */
int main()
{
    int t;
    int i;
    int len;

    /* Without a readable dataset count there is nothing to process. */
    if (scanf("%d", &t) != 1) {
        return 0;
    }

    while (t-- > 0) {
        int total;

        /* Reject counts that would overrun the rows of s. */
        if (scanf("%d", &N) != 1 || N < 1 || N > (int)(sizeof s / sizeof s[0])) {
            break;
        }
        for (i = 0; i < N; i++) {
            /* The width limit keeps an over-long line inside the 62-byte row. */
            if (scanf("%61s", s[i]) != 1) {
                break;
            }
        }
        if (i < N) {
            break;
        }

        len = 0;
        ans[0] = '\0';
        total = (int)strlen(s[0]);

        /* Only suffixes of length >= 3 can yield a reportable answer. */
        for (i = 0; total - i >= 3; i++) {
            int mx;

            strcpy(p, s[0] + i);
            mx = kmp(total - i);
            /* Never trust the match length beyond the pattern itself. */
            if (mx > total - i) {
                mx = total - i;
            }

            if (len < mx) {
                memcpy(ans, s[0] + i, (size_t)mx);
                ans[mx] = '\0';
                len = mx;
            } else if (len == mx) {
                /* Same length: keep the alphabetically smaller candidate. */
                p[mx] = '\0';
                if (strcmp(p, ans) < 0) {
                    strcpy(ans, p);
                }
            }
        }

        if (len >= 3) {
            printf("%s\n", ans);
        } else {
            printf("no significant commonalities\n");
        }
    }
    return 0;
}