Description
As an IBM researcher, you have been tasked with writing a program that will find commonalities amongst given snippets of DNA that can be correlated with individual survey information to identify new genetic markers.
A DNA base sequence is noted by listing the nitrogen bases in the order in which they are found in the molecule. There are four bases: adenine (A), thymine (T), guanine (G), and cytosine (C). A 6-base DNA sequence could be represented as TAGACC.
Given a set of DNA base sequences, determine the longest series of bases that occurs in all of the sequences.
Input
- A single positive integer m (2 <= m <= 10) indicating the number of base sequences in this dataset.
- m lines each containing a single base sequence consisting of 60 bases.
Output
Sample Input
3
2
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
3
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
GATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
GATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
3
CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
Sample Output
no significant commonalities
AGATAC
CATCATCAT 题意及题解转自:http://blog.csdn.net/qiqijianglu/article/details/7851454
题意:求n个字符串的最长公共串。
求n个字符长度最长公共子串。对于多模式匹配问题,一般是不可以用KMP解决得,因为忒暴力。
思路很简单:我们先按字符串的长度由短到长进行快排。枚举第一个字符串的不同长度子串,判断她是否为下面多有的公共子串?如果是的话,那么我们就表明找到,则比较其长度,如果比已经找到的串长,那么就替换结果串 否则按字典序比较。取字典序考前的,就可以。
#include<iostream>
#include<cstring>
#include<cstdlib>
#include<cstdio>
#include<algorithm>
#include<cmath>
#include<queue>
#include<map>
#include<set>
#include<stack>
#include<string> #define N 65
#define M 105
#define mod 10000007
//#define p 10000007
#define mod2 1000000000
#define ll long long
#define LL long long
#define eps 1e-6
#define inf 100000000
#define maxi(a,b) (a)>(b)? (a) : (b)
#define mini(a,b) (a)<(b)? (a) : (b) using namespace std; int T;
int n;
char text[N][N];
char result[N];
int ma;
int l;
int le;
char pat[N];
int next[N];
int mma; void ini()
{
int i;
ma=-;
scanf("%d",&n);
for(i=;i<=n;i++){
scanf("%s",text[i]);
}
l=strlen(text[]);
} void get_next()
{
memset(next,-,sizeof(next));
int i,j;
j=-;next[]=-;
i=;
while(i<le)
{
if(j==- || pat[i]==pat[j]){
i++;j++;next[i]=j;
}
else{
j=next[j];
}
}
} void KMP()
{
int i,j,k,m;
mma=;
for(k=;k<=n;k++){
i=;j=;m=;
while(i<l && j<le)
{
if(j==- || text[k][i]==pat[j])
{
i++;j++;
m=max(m,j);
}
else{
j=next[j];
}
}
mma=min(m,mma);
}
} void solve()
{
int i;
char te[N];
for(i=;i<l;i++){
strcpy(pat,text[]+i);
le=strlen(pat);
get_next();
KMP();
if(mma>ma){
ma=mma;
strncpy(result,text[]+i,ma);
result[ma]='\0';
}
else if(mma==ma){
strncpy(te,text[]+i,ma);
result[ma]='\0';
if(strcmp(te,result)==-){
strcpy(result,te);
}
}
}
} void out()
{
if(ma<){
printf("no significant commonalities\n");
}
else{
printf("%s\n",result);
}
} int main()
{
//freopen("data.in","r",stdin);
//freopen("data.out","w",stdout);
scanf("%d",&T);
//for(int ccnt=1;ccnt<=T;ccnt++)
while(T--)
//scanf("%d%d",&n,&m);
//while(scanf("%s",s)!=EOF)
{
ini();
solve();
out();
}
return ;
}