C实现
截取html
假设HTML字符串中只包含<p>标签,且每个标签都已正确闭合;标签本身不计入截取长度
输入:"abc<p class='c1'>defg</p>hijk<p style='width=100px;'>lmn</p>"
截取3个字符
输出:abc
截取4个字符
输出:abc<p class='c1'>d</p>
/*
 * Truncate an HTML fragment to a given number of visible characters.
 *
 * The fragment is expected to contain only <p ...> and </p> tags. Bytes that
 * belong to a tag do not count toward the requested length. If the cut falls
 * inside a paragraph, a closing </p> is appended so the result stays balanced.
 *
 * Define HTML_TRUNCATE_NO_MAIN to compile GetSubString() without the
 * interactive demo driver (for example when linking it into a test harness).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Sample input used by the interactive demo. */
char *htmlSource = "abc<p class='c1'>defg</p>hijk<p style='width=100px;'>lmn</p>";

/*
 * Length in bytes of the tag starting at s (s[0] must be '<'), including the
 * closing '>'. An unterminated tag is treated as running to the end of the
 * string so callers never scan past the terminator.
 */
static size_t tag_span(const char *s)
{
    const char *close = strchr(s, '>');

    return close != NULL ? (size_t)(close - s) + 1 : strlen(s);
}

/* Number of bytes in s that lie outside of any tag. */
static size_t visible_length(const char *s)
{
    size_t count = 0;

    while (*s != '\0') {
        if (*s == '<') {
            s += tag_span(s);
        } else {
            count++;
            s++;
        }
    }
    return count;
}

/**
 * Return a newly allocated copy of Source cut after `length` visible bytes.
 *
 * Tags are copied only when a visible byte that follows them is emitted, so
 * a cut that lands exactly in front of a tag does not include that tag. Any
 * number of adjacent tags in front of a visible byte is copied as a group.
 * Lengths are counted in bytes, not in multi-byte characters.
 *
 * @param Source  NUL-terminated HTML fragment; not modified.
 * @param length  Number of visible bytes to keep. Values <= 0 yield "".
 * @return A heap-allocated string the caller must free(), or NULL when
 *         Source is NULL, when allocation fails, or when `length` exceeds
 *         the number of visible bytes (a message is printed in that case).
 */
char *GetSubString(const char *Source, int length)
{
    static const char closing_tag[] = "</p>";

    if (Source == NULL) {
        return NULL;
    }
    if (length > 0 && (size_t)length > visible_length(Source)) {
        printf("截取长度过长\n");
        return NULL;
    }

    /*
     * The result is a prefix of Source plus at most one synthesized "</p>",
     * so strlen(Source) + sizeof closing_tag (which includes the NUL) is
     * always large enough.
     */
    char *target = malloc(strlen(Source) + sizeof closing_tag);
    if (target == NULL) {
        return NULL;
    }

    size_t in = 0;
    size_t out = 0;
    int inside_p = 0;

    for (int copied = 0; copied < length; copied++) {
        /* Copy every tag that precedes the next visible byte. */
        while (Source[in] == '<') {
            size_t span = tag_span(Source + in);

            if (Source[in + 1] == 'p') {
                inside_p = 1;
            } else if (Source[in + 1] == '/') {
                inside_p = 0;
            }
            memcpy(target + out, Source + in, span);
            in += span;
            out += span;
        }
        if (Source[in] == '\0') {
            break; /* defensive: only reachable with malformed input */
        }
        target[out++] = Source[in++];
    }

    if (inside_p) {
        /* The cut fell inside a paragraph: close it. */
        memcpy(target + out, closing_tag, sizeof closing_tag - 1);
        out += sizeof closing_tag - 1;
    }
    target[out] = '\0';
    return target;
}

#ifndef HTML_TRUNCATE_NO_MAIN
/*
 * Interactive demo: repeatedly read a length and print the truncated sample.
 * Stops on -1, on end of input, or on input that is not an integer.
 */
int main(void)
{
    for (;;) {
        int length = 0;

        printf("输入截取长度\n");
        if (scanf("%d", &length) != 1 || length == -1) {
            break;
        }

        char *target = GetSubString(htmlSource, length);
        if (target != NULL) {
            printf("%s\n", target);
            free(target);
        }
    }
    return 0;
}
#endif /* HTML_TRUNCATE_NO_MAIN */