C++11 开始支持正则表达式。我用的编译器是 TDM-GCC 4.9.2 64-bit,需要在菜单“工具 -> 编译选项 -> 编译器”中勾选“编译时加入以下命令”,并在下面的文本框里添加 “-std=c++11”,如下图所示:
刚接触正则表达式,初步学了点皮毛记录一下。直接上代码:
#include <iostream>
#include <string>
#include <vector>
#include <regex>
using namespace std;
int regexSplit(string&,const string,vector<string>&,int);
/*
 * Demo driver: applies a series of number-matching regular expressions to one
 * sample arithmetic expression and prints, for each pattern, every matched
 * substring on a single line.
 */
int main(void)
{
    vector<string> vect;
    string str = "(12.3e+10-0.018e-5)+(11.006-7.)+.89";
    /*
     * Note on the "integer or decimal" style patterns below: ECMAScript
     * alternation is ordered (the left alternative wins as soon as it
     * matches), so "\d+|\d+\.\d+" would stop at "12" when scanning "12.3".
     * They are therefore written with greedy optional groups, which do not
     * depend on the order in which alternatives are tried.
     */
    string reg[11]={
        "(\\d+)",                          /* integers, leading zeros allowed */
        "([1-9]\\d*)",                     /* wrong: integers, but can never match 0 */
        "(0|[1-9]\\d*)",                   /* any integer: 0, or a non-zero digit followed by digits */
        "(\\d+\\.\\d+)",                   /* decimals only, plain integers are not matched */
        "(\\d*\\.?\\d+)",                  /* wrong: integer or decimal, but also accepts ".89" */
        "(\\d+\\.?\\d*)",                  /* wrong: integer or decimal, but also accepts "7." */
        "(\\d+(\\.\\d+)?)",                /* integer or decimal */
        "-?(\\d+(\\.\\d+)?)",              /* integer or decimal with an optional minus sign */
        "\\([^()]*\\)",                    /* innermost pair of parentheses */
        "-?(\\d+\\.\\d+)e[+-]\\d+",        /* scientific notation */
        "-?(\\d+(\\.\\d+(e[+-]\\d+)?)?)"   /* real number: integer, decimal or scientific notation */
    };
    cout<<str<<'\n'<<"--------------"<<'\n';
    for (const auto& a:reg){
        // pos = 0: collect the matched text itself for this pattern.
        regexSplit(str,a,vect,0);
        cout<<"pattern:"<<a<<'\n'<<"string: ";
        for (const auto& v:vect) cout<<v<<" ";
        // The vector is reused for every pattern, so empty it before the next run.
        vect.clear();
        cout<<'\n'<<"=============="<<'\n';
    }
    return 0;
}
/*
 * Runs the regular expression `str_reg` over `str` and appends every token
 * produced by a sregex_token_iterator to `vect` (existing elements are kept).
 *
 * pos selects what a token is:
 *   -1        -> the text lying between matches (split behaviour)
 *   any other -> treated as 0, i.e. the matched text itself
 *
 * Returns the number of tokens appended by this call.
 */
int regexSplit(string &str,const string str_reg,vector<string>&vect,int pos)
{
    // Every value other than -1 is normalised to 0 (whole match).
    const int submatch = (pos == -1) ? -1 : 0;
    const regex compiled(str_reg);

    int appended = 0;
    const sregex_token_iterator finish;
    sregex_token_iterator cursor(str.begin(), str.end(), compiled, submatch);
    while (cursor != finish) {
        vect.push_back(*cursor);
        ++cursor;
        ++appended;
    }
    return appended;
}
输出结果:
(12.3e+10-0.018e-5)+(11.006-7.)+.89
--------------
pattern:(\d+)
string: 12 3 10 0 018 5 11 006 7 89
==============
pattern:([1-9]\d*)
string: 12 3 10 18 5 11 6 7 89
==============
pattern:(0|[1-9]\d*)
string: 12 3 10 0 0 18 5 11 0 0 6 7 89
==============
pattern:(\d+\.\d+)
string: 12.3 0.018 11.006
==============
pattern:(\d*\.?\d+)
string: 12.3 10 0.018 5 11.006 7 .89
==============
pattern:(\d+\.?\d*)
string: 12.3 10 0.018 5 11.006 7. 89
==============
pattern:(\d+|\d+\.\d+)
string: 12.3 10 0.018 5 11.006 7 89
==============
pattern:-?(\d+|\d+\.\d+)
string: 12.3 10 -0.018 -5 11.006 -7 89
==============
pattern:\([^()]*\)
string: (12.3e+10-0.018e-5) (11.006-7.)
==============
pattern:-?(\d+\.\d+)e[+-]\d+
string: 12.3e+10 -0.018e-5
==============
pattern:-?((\d+|\d+\.\d+)|(\d+\.\d+)e[+-]\d+)
string: 12.3e+10 -0.018e-5 11.006 -7 89
==============
--------------------------------
Process exited after 0.5831 seconds with return value 0
请按任意键继续. . .
附录:
特殊字符:
characters | description | matches |
---|---|---|
. | not newline | any character except line terminators (LF, CR, LS, PS). |
\t | tab (HT) | a horizontal tab character (same as \u0009). |
\n | newline (LF) | a newline (line feed) character (same as \u000A). |
\v | vertical tab (VT) | a vertical tab character (same as \u000B). |
\f | form feed (FF) | a form feed character (same as \u000C). |
\r | carriage return (CR) | a carriage return character (same as \u000D). |
\cletter | control code | a control code character whose code unit value is the same as the remainder of dividing the code unit value of letter by 32. For example: \ca is the same as \u0001, \cb the same as \u0002, and so on... |
\xhh | ASCII character | a character whose code unit value has an hex value equivalent to the two hex digits hh. For example: \x4c is the same as L, or \x23 the same as #. |
\uhhhh | unicode character | a character whose code unit value has an hex value equivalent to the four hex digits hhhh. |
\0 | null | a null character (same as \u0000). |
\int | backreference | the result of the submatch whose opening parenthesis is the int-th (int shall begin by a digit other than 0). See groups below for more info. |
\d | digit | a decimal digit character |
\D | not digit | any character that is not a decimal digit character |
\s | whitespace | a whitespace character |
\S | not whitespace | any character that is not a whitespace character |
\w | word | an alphanumeric or underscore character |
\W | not word | any character that is not an alphanumeric or underscore character |
\character | character | the character `character` as it is, without interpreting its special meaning within a regex expression. Any character can be escaped except those which form any of the special character sequences above. Needed for: ^ $ \ . * + ? ( ) [ ] { } \| |
[class] | character class | the target character is part of the class |
[^class] | negated character class | the target character is not part of the class |
数量:
characters | times | effects |
---|---|---|
* | 0 or more | The preceding atom is matched 0 or more times. |
+ | 1 or more | The preceding atom is matched 1 or more times. |
? | 0 or 1 | The preceding atom is optional (matched either 0 times or once). |
{int} | int | The preceding atom is matched exactly int times. |
{int,} | int or more | The preceding atom is matched int or more times. |
{min,max} | between min and max | The preceding atom is matched at least min times, but not more than max. |
分组:
characters | description | effects |
---|---|---|
(subpattern) | Group | Creates a backreference. |
(?:subpattern) | Passive group | Does not create a backreference. |
其他:
characters | description | condition for match |
---|---|---|
^ | Beginning of line | Either it is the beginning of the target sequence, or follows a line terminator. |
$ | End of line | Either it is the end of the target sequence, or precedes a line terminator. |
\| | Separator | Separates two alternative patterns or subpatterns. |