判断给定字符串是否匹配规则表达式
#include <stdio.h>
#include <regex.h>
/**
 * Compile `pattern` as a POSIX extended regular expression, match it against
 * `input`, and print the outcome to stdout.
 *
 * @param pattern    Regular expression source (compiled with REG_EXTENDED).
 * @param input      NUL-terminated string to match against.
 * @param maxmatches Number of match slots to report: slot 0 is the whole
 *                   match, slots 1..n are parenthesized sub-expressions.
 *                   A value <= 0 requests no offsets; only the
 *                   match / no-match verdict is printed.
 *
 * Nothing is returned; compile and execution errors are reported on stdout
 * together with the message produced by regerror().
 */
void test_match(const char *pattern, const char *input, int maxmatches) {
    /* Non-positive counts mean "report no offsets" rather than a bad VLA size. */
    const size_t nmatch = maxmatches > 0 ? (size_t)maxmatches : 0;
    char err[128] = {'\0'};
    regex_t regex;
    int regrc;

    printf("pattern=%s\n", pattern);
    printf("input =%s\n", input);

    regrc = regcomp(&regex, pattern, REG_EXTENDED);
    if (regrc != 0) {
        printf("cannot regcomp pattern, return %d\n", regrc);
        regerror(regrc, &regex, err, sizeof(err));
        printf(" error: %s\n", err);
        /* A failed regcomp leaves nothing to release, so no regfree here. */
        return;
    }

    /* A VLA needs at least one element even when no offsets are requested. */
    regmatch_t matches[nmatch > 0 ? nmatch : 1];
    regrc = regexec(&regex, input, nmatch, matches, 0);
    if (regrc == 0) {
        printf("regex match found\n");
        for (size_t i = 0; i < nmatch; i++) {
            /* Slots for sub-expressions that did not participate hold -1. */
            if (matches[i].rm_so != -1) {
                /* %.*s requires an int precision; regoff_t may be wider. */
                int len = (int)(matches[i].rm_eo - matches[i].rm_so);
                printf(" matches[%zu]=%.*s\n", i, len, &input[matches[i].rm_so]);
            }
        }
    } else if (regrc == REG_NOMATCH) {
        printf("regex ok, but REG_NOMATCH\n");
    } else {
        printf("regex failed, return %d\n", regrc);
        regerror(regrc, &regex, err, sizeof(err));
        printf(" error: %s\n", err);
    }

    regfree(&regex);
}
/*
 * Demo entry point: checks whether a sample SQL expression is a CAST(...)
 * call. Only the whole match is requested (maxmatches = 1), so no
 * sub-expressions are reported.
 */
int main(int argc, char *argv[]) {
    /* Command-line arguments are not used by this demo. */
    (void)argc;
    (void)argv;

    int maxmatches = 1;
    const char *pattern = "^CAST\\(.*\\)";
    const char *input = "CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))";

    test_match(pattern, input, maxmatches);
    return 0;
}
这个例子判断给定字符串是否符合一个CAST函数调用。
运行结果:
pattern=^CAST\(.*\)
input =CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))
regex match found
matches[0]=CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))
下面介绍第三个参数maxmatches的使用,它主要用在子匹配上。以上述为例,如果需要取出AS后面的类型值,那么我们就可以用:
const char * input = "CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))";
const char * pattern = "^CAST\\(.* AS ([A-Z_]+).*\\)";
int maxmatches = 2;
运行结果:
pattern=^CAST\(.* AS ([A-Z_]+).*\)
input =CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))
regex match found
matches[0]=CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))
matches[1]=CHAR
需要注意的点是:
- matches[0]是最长的匹配串。
- matches[1..n]是子匹配串,根据需求可以多个。
- 子匹配串在表达式中使用括号标识。
- 如果需要匹配括号符号本身(即不作为子匹配,只是当作普通字符),那么需要用反斜杠(\)转义;在C字符串字面量中要写成"\\("和"\\)"。
- 子匹配串可以嵌套使用,例如:
const char * input = "CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))";
const char * pattern = "^CAST(.* AS ([A-Z_]+).*)";
int maxmatches = 3;
test_match(pattern, input, maxmatches);
得到的输出结果就是:
pattern=^CAST(.* AS ([A-Z_]+).*)
input =CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))
regex match found
matches[0]=CAST(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))
matches[1]=(TO_CHAR(TBL.FLD, '0000000000D') AS CHAR(12))
matches[2]=CHAR
matches[0]: 是整个匹配的串
matches[1]: 是第一个子匹配的串,即函数CAST的参数内容(由于括号未转义,外层括号由.*匹配,因此也包含在内)
matches[2]: 是第二个子匹配的串,即嵌套表达式([A-Z_]+)的匹配子串。