在Linux中,很多应用程序都需要对正则表达式的支持(如:grep,sed,awk等)。所以提供有POSIX式regex支持:
#include <sys/types.h>
#include <regex.h>
/* regcomp() is used to compile a regular expression into a form that is suitable for subsequent regexec() searches. */
int regcomp(regex_t *preg, const char *regex, int cflags);
如果是正确的正则式返回0
/* regexec() is used to match a null-terminated string against the precompiled pattern buffer */
int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
成功匹配返回0,pmatch[0].rm_so和pmatch[0].rm_eo分别是第一个匹配串在string中的起始偏移和结束偏移(rm_eo是匹配串最后一个字符之后的位置;-1表示没有匹配)。
注意:这里pmatch[i](i=1,2,...)不是表示第二,三…个匹配,而是第一个匹配串中的子匹配。如果要找之后的匹配应该从第一个匹配的终位置开始在string中再次regexec()
void regfree(regex_t *preg);
用完正则表达式后,或者要使用新的正则表达式的时候,我们可以用这个函数清空preg指向的regex_t结构体的内容,请记住,如果是使用新的正则表达式,一定要先清空regex_t结构体。
size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);
把regcomp/regexec的错误码转换成错误信息,其中errcode是regcomp/regexec的返回值,errbuf用来存放得到的错误信息,errbuf_size是errbuf的大小;函数的返回值是存放完整错误信息所需的缓冲区大小。
使用:
先用regcomp()初始化正则式,然后用regexec()查找匹配串,最后别忘了用regfree()清除正则式。
有错误的话用regerror()来获取错误信息。
例:
#include <stdio.h>
//EOF
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
#define SUBSLEN 10 /* capacity of the regmatch_t array: whole match + sub-matches */
#define EBUFLEN 128 /* error buffer length */
#define BUFLEN 1024 /* matched buffer length */

/*
 * Demonstrates the POSIX regex API: compiles a fixed extended regular
 * expression, then repeatedly runs regexec() over a fixed subject string,
 * printing the offsets and text of every match and of each parenthesised
 * sub-expression.
 *
 * argc and argv are accepted for the conventional signature but are not used.
 *
 * Returns 0 when the scan finishes normally (whether or not anything
 * matched) and 1 when regcomp() or regexec() reports an error.
 */
int
main (int argc, char **argv)
{
    regex_t re;                 /* compiled regular expression */
    regmatch_t subs[SUBSLEN];   /* offsets of the match and its sub-matches */
    char matched[BUFLEN];       /* NUL-terminated copy of one matched span */
    char errbuf[EBUFLEN];       /* text produced by regerror() */
    const char string[] = "AAaba(125){3a}babAbAbCdCd123123 11(923){82}aslfk(72){4}";
    const char pattern[] = "(\\([0-9]+\\))(\\{[0-9]+\\}{1})";
    /*
     * Note: the backslash is an escape character both in C string literals
     * and in regular expressions.  To match a literal backslash in the
     * subject, the C literal therefore needs four of them:
     * C literal "\\\\" => regex "\\" => one backslash character.
     */
    size_t offset = 0;          /* where the next search starts in string */
    size_t nfound = 0;          /* number of matches reported so far */
    size_t nsubs;               /* entries of subs[] that are meaningful */
    size_t i;
    int status = 0;
    int err;

    (void) argc;
    (void) argv;

    printf ("String: %s\n", string);
    printf ("Pattern: \"%s\"\n", pattern);

    /* compile regular expression */
    err = regcomp (&re, pattern, REG_EXTENDED);
    if (err) {
        regerror (err, &re, errbuf, sizeof (errbuf));
        fprintf (stderr, "error: regcomp: %s\n", errbuf);
        return 1;
    }
    /* re_nsub is a size_t, so it must be printed with %zu rather than %d. */
    printf ("Total has subexpression: %zu\n", re.re_nsub);

    /* Entry 0 is the whole match; never walk past the end of subs[]. */
    nsubs = re.re_nsub + 1;
    if (nsubs > SUBSLEN) {
        nsubs = SUBSLEN;
    }

    for (;;) {
        /*
         * When resuming in the middle of the subject, tell regexec() that
         * the start of the buffer is not the beginning of a line, so that
         * a '^' anchor cannot match there.
         */
        int eflags = (offset > 0) ? REG_NOTBOL : 0;

        /* execute pattern match */
        err = regexec (&re, string + offset, (size_t) SUBSLEN, subs, eflags);
        if (err == REG_NOMATCH) {
            /* Only complain when nothing matched at all. */
            if (nfound == 0) {
                fprintf (stderr, "Sorry, no match ...\n");
            }
            break;
        }
        if (err) {
            regerror (err, &re, errbuf, sizeof (errbuf));
            fprintf (stderr, "error: regexec: %s\n", errbuf);
            status = 1;
            break;
        }

        /* neither REG_NOMATCH nor an error: the pattern matched */
        nfound++;
        printf ("\nOK, has matched ...\n\n");
        for (i = 0; i < nsubs; i++) {
            size_t len;

            if (i == 0) {
                printf ("begin: %ld, end: %ld, ",
                        (long) subs[i].rm_so, (long) subs[i].rm_eo);
            } else {
                printf ("subexpression %zu begin: %ld, end: %ld, ",
                        i, (long) subs[i].rm_so, (long) subs[i].rm_eo);
            }

            /* A sub-expression that took no part in the match has offset -1. */
            if (subs[i].rm_so < 0 || subs[i].rm_eo < subs[i].rm_so) {
                printf ("match: (none)\n");
                continue;
            }

            len = (size_t) (subs[i].rm_eo - subs[i].rm_so);
            if (len >= sizeof (matched)) {
                len = sizeof (matched) - 1; /* truncate rather than overflow */
            }
            memcpy (matched, string + offset + subs[i].rm_so, len);
            matched[len] = '\0';
            printf ("match: %s\n", matched);
        }

        /* Continue searching right after the end of this match. */
        offset += (size_t) subs[0].rm_eo;
        if (subs[0].rm_eo == subs[0].rm_so) {
            /* Empty match: step one character forward so the loop advances. */
            if (string[offset] == '\0') {
                break;
            }
            offset++;
        }
    } /* for (;;) */

    /* Single exit path so the compiled pattern is always released. */
    regfree (&re);
    return status;
}
2007-01-21
regex in Linux with C
/* regcomp(): is used to compile a regular expression into a form that is suitable for subsequent regexec() searches */
/* regexec(): used to match a null-terminated string against the precompiled pattern buffer */
/* regfree(): free the memory allocated to the pattern buffer by the compiling process */
/* regerror(): turn the error codes that can be returned by both regcomp() and regexec() into error message strings */
Posted by Davy Hawk at 1/21/2007 02:40:00 PM
Subscribe to:
Post Comments (Atom)
0 comments:
Post a Comment