Here are two simple grep implementations in C, one for the POSIX API, and one using PCRE.
POSIX <regex.h> C program -- regrep.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <regex.h>
char enter_reverse_mode[] = "\33[7m";
char exit_reverse_mode[] = "\33[0m";
int main(int argc, char **argv)
{
const char *pattern;
int ec;
regex_t expr;
regmatch_t rm;
char buf[12];
size_t offset, length;
int flags;
assert(argc == 2);
pattern = argv[1];
if ((ec = regcomp(&expr, pattern, 0)) != 0) {
char str[256];
regerror(ec, &expr, str, sizeof str);
fprintf(stderr, "%s: %s\n", pattern, str);
return EXIT_FAILURE;
}
flags = 0;
while (fgets(buf, sizeof buf, stdin)) {
/* Find the end of the buffer */
length = strcspn(buf, "\n");
/* Check for beginning and end of line. */
if (flags & REG_NOTEOL) {
/* If the last line read was a partial line, then we are
* not at the beginning of the line. */
flags |= REG_NOTBOL;
if (buf[length] == '\n')
flags &= ~REG_NOTEOL;
}
else if (buf[length] != '\n') {
/* We've read a partial line. */
flags = REG_NOTEOL;
}
else {
/* We have a complete line. */
flags = 0;
}
/* get rid of any newline character */
buf[length] = '\0';
/* start at beginning of the buffer */
offset = 0;
while (regexec(&expr, buf + offset, 1, &rm, flags) == 0) {
assert(rm.rm_so >= 0);
/* we're not smart enough to support empty matches. */
assert(rm.rm_eo > rm.rm_so);
/* print the portion which precedes the match, then the match */
printf("%.*s%s%.*s%s",
rm.rm_so, buf + offset,
enter_reverse_mode,
rm.rm_eo - rm.rm_so, buf + offset + rm.rm_so,
exit_reverse_mode);
/* start next match at the end of this one. */
offset += rm.rm_eo;
/* we're no longer at the beginning of the line */
flags |= REG_NOTBOL;
}
/* print remainder of the line */
printf("%s", buf + offset);
/* print a newline if we're at the end of a line */
if (!(flags & REG_NOTEOL))
putchar('\n');
}
return EXIT_SUCCESS;
}
PCRE <pcre.h> C program -- pcregrep.c
/*
* On FreeBSD:
* # cd /usr/ports/devel/pcre ; make install clean
* # pkg_info | grep -i pcre
*
* compile: gcc -o test -I/usr/local/include test.c -L/usr/local/lib -lpcre
* run: ./test 'cat|dog'
* this is a dog.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <pcre.h>
char enter_reverse_mode[] = "\33[7m";
char exit_reverse_mode[] = "\33[0m";
int main(int argc, char **argv)
{
const char *pattern;
const char *errstr;
int erroffset;
pcre *expr;
char line[512];
assert(argc == 2); /* XXX fixme */
pattern = argv[1];
if (!(expr = pcre_compile(pattern, 0, &errstr, &erroffset, 0))) {
fprintf(stderr, "%s: %s\n", pattern, errstr);
return EXIT_FAILURE;
}
while (fgets(line, sizeof line, stdin)) {
size_t len = strcspn(line, "\n");
int matches[2];
int offset = 0;
int flags = 0;
line[len] = '\0';
while (0 < pcre_exec(expr, 0, line, len, offset, flags, matches, 2)) {
printf("%.*s%s%.*s%s",
matches[0] - offset, line + offset,
enter_reverse_mode,
matches[1] - matches[0], line + matches[0],
exit_reverse_mode);
offset = matches[1];
flags |= PCRE_NOTBOL;
}
printf("%s\n", line + offset);
}
return EXIT_SUCCESS;
}
No comments:
Post a Comment