diff -ur pcre2-10.23/doc/pcre2grep.1 pcre2-10-23/doc/pcre2grep.1 --- pcre2-10.23/doc/pcre2grep.1 2017-01-17 03:38:42 +1000 +++ pcre2-10-23/doc/pcre2grep.1 2017-02-17 17:05:36 +1000 @@ -1,4 +1,4 @@ -.TH PCRE2GREP 1 "31 December 2016" "PCRE2 10.23" +.TH PCRE2GREP 1 "17 February, 2017" "PCRE2 10.23" .SH NAME pcre2grep - a grep with Perl-compatible regular expressions. .SH SYNOPSIS @@ -525,6 +525,10 @@ use of JIT at run time. It is provided for testing and working round problems. It should never be needed in normal use. .TP +\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP +Show only text (see next option), which may contain escape sequences (see +that section below). +.TP \fB-o\fP, \fB--only-matching\fP Show only the part of the line that matched a pattern instead of the whole line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and @@ -534,7 +538,7 @@ lines), no output is generated, but the return code is set appropriately. If the matched portion of the line is empty, nothing is output unless the file name or line number are being printed, in which case they are shown on an -otherwise empty line. This option is mutually exclusive with +otherwise empty line. This option is mutually exclusive with \fB--output\fP, \fB--file-offsets\fP and \fB--line-offsets\fP. .TP \fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP @@ -694,6 +698,28 @@ character. Otherwise \fBpcre2grep\fP will assume that it has no data. . . +.SH "ESCAPE SEQUENCES" +.rs +.sp +The text used for \fB--output\fP may contain escape sequences started by a +dollar character: $<digits> or ${<digits>} is replaced by the captured +substring of the given decimal number; zero will substitute the whole match. If +the number is greater than the number of capturing substrings, or if the +capture is unset, the replacement is empty. +.P +$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by +newline; $r by carriage return; $t by tab; $v by vertical tab. 
+.P +$o<digits> is replaced by the character represented by the given octal +number; up to three digits are processed. +.P +$x<digits> is replaced by the character represented by the given hexadecimal +number; up to two digits are processed. +.P +Any other character is substituted by itself. In particular, $$ is replaced by +a single dollar. +. +. .SH "CALLING EXTERNAL SCRIPTS" .rs .sp @@ -744,6 +770,10 @@ callout to be ignored. If running the program fails for any reason (including the non-existence of the executable), a local matching failure occurs and the matcher backtracks in the normal way. +.P +Alternatively, if the string starts with pipe, the remainder is used as a +string for \fB--output\fP. In this case, \fB--om-separator\fP is used to +separate each callout, defaulting to newline. . . .SH "MATCHING ERRORS" @@ -793,6 +823,6 @@ .rs .sp .nf -Last updated: 31 December 2016 +Last updated: 17 February, 2017 Copyright (c) 1997-2016 University of Cambridge. .fi diff -ur pcre2-10.23/RunGrepTest pcre2-10-23/RunGrepTest --- pcre2-10.23/RunGrepTest 2017-02-12 23:52:33 +1000 +++ pcre2-10-23/RunGrepTest 2017-02-17 18:15:33 +1000 @@ -598,6 +598,10 @@ $valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep echo "RC=$?" >>testtrygrep +echo "---------------------------- Test 120 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + # Now compare the results. 
$cf $srcdir/testdata/grepoutput testtrygrep @@ -667,6 +671,10 @@ echo "Testing pcre2grep script callouts" $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep + $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1")' $srcdir/testdata/grepinputv >>testtrygrep + $valgrind $vjs $pcre2grep --om-separator / '(T)(?C"|$1")' $srcdir/testdata/grepinputv >>testtrygrep + # The above has no newline, which 'diff -ub' ignores, so add one. + echo / >>testtrygrep $cf $srcdir/testdata/grepoutputC testtrygrep if [ $? != 0 ] ; then exit 1; fi else diff -ur pcre2-10.23/RunGrepTest.bat pcre2-10-23/RunGrepTest.bat --- pcre2-10.23/RunGrepTest.bat 2017-02-14 20:23:29 +1000 +++ pcre2-10-23/RunGrepTest.bat 2017-02-17 18:15:36 +1000 @@ -585,6 +585,10 @@ %pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep echo RC=^%ERRORLEVEL%>>testtrygrep +echo ---------------------------- Test 120 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + :: Now compare the results. %cf% %srcdir%\testdata\grepoutput testtrygrep %cfout% @@ -654,6 +658,10 @@ echo Testing pcre2grep script callouts %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep + %pcre2grep% "(T)(?C'|$0:$1')" %srcdir%/testdata/grepinputv >>testtrygrep + %pcre2grep% --om-separator / "(T)(?C'|$1')" %srcdir%/testdata/grepinputv >>testtrygrep + :: The above has no newline, which 'diff -ub' ignores, so add one. 
+ echo />>testtrygrep %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% if ERRORLEVEL 1 exit /b 1 ) else ( diff -ur pcre2-10.23/src/pcre2grep.c pcre2-10-23/src/pcre2grep.c --- pcre2-10.23/src/pcre2grep.c 2017-02-12 23:29:54 +1000 +++ pcre2-10-23/src/pcre2grep.c 2017-02-17 16:04:37 +1000 @@ -175,8 +175,9 @@ static const char *DEE_option = NULL; static const char *locale = NULL; static const char *newline_arg = NULL; -static const char *om_separator = ""; +static const char *om_separator = NULL; static const char *stdin_name = "(standard input)"; +static const char *output = NULL; static char *main_buffer = NULL; @@ -405,6 +406,7 @@ #else { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" }, #endif + { OP_STRING, 'O', &output, "output=text", "show only text" }, { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" }, { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, @@ -1531,7 +1533,7 @@ case '\v': /* VT */ case '\f': /* FF */ case '\r': /* CR */ -#ifndef EBCDIE +#ifndef EBCDIC case 0x85: /* Unicode NEL */ case 0x2028: /* Unicode LS */ case 0x2029: /* Unicode PS */ @@ -1551,8 +1553,6 @@ - - /************************************************* * Print the previous "after" lines * *************************************************/ @@ -1654,6 +1654,277 @@ } +/************************************************* +* Check output text for errors * +*************************************************/ + +static BOOL +syntax_check_output(const char *string, BOOL callout) +{ +const char *begin = string; +for (; *string != 0; string++) + { + if (*string == '$') + { + PCRE2_SIZE capture_id = 0; + BOOL brace = FALSE; + + string++; + + /* Syntax error: a character must be present after $. 
*/ + if (*string == 0) + { + if (!callout) + fprintf(stderr, "pcre2grep: Error in output at offset %d: %s\n", + (int)(string - begin), "no character after $"); + return FALSE; + } + + if (*string == '{') + { + /* Must be a decimal number in braces, e.g: {5} or {38} */ + string++; + + brace = TRUE; + } + + if ((*string >= '1' && *string <= '9') || (!callout && *string == '0')) + { + do + { + /* Maximum capture id is 65535. */ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + } + while (*string >= '0' && *string <= '9'); + + if (brace) + { + /* Syntax error: closing brace is missing. */ + if (*string != '}') + { + if (!callout) + fprintf(stderr, "pcre2grep: Error in output at offset %d: %s\n", + (int)(string - begin), "missing closing brace"); + return FALSE; + } + } + else + { + /* To negate the effect of the for. */ + string--; + } + } + else if (brace) + { + /* Syntax error: a decimal number required. */ + if (!callout) + fprintf(stderr, "pcre2grep: Error in output at offset %d: %s\n", + (int)(string - begin), "decimal number expected"); + return FALSE; + } + else if (*string == 'o') + { + string++; + + if (*string < '0' || *string > '7') + { + /* Syntax error: an octal number required. */ + if (!callout) + fprintf(stderr, "pcre2grep: Error in output at offset %d: %s\n", + (int)(string - begin), "octal number expected"); + return FALSE; + } + } + else if (*string == 'x') + { + string++; + + if (!isxdigit((unsigned char)*string)) + { + /* Syntax error: a hexadecimal number required. */ + if (!callout) + fprintf(stderr, "pcre2grep: Error in output at offset %d: %s\n", + (int)(string - begin), "hexadecimal number expected"); + return FALSE; + } + } + } + } + + return TRUE; +} + + +/************************************************* +* Display output text * +*************************************************/ + +/* Display the output, which is assumed to have already been syntax checked. 
+Output may contain escape sequences started by the dollar sign. The escape +sequences are substituted as follows: + + $<digits> or ${<digits>} is replaced by the captured substring of the given + decimal number; zero will substitute the whole match. If the number is + greater than the number of capturing substrings, or if the capture is unset, + the replacement is empty. + + $a is replaced by bell. + $b is replaced by backspace. + $e is replaced by escape. + $f is replaced by form feed. + $n is replaced by newline. + $r is replaced by carriage return. + $t is replaced by tab. + $v is replaced by vertical tab. + + $o<digits> is replaced by the character represented by the given octal + number; up to three digits are processed. + + $x<digits> is replaced by the character represented by the given hexadecimal + number; up to two digits are processed. + + Any other character is substituted by itself. E.g: $$ is replaced by a single + dollar. + +Arguments: + string: the output text + callout: TRUE for the builtin callout, FALSE for --output + subject: the start of the subject + ovector: capture offsets + capture_top: number of captures + +Returns: TRUE if something was output, other than newline + FALSE if nothing was output, or newline was last output +*/ + +static BOOL +display_output(const char *string, BOOL callout, PCRE2_SPTR subject, + PCRE2_SIZE *ovector, PCRE2_SIZE capture_top) +{ +BOOL printed = FALSE; + +for (; *string != 0; string++) + { + int ch = EOF; + if (*string == '$') + { + PCRE2_SIZE capture_id = 0; + BOOL brace = FALSE; + + string++; + + if (*string == '{') + { + /* Must be a decimal number in braces, e.g: {5} or {38} */ + string++; + + brace = TRUE; + } + + if ((*string >= '1' && *string <= '9') || (!callout && *string == '0')) + { + do + { + /* Maximum capture id is 65535. */ + if (capture_id <= 65535) + capture_id = capture_id * 10 + (*string - '0'); + + string++; + } + while (*string >= '0' && *string <= '9'); + + if (!brace) + { + /* To negate the effect of the for. 
*/ + string--; + } + + if (capture_id < capture_top) + { + PCRE2_SIZE capturesize; + capture_id *= 2; + + capturesize = ovector[capture_id + 1] - ovector[capture_id]; + if (capturesize > 0) + { + print_match(subject + ovector[capture_id], capturesize); + printed = TRUE; + } + } + } + else if (*string == 'a') ch = '\a'; + else if (*string == 'b') ch = '\b'; +#ifndef EBCDIC + else if (*string == 'e') ch = '\033'; +#else + else if (*string == 'e') ch = '\047'; +#endif + else if (*string == 'f') ch = '\f'; + else if (*string == 'r') ch = '\r'; + else if (*string == 't') ch = '\t'; + else if (*string == 'v') ch = '\v'; + else if (*string == 'n') + { + fprintf(stdout, STDOUT_NL); + printed = FALSE; + } + else if (*string == 'o') + { + string++; + + ch = *string - '0'; + if (string[1] >= '0' && string[1] <= '7') + { + string++; + ch = ch * 8 + (*string - '0'); + } + if (string[1] >= '0' && string[1] <= '7') + { + string++; + ch = ch * 8 + (*string - '0'); + } + } + else if (*string == 'x') + { + string++; + + if (*string >= '0' && *string <= '9') + ch = *string - '0'; + else + ch = (*string | 0x20) - 'a' + 10; + if (isxdigit((unsigned char)string[1])) + { + string++; + ch *= 16; + if (*string >= '0' && *string <= '9') + ch += *string - '0'; + else + ch += (*string | 0x20) - 'a' + 10; + } + } + else + { + ch = *string; + } + } + else + { + ch = *string; + } + if (ch != EOF) + { + fprintf(stdout, "%c", ch); + printed = TRUE; + } + } + +return printed; +} + + #ifdef SUPPORT_PCRE2GREP_CALLOUT /************************************************* @@ -1667,7 +1938,7 @@ program_name|param1|param2|... -Any substirng (including the program name) can contain escape sequences +Any substring (including the program name) can contain escape sequences started by the dollar character. The escape sequences are substituted as follows: @@ -1679,6 +1950,10 @@ Any other character is substituted by itself. E.g: $$ is replaced by a single dollar or $| replaced by a pipe character. 
+Alternatively, if string starts with pipe, the remainder is taken as an output +string, same as --output. In this case, --om-separator is used to separate each +callout, defaulting to newline. + Example: echo -e "abcde\n12345" | pcre2grep \ @@ -1721,6 +1996,29 @@ /* Only callout with strings are supported. */ if (string == NULL || length == 0) return 0; +/* If there's no command, output the remainder directly. */ + +if (*string == '|') + { + static BOOL printed = FALSE; + + string++; + + if (!syntax_check_output(string, TRUE)) return 0; + + if (printed) + { + if (om_separator == NULL) om_separator = STDOUT_NL; + fprintf(stdout, "%s", om_separator); + } + printed = TRUE; + show_only_matching = TRUE; + + display_output(string, TRUE, subject, ovector, capture_top); + + return 1; + } + /* Checking syntax and compute the number of string fragments. Callout strings are ignored in case of a syntax error. */ @@ -2253,6 +2551,15 @@ (int)(filepos + matchptr + offsets[0] - ptr), (int)(offsets[1] - offsets[0])); + /* Handle --output (which has already been syntax checked) */ + + else if (output != NULL) + { + if (display_output(output, FALSE, matchptr, offsets, mrc) || + printname != NULL || number) + fprintf(stdout, STDOUT_NL); + } + /* Handle --only-matching, which may occur many times */ else @@ -2268,7 +2575,8 @@ int plen = offsets[2*n + 1] - offsets[2*n]; if (plen > 0) { - if (printed) fprintf(stdout, "%s", om_separator); + if (printed && om_separator != NULL) + fprintf(stdout, "%s", om_separator); print_match(matchptr + offsets[n*2], plen); printed = TRUE; } @@ -2945,7 +3253,6 @@ - /************************************************* * Construct printed ordinal * *************************************************/ @@ -3515,24 +3822,34 @@ if (before_context == 0) before_context = both_context; } -/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. 
-However, all three set show_only_matching because they display, each in their -own way, only the data that has matched. */ +/* Only one of --only-matching, --output, --file-offsets, or --line-offsets is +permitted. However, all four set show_only_matching because they display, each +in their own way, only the data that has matched. */ -if ((only_matching != NULL && (file_offsets || line_offsets)) || - (file_offsets && line_offsets)) +{ +int only_opts = 0; +if (only_matching != NULL) only_opts++; +if (output != NULL) only_opts++; +if (file_offsets) only_opts++; +if (line_offsets) only_opts++; +if (only_opts > 1) { - fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets " - "and/or --line-offsets\n"); + fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, " + "--file-offsets and/or --line-offsets\n"); pcre2grep_exit(usage(2)); } +} + +/* Check --output for errors. */ + +if (output != NULL && !syntax_check_output(output, FALSE)) goto EXIT2; /* Put limits into the match data block. */ if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit); -if (only_matching != NULL || file_offsets || line_offsets) +if (only_matching != NULL || output != NULL || file_offsets || line_offsets) show_only_matching = TRUE; /* If a locale has not been provided as an option, see if the LC_CTYPE or @@ -3824,6 +4141,13 @@ else if (frc == 0 && rc == 1) rc = 0; } +#ifdef SUPPORT_PCRE2GREP_CALLOUT +/* If separating builtin echo callouts by implicit newline, add one more for +the final item. */ + +if (om_separator == STDOUT_NL) fprintf(stdout, STDOUT_NL); +#endif + /* Show the total number of matches if requested, but not if only one file's count was printed. 
*/ diff -ur pcre2-10.23/testdata/grepoutput pcre2-10-23/testdata/grepoutput --- pcre2-10.23/testdata/grepoutput 2017-02-11 03:36:15 +1000 +++ pcre2-10-23/testdata/grepoutput 2017-02-17 17:55:15 +1000 @@ -829,3 +829,8 @@ xyz --- RC=0 +---------------------------- Test 120 ------------------------------ +./testdata/grepinput:the binary zero.:zerothe. +./testdata/grepinput:a binary zero:zeroa +./testdata/grepinput:the binary zero.:zerothe. +RC=0 diff -ur pcre2-10.23/testdata/grepoutputC pcre2-10-23/testdata/grepoutputC --- pcre2-10.23/testdata/grepoutputC 2016-04-01 21:47:38 +1000 +++ pcre2-10-23/testdata/grepoutputC 2017-02-17 18:07:07 +1000 @@ -6,3 +6,6 @@ Arg1: [ t] [ t] The quick brown This time it jumps and jumps and jumps. +0:T +0:T +T/T/