/*
  text.c - Scan a binary file for text.

  Find text within a file. Text is regarded as starting with four letters
  (including space) one of which must be a vowel (including 'y').

  Jason Hood.
  Public Domain.

  Started:  8 June, 1996.
  Finished: 9 June, 1996.

  15 November, 1996:
    added space and 'y'.

  21 & 22 March, 1998:
    reformatted;
    test vowels and consonants with a character map;
    test now involves 32 characters, or up to a space. There can not be
     five vowels or consonants in a row.

  19 June, 1998:
    added filters, more than one file.

  26 December, 1998:
    added "Unicode" -u option (even/odd bytes);
    xor filter is now -x option.

  5 January, 1999:
    added offset -o option.

  26 February, 1999:
    added 'e' and 'o' options to -u for even/odd only;
    added -s option to subtract byte from a number.

  25 January, 2001:
    replaced -u with -j;
    -u really is Unicode, now (well, only LSB and ASCII);
    extended charmap[], removing ctype.h.

  30 January, 2003:
    rewrote as C, reformatted;
    if redirected, add the newline when wrapping lines;
    use the current screen width (even if redirected);
    removed filter0.

  15 October, 2003, v1.00:
    Win32 console support;
    replaced -o offset with -a add;
    limit to no more than four consecutive vowels, five consonants;
    have a consonant (or space) in the first four.
*/

#define PVERS "1.00"
#define PDATE "15 October, 2003"

// NOTE(review): the header names below were missing from the directives and
// have been restored from what the program uses (FILE/puts/printf, strtol/
// atoi, strcmp, isatty, ScreenCols, the Win32 console API and the DJGPP
// start-up hook) - confirm against the original distribution.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// The location of isatty() & screen width.
#if defined( __DJGPP__ )
# include <unistd.h>
# include <pc.h>
#elif defined( __unix__ ) || defined( __APPLE__ )
# include <unistd.h>            // POSIX hosts have no <io.h>
#else
# include <io.h>
# ifdef _WIN32
#  include <windows.h>
# endif
#endif

#ifdef __DJGPP__
#include <crt0.h>
// Replace the start-up hook with one that does nothing.
void __crt0_load_environment_file( char* dummy ) { return; }
#endif


// Character class bits stored in charmap[].
#define Alpha  1
#define Print  2
#define Vowel  4
#define Conso  8

// Shorthand used only while building the table (undefined again below).
#define P  Print
#define V  (Alpha | Print | Vowel)
#define C  (Alpha | Print | Conso)

// Class bits for every byte value. Only 0x20..0x7e are printable; the
// letters are split into vowels (a, e, i, o, u and y) and consonants.
int charmap[256] =
{
  //     0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
  /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 2 */ P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P,
  /* 3 */ P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P,
  /* 4 */ P, V, C, C, C, V, C, C, C, V, C, C, C, C, C, V,
  /* 5 */ C, C, C, C, C, V, C, C, C, V, C, P, P, P, P, P,
  /* 6 */ P, V, C, C, C, V, C, C, C, V, C, C, C, C, C, V,
  /* 7 */ C, C, C, C, C, V, C, C, C, V, C, P, P, P, P, 0,
  /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

#undef C
#undef V
#undef P

// Table-driven classification. These deliberately reuse the <ctype.h> names;
// drop any macro versions a system header may already have supplied so the
// definitions below are not redefinitions.
#undef isalpha
#undef isprint

// Each yields a non-zero value when the byte has the given class bit.
#define isalpha(c)  (charmap[(unsigned char)(c)] & Alpha)
#define isprint(c)  (charmap[(unsigned char)(c)] & Print)
#define isvowel(c)  (charmap[(unsigned char)(c)] & Vowel)
#define iscons(c)   (charmap[(unsigned char)(c)] & Conso)


// Scan code map. Starts at 0x02 and finishes at 0x35, inclusive.
char scanmap[] = { '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-', '=', '\b', '\t', 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', 'O', 'P', '[', ']', '\n', 0, 'A', 'S', 'D', 'F', 'G', 'H', 'J', 'K', 'L', ';', '\'', '`', 0, '\\', 'Z', 'X', 'C', 'V', 'B', 'N', 'M', ',', '.', '/' }; typedef void (*filter_function)( char* ); int filter_num = 0; int xor = 0; int add = 0; int sub = 0; int unicode = 0; int jump = 0; filter_function filter; void filter1( char* c ) // 0..25 becomes A..Z { *c = (*c < 26) ? *c + 'A' : 0; } void filter2( char* c ) // 1..26 becomes A..Z { *c = (*c < 27 && *c != 0) ? *c - 1 + 'A' : 0; } void filter3( char* c ) // 2..53 as a scancode { *c = (*c < 0x02 || *c > 0x35) ? 0 : scanmap[*c - 0x02]; } void filter4( char* c ) // ROT13 { if (isalpha( *c )) { *c += (*c <= 'M') ? 13 : (*c <= 'Z') ? -13 : (*c <= 'm') ? 13 : // (*c <= 'z') ? -13 : 0; // In preparation for non-English chars -13; } } void filter5( char* c ) // Strip high bit { *c &= 0x7f; } filter_function filter_fn[] = { filter1, filter2, filter3, filter4, filter5 }; // Read a character from a file and transform it. // Returns 0 for EOF, 1 otherwise. int get( FILE* in, char* c ) { int ch; ch = getc( in ); if (ch == EOF) return 0; if (unicode && getc( in ) != 0) { *c = '\0'; return 1; } *c = ch; *c ^= xor; *c += add; if (sub) *c = sub - *c; if (filter_num) filter( c ); if (jump) { fgetc( in ); if (unicode) fgetc( in ); } return 1; } int main( int argc, char* argv[] ) { int only_even = 0, only_odd = 0; int done_odd; int size; FILE* file; char test[33]; int vowel, cons, let; int redir; int cols, col; int f, j, k; if (argc == 1 || strcmp( argv[1], "/?" ) == 0 || strcmp( argv[1], "-?" ) == 0 || strcmp( argv[1], "--help" ) == 0) { puts( "Text by Jason Hood .\n" "Version "PVERS" ("PDATE"). Public Domain.\n" "http://misc.adoxa.cjb.net/\n" "\n" "Try and find readable text in binary files. 
Readable text starts with\n" "four letters (including space), with at least one vowel (including 'y')\n" "and consonant (or space). A space or non-printable character must occur\n" "within 32 characters. If a non-printable character is found, there can\n" "not be more than four vowels or five consonants consecutively and half\n" "of the characters must be letters. Once text has been found, the file\n" "offset is shown and all printable characters are displayed.\n" "\n" "text [-] [-x[]] [-a] [-s] [-u] [-j[e|o]] file(s)\n" "\n" "where is\n" " 1 - 0..25 is A..Z -x xor byte with num (default is 255)\n" " 2 - 1..26 is A..Z -a add num to each byte\n" " 3 - Keyboard scan codes -s sub each byte from num\n" " 4 - ROT13 -u Unicode (little-endian and ASCII only)\n" " 5 - Strip eighth bit -j jump a byte (or word) [even/odd only]" ); return 0; } while (argv[1][0] == '-') { j = (int)strtol( argv[1] + 2, NULL, 0 ); switch (argv[1][1]) { case 'x': xor = (j == 0) ? 255 : j; break; case 'a': add = j; break; case 's': sub = j; break; case 'u': unicode = 1; break; case 'j': jump = 1; switch (argv[1][2]) { case 'e': only_even = 1; break; case 'o': only_odd = 1; break; } break; default: filter_num = atoi( argv[1] + 1 ); if (filter_num > 5 || filter_num <= 0) { printf( "Unknown option: %s\n", argv[1] ); return 1; } filter = filter_fn[filter_num - 1]; break; } ++argv; if (--argc == 1) return 1; } size = 1 + unicode; if (jump) size *= 2; redir = !isatty( 1 ); #if defined( _WIN32 ) { CONSOLE_SCREEN_BUFFER_INFO csbi; // Use stderr in case stdout is redirected (and hope stderr isn't). GetConsoleScreenBufferInfo( GetStdHandle( STD_ERROR_HANDLE ), &csbi ); cols = csbi.dwSize.X; } #elif defined( __DJGPP__ ) cols = ScreenCols(); #else // 16-bit DOS cols = *(unsigned char far*)0x44a; #endif for (f = 1; f < argc; ++f) { if (argc > 2) printf( "%s:", argv[f] ); file = fopen( argv[f], "rb" ); if (!file) { if (argc == 2) printf( "%s:", argv[f] ); puts( " unable to open." 
); continue; } if (argc > 2) putchar( '\n' ); done_odd = (only_even || only_odd); if (only_odd) { fgetc( file ); if (unicode) fgetc( file ); } for (;;) { while (get( file, test ) && !isalpha( *test )) ; if (feof( file )) { if (jump && !done_odd) { done_odd = 1; fseek( file, 1 + unicode, SEEK_SET ); clearerr( file ); continue; } break; } vowel = isvowel( *test ); cons = iscons( *test ); for (j = 1; j < 4; ++j) { if (get( file, test + j ) && (isalpha( test[j] ) || test[j] == ' ')) { if (isvowel( test[j] )) vowel = 1; else cons = 1; } else break; } if (j != 4) continue; if (!vowel || !cons) { fseek( file, -3 * size, SEEK_CUR ); continue; } for (; j < 32; ++j) { if (!get( file, test + j )) { test[j] = '\0'; fseek( file, 1, SEEK_END ); // Offset adjustment break; } if (test[j] == ' ' || !isprint( test[j] )) break; } if (j == 32) { while (get( file, test ) && isprint( *test )) ; continue; } if (test[j] != ' ') { let = vowel = cons = 0; for (k = 0; k < j; ++k) { if (isalpha( test[k] )) { ++let; if (isvowel( test[k] )) { if (++vowel > 4) { let = 0; break; } cons = 0; } else { if (++cons > 5) { let = 0; break; } vowel = 0; } } else vowel = cons = 0; } if (let < j / 2) continue; } test[j + (test[j] == ' ')] = '\0'; printf( "%6lx: %s", ftell( file ) - (j + 1) * size, test ); if (isprint( test[j] )) { col = 9 + j; while (get( file, test ) && isprint( *test )) { putchar( *test ); if (++col == cols) { col = 8; if (redir) putchar( '\n' ); fputs( " ", stdout ); } } } putchar( '\n' ); } fclose( file ); if (argc > 2 && f < argc - 1) putchar( '\n' ); } return 0; }