/*
 * Listing 10 -- comp-2.c
 *
 * This module is the driver program for a variable order
 * finite context compression program.  The maximum order is
 * determined by command line option.  This particular version
 * also monitors compression ratios, and flushes the model whenever
 * the local (last 256 symbols) compression ratio hits 90% or higher.
 *
 * To build this program:
 *
 * Turbo C:     tcc -w -mc comp-2.c model-2.c bitio.c coder.c
 * QuickC:      qcl /AC /W3 comp-2.c model-2.c bitio.c coder.c
 * Zortech:     ztc -mc comp-2.c model-2.c bitio.c coder.c
 * *NIX:        cc -o comp-2 comp-2.c model-2.c bitio.c coder.c
 *
 * Command line options:
 *
 *  -f text_file_name        [defaults to test.inp]
 *  -c compressed_file_name  [defaults to test.cmp]
 *  -o order                 [defaults to 3 for model-2]
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "coder.h"
#include "model.h"
#include "bitio.h"

/*
 * The file pointers are used throughout this module.
 */
FILE *text_file;
FILE *compressed_file;

/*
 * Declarations for local procedures.
 */
void initialize_options( int argc, char **argv );
int check_compression( void );
void print_compression( void );

/*
 * The main procedure is similar to the main found in COMP-1.C.
 * It has to initialize the coder, the bit oriented I/O, the
 * standard I/O, and the model.  It then sits in a loop reading
 * input symbols and encoding them.  One difference is that every
 * 256 symbols a compression check is performed.  If the compression
 * ratio exceeds 90%, a flush character is encoded.  This flushes
 * the encoding model, and will cause the decoder to flush its model
 * when the file is being expanded.  The second difference is that
 * each symbol is repeatedly encoded until a successful encoding
 * occurs.  When trying to encode a character in a particular order,
 * the model may have to transmit an ESCAPE character.  If this
 * is the case, the character has to be retransmitted using a lower
 * order.  This process repeats until a successful match is found of
 * the symbol in a particular context.
Usually this means going down * no further than the order -1 model. However, the FLUSH and DONE * symbols do drop back to the order -2 model. Note also that by * all rights, add_character_to_model() and update_model() logically * should be combined into a single routine. */ void main( int argc, char **argv ) { SYMBOL s; int c; int escaped; int flush = 0; long int text_count = 0; initialize_options( --argc, ++argv ); initialize_model(); initialize_output_bitstream(); initialize_arithmetic_encoder(); for ( ; ; ) { if ( ( ++text_count & 0x0ff ) == 0 ) flush = check_compression(); if ( !flush ) c = getc( text_file ); else c = FLUSH; if ( c == EOF ) c = DONE; do { escaped = convert_int_to_symbol( c, &s ); encode_symbol( compressed_file, &s ); } while ( escaped ); if ( c == DONE ) break; if ( c == FLUSH ) { flush_model(); flush = 0; } update_model( c ); add_character_to_model( c ); } flush_arithmetic_encoder( compressed_file ); flush_output_bitstream( compressed_file ); print_compression(); fputc( '\n', stderr ); exit( 0 ); } /* * This routine checks for command line options, and opens the * input and output files. The only other command line option * besides the input and output file names is the order of the model, * which defaults to 3. 
*/ void initialize_options( int argc, char **argv ) { char text_file_name[ 81 ]; char compressed_file_name[ 81 ]; strcpy( compressed_file_name, "test.cmp" ); strcpy( text_file_name, "test.inp" ); while ( argc-- > 0 ) { if ( strcmp( *argv, "-f" ) == 0 ) { argc--; strcpy( text_file_name, *++argv ); } else if ( strcmp( *argv, "-c" ) == 0 ) { argc--; strcpy( compressed_file_name, *++argv ); } else if ( strcmp( *argv, "-o" ) == 0 ) { argc--; max_order = atoi( *++argv ); } else { fprintf( stderr, "\nUsage: COMP-2 [-o order] " ); fprintf( stderr, "[-f text file] [-c compressed file]\n" ); exit( -1 ); } argc--; argv++; } text_file = fopen( text_file_name, "rb" ); compressed_file = fopen( "test.cmp", "wb" ); if ( text_file == NULL || compressed_file == NULL ) { printf( "Had trouble opening one of the files!\n" ); exit( -1 ); } setvbuf( text_file, NULL, _IOFBF, 4096 ); setbuf( stdout, NULL ); printf( "Compressing %s to %s, order %d.\n", text_file_name, compressed_file_name, max_order ); } /* * This routine is called to print the current compression ratio. * It prints out the number of input bytes, the number of output bytes, * and the bits per byte compression ratio. This is done both as a * pacifier and as a seat-of-the-pants diagnostice. A better version * of this routine would also print the local compression ratio. */ void print_compression() { long total_input_bytes; long total_output_bytes; total_input_bytes = ftell( text_file ); total_output_bytes = bit_ftell_output( compressed_file ); if ( total_output_bytes == 0 ) total_output_bytes = 1; fprintf( stderr,"%ld/%ld, %2.3f\r", total_input_bytes, total_output_bytes, 8.0 * total_output_bytes / total_input_bytes ); } /* * This routine is called once every 256 input symbols. Its job is to * check to see if the compression ratio hits or exceeds 90%. 
If the * output size is 90% of the input size, it means not much compression * is taking place, so we probably ought to flush the statistics in the * model to allow for more current statistics to have greater impactic. * This heuristic approach does seem to have some effect. */ int check_compression() { static long local_input_marker = 0L; static long local_output_marker = 0L; long total_input_bytes; long total_output_bytes; int local_ratio; print_compression(); total_input_bytes = ftell( text_file ) - local_input_marker; total_output_bytes = bit_ftell_output( compressed_file ); total_output_bytes -= local_output_marker; if ( total_output_bytes == 0 ) total_output_bytes = 1; local_ratio = (int)( ( total_output_bytes * 100 ) / total_input_bytes ); local_input_marker = ftell( text_file ); local_output_marker = bit_ftell_output( compressed_file ); if ( local_ratio > 90 && flushing_enabled ) { fprintf( stderr, "Flushing... \r" ); return( 1 ); } return( 0 ); }