/* * Listing 2 -- coder.c * * This file contains the code needed to accomplish arithmetic * coding of a symbol. All the routines in this module need * to know in order to accomplish coding is what the probabilities * and scales of the symbol counts are. This information is * generally passed in a SYMBOL structure. * * This code was first published by Ian H. Witten, Radford M. Neal, * and John G. Cleary in "Communications of the ACM" in June 1987, * and has been modified slightly for this article. The code * is published here with permission. */ #include #include "coder.h" #include "bitio.h" /* * These four variables define the current state of the arithmetic * coder/decoder. They are assumed to be 16 bits long. Note that * by declaring them as short ints, they will actually be 16 bits * on most 80X86 and 680X0 machines, as well as VAXen. */ static unsigned short int code; /* The present input code value */ static unsigned short int low; /* Start of the current code range */ static unsigned short int high; /* End of the current code range */ long underflow_bits; /* Number of underflow bits pending */ /* * This routine must be called to initialize the encoding process. * The high register is initialized to all 1s, and it is assumed that * it has an infinite string of 1s to be shifted into the lower bit * positions when needed. */ void initialize_arithmetic_encoder() { low = 0; high = 0xffff; underflow_bits = 0; } /* * This routine is called to encode a symbol. The symbol is passed * in the SYMBOL structure as a low count, a high count, and a range, * instead of the more conventional probability ranges. The encoding * process takes two steps. First, the values of high and low are * updated to take into account the range restriction created by the * new symbol. Then, as many bits as possible are shifted out to * the output stream. Finally, high and low are stable again and * the routine returns. */ void encode_symbol( FILE *stream, SYMBOL *s ) { long range; /* * These three lines rescale high and low for the new symbol. */ range = (long) ( high-low ) + 1; high = low + (unsigned short int ) (( range * s->high_count ) / s->scale - 1 ); low = low + (unsigned short int ) (( range * s->low_count ) / s->scale ); /* * This loop turns out new bits until high and low are far enough * apart to have stabilized. */ for ( ; ; ) { /* * If this test passes, it means that the MSDigits match, and can * be sent to the output stream. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { output_bit( stream, high & 0x8000 ); while ( underflow_bits > 0 ) { output_bit( stream, ~high & 0x8000 ); underflow_bits--; } } /* * If this test passes, the numbers are in danger of underflow, because * the MSDigits don't match, and the 2nd digits are just one apart. */ else if ( ( low & 0x4000 ) && !( high & 0x4000 )) { underflow_bits += 1; low &= 0x3fff; high |= 0x4000; } else return ; low <<= 1; high <<= 1; high |= 1; } } /* * At the end of the encoding process, there are still significant * bits left in the high and low registers. We output two bits, * plus as many underflow bits as are necessary. */ void flush_arithmetic_encoder( FILE *stream ) { output_bit( stream, low & 0x4000 ); underflow_bits++; while ( underflow_bits-- > 0 ) output_bit( stream, ~low & 0x4000 ); } /* * When decoding, this routine is called to figure out which symbol * is presently waiting to be decoded. This routine expects to get * the current model scale in the s->scale parameter, and it returns * a count that corresponds to the present floating point code: * * code = count / s->scale */ short int get_current_count( SYMBOL *s ) { long range; short int count; range = (long) ( high - low ) + 1; count = (short int) ((((long) ( code - low ) + 1 ) * s->scale-1 ) / range ); return( count ); } /* * This routine is called to initialize the state of the arithmetic * decoder. This involves initializing the high and low registers * to their conventional starting values, plus reading the first * 16 bits from the input stream into the code value. */ void initialize_arithmetic_decoder( FILE *stream ) { int i; code = 0; for ( i = 0 ; i < 16 ; i++ ) { code <<= 1; code += input_bit( stream ); } low = 0; high = 0xffff; } /* * Just figuring out what the present symbol is doesn't remove * it from the input bit stream. After the character has been * decoded, this routine has to be called to remove it from the * input stream. */ void remove_symbol_from_stream( FILE *stream, SYMBOL *s ) { long range; /* * First, the range is expanded to account for the symbol removal. */ range = (long)( high - low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale - 1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale ); /* * Next, any possible bits are shipped out. */ for ( ; ; ) { /* * If the MSDigits match, the bits will be shifted out. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { } /* * Else, if underflow is threatining, shift out the 2nd MSDigit. */ else if ((low & 0x4000) == 0x4000 && (high & 0x4000) == 0 ) { code ^= 0x4000; low &= 0x3fff; high |= 0x4000; } /* * Otherwise, nothing can be shifted out, so I return. */ else return; low <<= 1; high <<= 1; high |= 1; code <<= 1; code += input_bit( stream ); } }