From 1dcb97faa086634603d25aac0fcd06eef475f7bb Mon Sep 17 00:00:00 2001 From: firdausnutrihadi Date: Sun, 6 Dec 2015 20:44:14 +0700 Subject: [PATCH 1/5] Revert "Add directory for improved LZW" This reverts commit 0a05163a936763b77e26143b48c733b61873c539. --- lzw15v/lzw15v.c | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 lzw15v/lzw15v.c diff --git a/lzw15v/lzw15v.c b/lzw15v/lzw15v.c deleted file mode 100644 index efc45b6..0000000 --- a/lzw15v/lzw15v.c +++ /dev/null @@ -1,18 +0,0 @@ -/************************** Start of LZW15V.C *************************** -* -* This is the LZW module which implements a more powerful version -* of the algorithm. This version of the program has three major -* improvements over LZW12.C. First, it expands the maximum code size -* to 15 bits. Second, it starts encoding with 9 bit codes, working -* its way up in bit size only as necessary. Finally, it flushes the -* dictionary when done. -* -* Note that under MS-DOS this program needs to be built using the -* Compact or Large memory model. -* -*/ -#include -#include -#include -#include "errhand.h" -#include "bitio.h" From a7a2a519675907cb3213cfd4daa269620806f3cf Mon Sep 17 00:00:00 2001 From: firdausnutrihadi Date: Sun, 6 Dec 2015 20:45:37 +0700 Subject: [PATCH 2/5] Revert "Revert "Add directory for improved LZW"" This reverts commit 1dcb97faa086634603d25aac0fcd06eef475f7bb. --- lzw15v/lzw15v.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 lzw15v/lzw15v.c diff --git a/lzw15v/lzw15v.c b/lzw15v/lzw15v.c new file mode 100644 index 0000000..efc45b6 --- /dev/null +++ b/lzw15v/lzw15v.c @@ -0,0 +1,18 @@ +/************************** Start of LZW15V.C *************************** +* +* This is the LZW module which implements a more powerful version +* of the algorithm. This version of the program has three major +* improvements over LZW12.C. First, it expands the maximum code size +* to 15 bits. Second, it starts encoding with 9 bit codes, working +* its way up in bit size only as necessary. Finally, it flushes the +* dictionary when done. +* +* Note that under MS-DOS this program needs to be built using the +* Compact or Large memory model. +* +*/ +#include +#include +#include +#include "errhand.h" +#include "bitio.h" From edbd9369a6863b01ca10c87baf379d3ffc6fd009 Mon Sep 17 00:00:00 2001 From: firdausnutrihadi Date: Mon, 7 Dec 2015 09:24:09 +0700 Subject: [PATCH 3/5] update lzw15v.c update, delete, dan edit pada code lzw15v.c --- lzw15v/lzw15v.c | 306 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 293 insertions(+), 13 deletions(-) diff --git a/lzw15v/lzw15v.c b/lzw15v/lzw15v.c index efc45b6..c5ba898 100644 --- a/lzw15v/lzw15v.c +++ b/lzw15v/lzw15v.c @@ -1,18 +1,298 @@ -/************************** Start of LZW15V.C *************************** -* -* This is the LZW module which implements a more powerful version -* of the algorithm. This version of the program has three major -* improvements over LZW12.C. First, it expands the maximum code size -* to 15 bits. Second, it starts encoding with 9 bit codes, working -* its way up in bit size only as necessary. Finally, it flushes the -* dictionary when done. -* -* Note that under MS-DOS this program needs to be built using the -* Compact or Large memory model. -* -*/ +/************************** Start of LZW15V.C ************************* + * + * This is the LZW module which implements a more powerful version + * of the algorithm. This version of the program has three major + * improvements over LZW12.C. First, it expands the maximum code size + * to 15 bits. Second, it starts encoding with 9 bit codes, working + * its way up in bit size only as necessary. Finally, it flushes the + * dictionary when done. + * + * Note that under MS-DOS this program needs to be built using the + * Compact or Large memory model. + * + */ #include #include #include #include "errhand.h" #include "bitio.h" + + +/* + * Constants used throughout the program. BITS defines the maximum + * number of bits that can be used in the output code. TABLE_SIZE defines + * the size of the dictionary table. TABLE_BANKS are the number of + * 256 element dictionary pages needed. The code defines should be + * self-explanatory. + */ + +#define BITS 15 +#define MAX_CODE ( ( 1 << BITS ) - 1 ) +#define TABLE_SIZE 35023L +#define TABLE_BANKS ( ( TABLE_SIZE >> 8 ) + 1 ) +#define END_OF_STREAM 256 +#define BUMP_CODE 257 +#define FLUSH_CODE 258 +#define FIRST_CODE 259 +#define UNUSED -1 + +/* + * Local prototypes. + */ + +#ifdef __STDC__ +unsigned int find_child_node( int parent_code, int child_character ); +unsigned int decode_string( unsigned int offset, unsigned int code ); +#else +unsigned int find_child_node(); +unsigned int decode_string(); +#endif + + +char *CompressionName = "LZW 15 Bit Variable Rate Encoder"; +char *Usage = "in-file out-file\n\n"; + +/* + * This data structure defines the dictionary. Each entry in the dictionary + * has a code value. This is the code emitted by the compressor. Each + * code is actually made up of two pieces: a parent_code, and a + * character. Code values of less than 256 are actually plain + * text codes. + * + * Note that in order to handle 16 bit segmented compilers, such as most + * of the MS-DOS compilers, it was necessary to break up the dictionary + * into a table of smaller dictionary pointers. Every reference to the + * dictionary was replaced by a macro that did a pointer dereference first. + * By breaking up the index along byte boundaries we should be as efficient + * as possible. + */ + +struct dictionary { + int code_value; + int parent_code; + char character; +} *dict[ TABLE_BANKS ]; + +#define DICT( i ) dict[ i >> 8 ][ i & 0xff ] + +/* + * Other global data structures. The decode_stack is used to reverse + * strings that come out of the tree during decoding. next_code is the + * next code to be added to the dictionary, both during compression and + * decompression. current_code_bits defines how many bits are currently + * being used for output, and next_bump_code defines the code that will + * trigger the next jump in word size. + */ + +char decode_stack[ TABLE_SIZE ]; +unsigned int next_code; +int current_code_bits; +unsigned int next_bump_code; + +/* + * This routine is used to initialize the dictionary, both when the + * compressor or decompressor first starts up, and also when a flush + * code comes in. Note that even thought the decompressor sets all + * the code_value elements to UNUSED, it doesn't really need to. + */ + +void InitializeDictionary() +{ + unsigned int i; + + for ( i = 0 ; i < TABLE_SIZE ; i++ ) + DICT( i ).code_value = UNUSED; + next_code = FIRST_CODE; + putc( 'F', stdout ); + current_code_bits = 9; + next_bump_code = 511; +} + +/* + * This routine allocates the dictionary. Since the total size of the + * dictionary is much larger than 64K, it can't be allocated as a single + * object. Instead, it is allocated as a set of pointers to smaller + * dictionary objects. The special DICT() macro is used to translate + * indices into pairs of references. + */ + +void InitializeStorage() +{ + int i; + + for ( i = 0 ; i < TABLE_BANKS ; i++ ) { + dict[ i ] = (struct dictionary *) + calloc( 256, sizeof ( struct dictionary ) ); + if ( dict[ i ] == NULL ) + fatal_error( "Error allocating dictionary space" ); + } +} + +/* + * The compressor is short and simple. It reads in new symbols one + * at a time from the input file. It then checks to see if the + * combination of the current symbol and the current code are already + * defined in the dictionary. If they are not, they are added to the + * dictionary, and we start over with a new one symbol code. If they + * are, the code for the combination of the code and character becomes + * our new code. Note that in this enhanced version of LZW, the + * encoder needs to check the codes for boundary conditions. + */ + +void CompressFile( input, output, argc, argv ) +FILE *input; +BIT_FILE *output; +int argc; +char *argv[]; +{ + int character; + int string_code; + unsigned int index; + + InitializeStorage(); + InitializeDictionary(); + if ( ( string_code = getc( input ) ) == EOF ) + string_code = END_OF_STREAM; + while ( ( character = getc( input ) ) != EOF ) { + index = find_child_node( string_code, character ); + if ( DICT( index ).code_value != - 1 ) + string_code = DICT( index ).code_value; + else { + DICT( index ).code_value = next_code++; + DICT( index ).parent_code = string_code; + DICT( index ).character = (char) character; + OutputBits( output, + (unsigned long) string_code, current_code_bits ); + string_code = character; + if ( next_code > MAX_CODE ) { + OutputBits( output, + (unsigned long) FLUSH_CODE, current_code_bits ); + InitializeDictionary(); + } else if ( next_code > next_bump_code ) { + OutputBits( output, + (unsigned long) BUMP_CODE, current_code_bits ); + current_code_bits++; + next_bump_code <<= 1; + next_bump_code |= 1; + putc( 'B', stdout ); + } + } + } + OutputBits( output, (unsigned long) string_code, current_code_bits ); + OutputBits( output, (unsigned long) END_OF_STREAM, current_code_bits); + while ( argc-- > 0 ) + printf( "Unknown argument: %s\n", *argv++ ); +} + +/* + * The file expander operates much like the encoder. It has to + * read in codes, the convert the codes to a string of characters. + * The only catch in the whole operation occurs when the encoder + * encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this + * occurs, the encoder outputs a code that is not presently defined + * in the table. This is handled as an exception. All of the special + * input codes are handled in various ways. + */ + +void ExpandFile( input, output, argc, argv ) +BIT_FILE *input; +FILE *output; +int argc; +char *argv[]; +{ + unsigned int new_code; + unsigned int old_code; + int character; + unsigned int count; + + InitializeStorage(); + while ( argc-- > 0 ) + printf( "Unknown argument: %s\n", *argv++ ); + for ( ; ; ) { + InitializeDictionary(); + old_code = (unsigned int) InputBits( input, current_code_bits ); + if ( old_code == END_OF_STREAM ) + return; + character = old_code; + putc( old_code, output ); + for ( ; ; ) { + new_code = (unsigned int) InputBits( input, current_code_bits ); + if ( new_code == END_OF_STREAM ) + return; + if ( new_code == FLUSH_CODE ) + break; + if ( new_code == BUMP_CODE ) { + current_code_bits++; + putc( 'B', stdout ); + continue; + } + if ( new_code >= next_code ) { + decode_stack[ 0 ] = (char) character; + count = decode_string( 1, old_code ); + } + else + count = decode_string( 0, new_code ); + character = decode_stack[ count - 1 ]; + while ( count > 0 ) + putc( decode_stack[ --count ], output ); + DICT( next_code ).parent_code = old_code; + DICT( next_code ).character = (char) character; + next_code++; + old_code = new_code; + } + } +} + +/* + * This hashing routine is responsible for finding the table location + * for a string/character combination. The table index is created + * by using an exclusive OR combination of the prefix and character. + * This code also has to check for collisions, and handles them by + * jumping around in the table. + */ + +unsigned int find_child_node( parent_code, child_character ) +int parent_code; +int child_character; +{ + unsigned int index; + int offset; + + index = ( child_character << ( BITS - 8 ) ) ^ parent_code; + if ( index == 0 ) + offset = 1; + else + offset = TABLE_SIZE - index; + for ( ; ; ) { + if ( DICT( index ).code_value == UNUSED ) + return( (unsigned int) index ); + if ( DICT( index ).parent_code == parent_code && + DICT( index ).character == (char) child_character ) + return( index ); + if ( (int) index >= offset ) + index -= offset; + else + index += TABLE_SIZE - offset; + } +} + +/* + * This routine decodes a string from the dictionary, and stores it + * in the decode_stack data structure. It returns a count to the + * calling program of how many characters were placed in the stack. + */ + +unsigned int decode_string( count, code ) +unsigned int count; +unsigned int code; +{ + while ( code > 255 ) { + decode_stack[ count++ ] = DICT( code ).character; + code = DICT( code ).parent_code; + } + decode_stack[ count++ ] = (char) code; + return( count ); +} + + +/************************** End of LZW15V.C *************************/ From c115e1b8bfe584acabb73732444f6f79442f353c Mon Sep 17 00:00:00 2001 From: firdausnutrihadi Date: Mon, 7 Dec 2015 10:36:21 +0700 Subject: [PATCH 4/5] Update Lzw15v.c perubahan pada code di ebook --- lzw15v/lzw15v.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/lzw15v/lzw15v.c b/lzw15v/lzw15v.c index c5ba898..cf49c36 100644 --- a/lzw15v/lzw15v.c +++ b/lzw15v/lzw15v.c @@ -52,21 +52,6 @@ unsigned int decode_string(); char *CompressionName = "LZW 15 Bit Variable Rate Encoder"; char *Usage = "in-file out-file\n\n"; -/* - * This data structure defines the dictionary. Each entry in the dictionary - * has a code value. This is the code emitted by the compressor. Each - * code is actually made up of two pieces: a parent_code, and a - * character. Code values of less than 256 are actually plain - * text codes. - * - * Note that in order to handle 16 bit segmented compilers, such as most - * of the MS-DOS compilers, it was necessary to break up the dictionary - * into a table of smaller dictionary pointers. Every reference to the - * dictionary was replaced by a macro that did a pointer dereference first. - * By breaking up the index along byte boundaries we should be as efficient - * as possible. - */ - struct dictionary { int code_value; int parent_code; From ff0f64c9cf5b6bb6026803bf35328ad45bc62daf Mon Sep 17 00:00:00 2001 From: firdausnutrihadi Date: Mon, 7 Dec 2015 13:16:43 +0700 Subject: [PATCH 5/5] lzw15.c code lzw15v.c yang telah diperbarui --- lzw15v/lzw15.c | 291 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 lzw15v/lzw15.c diff --git a/lzw15v/lzw15.c b/lzw15v/lzw15.c new file mode 100644 index 0000000..951a0eb --- /dev/null +++ b/lzw15v/lzw15.c @@ -0,0 +1,291 @@ +/************************** Start of LZW15V.C ************************* + * + * This is the LZW module which implements a more powerful version + * of the algorithm. This version of the program has three major + * improvements over LZW12.C. First, it expands the maximum code size + * to 15 bits. Second, it starts encoding with 9 bit codes, working + * its way up in bit size only as necessary. Finally, it flushes the + * dictionary when done. + * + * Note that under MS-DOS this program needs to be built using the + * Compact or Large memory model. + * + */ +#include +#include +#include +#include "errhand.h" +#include "bitio.h" +#include "main.h" + +/* + * Constants used throughout the program. BITS defines the maximum + * number of bits that can be used in the output code. TABLE_SIZE defines + * the size of the dictionary table. TABLE_BANKS are the number of + * 256 element dictionary pages needed. The code defines should be + * self-explanatory. + */ + +#define BITS 15 +#define MAX_CODE ( ( 1 << BITS ) - 1 ) +#define TABLE_SIZE 35023L +#define TABLE_BANKS ( ( TABLE_SIZE >> 8 ) + 1 ) +#define END_OF_STREAM 256 +#define BUMP_CODE 257 +#define FLUSH_CODE 258 +#define FIRST_CODE 259 +#define UNUSED -1 + +/* + * Local prototypes. + */ + +#ifdef __STDC__ +unsigned int find_child_node( int parent_code, int child_character ); +unsigned int decode_string( unsigned int offset, unsigned int code ); +#else +unsigned int find_child_node(); +unsigned int decode_string(); +#endif + + +char *CompressionName = "LZW 15 Bit Variable Rate Encoder"; +char *Usage = "in-file out-file\n\n"; + +/* + * This data structure defines the dictionary. Each entry in the dictionary + * has a code value. This is the code emitted by the compressor. Each + * code is actually made up of two pieces: a parent_code, and a + * character. Code values of less than 256 are actually plain + * text codes. + * + * Note that in order to handle 16 bit segmented compilers, such as most + * of the MS-DOS compilers, it was necessary to break up the dictionary + * into a table of smaller dictionary pointers. Every reference to the + * dictionary was replaced by a macro that did a pointer dereference first. + * By breaking up the index along byte boundaries we should be as efficient + * as possible. + */ + +struct dictionary { + int code_value; + int parent_code; + char character; +} *dict[ TABLE_BANKS ]; + +#define DICT( i ) dict[ i >> 8 ][ i & 0xff ] + +/* + * Other global data structures. The decode_stack is used to reverse + * strings that come out of the tree during decoding. next_code is the + * next code to be added to the dictionary, both during compression and + * decompression. current_code_bits defines how many bits are currently + * being used for output, and next_bump_code defines the code that will + * trigger the next jump in word size. + */ + +char decode_stack[ TABLE_SIZE ]; +unsigned int next_code; +int current_code_bits; +unsigned int next_bump_code; + +/* + * This routine is used to initialize the dictionary, both when the + * compressor or decompressor first starts up, and also when a flush + * code comes in. Note that even thought the decompressor sets all + * the code_value elements to UNUSED, it doesn't really need to. + */ + +void InitializeDictionary() +{ + unsigned int i; + + for ( i = 0 ; i < TABLE_SIZE ; i++ ) + DICT( i ).code_value = UNUSED; + next_code = FIRST_CODE; + putc( 'F', stdout ); + current_code_bits = 9; + next_bump_code = 511; +} + +/* + * This routine allocates the dictionary. Since the total size of the + * dictionary is much larger than 64K, it can't be allocated as a single + * object. Instead, it is allocated as a set of pointers to smaller + * dictionary objects. The special DICT() macro is used to translate + * indices into pairs of references. + */ + +void InitializeStorage() +{ + int i; + + for ( i = 0 ; i < TABLE_BANKS ; i++ ) { + dict[ i ] = (struct dictionary *) + calloc( 256, sizeof ( struct dictionary ) ); + if ( dict[ i ] == NULL ) + fatal_error( "Error allocating dictionary space" ); + } +} + +/* + * The compressor is short and simple. It reads in new symbols one + * at a time from the input file. It then checks to see if the + * combination of the current symbol and the current code are already + * defined in the dictionary. If they are not, they are added to the + * dictionary, and we start over with a new one symbol code. If they + * are, the code for the combination of the code and character becomes + * our new code. Note that in this enhanced version of LZW, the + * encoder needs to check the codes for boundary conditions. + */ + +void CompressFile( input, output, argc, argv ) +FILE *input; +BIT_FILE *output; +int argc; +char *argv[]; +{ + int character; + int string_code; + unsigned int index; + + InitializeStorage(); + InitializeDictionary(); + if ( ( string_code = getc( input ) ) == EOF ) + string_code = END_OF_STREAM; + while ( ( character = getc( input ) ) != EOF ) { + index = find_child_node( string_code, character ); + if ( DICT( index ).code_value != - 1 ) + string_code = DICT( index ).code_value; + else { + DICT( index ).code_value = next_code++; + DICT( index ).parent_code = string_code; + DICT( index ).character = (char) character; + OutputBits( output, + (unsigned long) string_code, current_code_bits ); + string_code = character; + if ( next_code > MAX_CODE ) { + OutputBits( output, + (unsigned long) FLUSH_CODE, current_code_bits ); + InitializeDictionary(); + } else if ( next_code > next_bump_code ) { + OutputBits( output, + (unsigned long) BUMP_CODE, current_code_bits ); + current_code_bits++; + next_bump_code <<= 1; + next_bump_code |= 1; + putc( 'B', stdout ); + } + } + } + OutputBits( output, (unsigned long) string_code, current_code_bits ); + OutputBits( output, (unsigned long) END_OF_STREAM, current_code_bits); + while ( argc-- > 0 ) + printf( "Unknown argument: %s\n", *argv++ ); +} + +/* + * The file expander operates much like the encoder. It has to + * read in codes, the convert the codes to a string of characters. + * The only catch in the whole operation occurs when the encoder + * encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this + * occurs, the encoder outputs a code that is not presently defined + * in the table. This is handled as an exception. All of the special + * input codes are handled in various ways. + */ + +void ExpandFile( input, output, argc, argv ) +BIT_FILE *input; +FILE *output; +int argc; +char *argv[]; +{ + unsigned int new_code; + unsigned int old_code; + int character; + unsigned int count; + + InitializeStorage(); + while ( argc-- > 0 ) + printf( "Unknown argument: %s\n", *argv++ ); + for ( ; ; ) { + InitializeDictionary(); + old_code = (unsigned int) InputBits( input, current_code_bits ); + if ( old_code == END_OF_STREAM ) + return; + character = old_code; + putc( old_code, output ); + for ( ; ; ) { + new_code = (unsigned int) InputBits( input, current_code_bits ); + if ( new_code == END_OF_STREAM ) + return; + if ( new_code == FLUSH_CODE ) + break; + if ( new_code == BUMP_CODE ) { + current_code_bits++; + putc( 'B', stdout ); + continue; + } + if ( new_code >= next_code ) { + decode_stack[ 0 ] = (char) character; + count = decode_string( 1, old_code ); + } + else + count = decode_string( 0, new_code ); + character = decode_stack[ count - 1 ]; + while ( count > 0 ) + putc( decode_stack[ --count ], output ); + DICT( next_code ).parent_code = old_code; + DICT( next_code ).character = (char) character; + next_code++; + old_code = new_code; + } + } +} + +/* + * This hashing routine is responsible for finding the table location + * for a string/character combination. The table index is created + * by using an exclusive OR combination of the prefix and character. + * This code also has to check for collisions, and handles them by + * jumping around in the table. + */ + +unsigned int find_child_node( parent_code, child_character ) +int parent_code; +int child_character; +{ + unsigned int index; + int offset; + + index = ( child_character << ( BITS - 8 ) ) ^ parent_code; + if ( index == 0 ) + offset = 1; + else + offset = TABLE_SIZE - index; + for ( ; ; ) { + if ( DICT( index ).code_value == UNUSED ) + return( (unsigned int) index ); + if ( DICT( index ).parent_code == parent_code && + DICT( index ).character == (char) child_character ) + return( index ); + if ( (int) index >= offset ) + index -= offset; + else + index += TABLE_SIZE - offset; + } +} + + + +unsigned int decode_string( count, code ) +unsigned int count; +unsigned int code; +{ + while ( code > 255 ) { + decode_stack[ count++ ] = DICT( code ).character; + code = DICT( code ).parent_code; + } + decode_stack[ count++ ] = (char) code; + return( count ); +}