Initial community commit

This commit is contained in:
Jef 2024-09-24 14:54:57 +02:00
parent 537bcbc862
commit fc06254474
16440 changed files with 4239995 additions and 2 deletions

View file

@ -0,0 +1,43 @@
# Automake input for the library sources in this directory.
# The public/ subdirectory is processed as well.
SUBDIRS = public
# liblhasa.la is the libtool library; liblhasatest.a is a static variant
# listed under check_LIBRARIES and built with the test flags further down.
lib_LTLIBRARIES=liblhasa.la
check_LIBRARIES=liblhasatest.a
# Files shipped with the distribution but not listed in SRC below.
# bit_stream_reader.c is written to be #included by decoder sources.
# NOTE(review): presumably the other three are #included fragments as
# well - confirm.
EXTRA_DIST = \
bit_stream_reader.c \
lh_new_decoder.c \
pma_common.c \
tree_decode.c
# Source files shared by both library targets.
SRC = \
crc16.c crc16.h \
ext_header.c ext_header.h \
lha_arch_unix.c lha_arch.h \
lha_arch_win32.c \
lha_decoder.c lha_decoder.h \
lha_endian.c lha_endian.h \
lha_file_header.c lha_file_header.h \
lha_input_stream.c lha_input_stream.h \
lha_basic_reader.c lha_basic_reader.h \
lha_reader.c \
macbinary.c macbinary.h \
null_decoder.c \
lh1_decoder.c \
lh5_decoder.c \
lh6_decoder.c \
lh7_decoder.c \
lhx_decoder.c \
lz5_decoder.c \
lzs_decoder.c \
pm1_decoder.c \
pm2_decoder.c
# Test library: same sources, compiled with ALLOC_TESTING defined,
# ../test on the include path, and debug information (-g).
liblhasatest_a_CFLAGS=$(TEST_CFLAGS) -DALLOC_TESTING -I../test -g
# NOTE(review): HEADER_FILES is not assigned anywhere in this file -
# confirm where it is defined.
liblhasatest_a_SOURCES=$(SRC) $(HEADER_FILES)
liblhasa_la_CFLAGS=$(MAIN_CFLAGS)
liblhasa_la_SOURCES=$(SRC) $(HEADER_FILES)
# Also delete gcov coverage data files when cleaning.
clean-local:
rm -f *.gcno *.gcda *.c.gcov

View file

@ -0,0 +1,128 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Data structure used to read bits from an input source as a stream.
//
// This file is designed to be #included by other source files to
// make a complete decoder.
//
// State of a bit-level reader layered on top of a byte-oriented
// input callback.
typedef struct {
// Callback function to invoke to read more data from the
// input stream, and the opaque pointer passed back to it on
// every call.
LHADecoderCallback callback;
void *callback_data;
// Bits from the input stream that are waiting to be read.
// They are held left-aligned: the next unread bit is the most
// significant bit of bit_buffer.
uint32_t bit_buffer;
// Number of valid bits currently held in bit_buffer.
unsigned int bits;
} BitStreamReader;
// Prepare a bit stream reader for use: the buffer starts out empty
// and all further input is pulled through 'callback', which receives
// 'callback_data' on every invocation.
static void bit_stream_reader_init(BitStreamReader *reader,
                                   LHADecoderCallback callback,
                                   void *callback_data)
{
    // No bits have been buffered yet.
    reader->bit_buffer = 0;
    reader->bits = 0;

    // Remember where to fetch more input from.
    reader->callback_data = callback_data;
    reader->callback = callback;
}
// Return the next n bits waiting to be read from the input stream,
// without removing any. Returns -1 for failure (the input callback
// delivered no more data before n bits were available).
//
// The result is produced by converting the top n bits of the 32-bit
// buffer to int, so callers should request fewer than 32 bits for the
// value to be distinguishable from the -1 failure code.
static int peek_bits(BitStreamReader *reader,
                     unsigned int n)
{
    uint8_t buf[4];
    unsigned int fill_bytes;
    unsigned int i;
    size_t bytes;

    if (n == 0) {
        return 0;
    }

    // If there are not enough bits in the buffer to satisfy this
    // request, we need to fill up the buffer with more bits.
    while (reader->bits < n) {
        // Maximum number of whole bytes that still fit into the
        // 32-bit buffer.
        fill_bytes = (32 - reader->bits) / 8;

        // Read from input and fill bit_buffer.
        memset(buf, 0, sizeof(buf));
        bytes = reader->callback(buf, fill_bytes,
                                 reader->callback_data);

        // End of file?
        if (bytes == 0) {
            return -1;
        }

        // Never trust the callback to report more than was asked for.
        if (bytes > fill_bytes) {
            bytes = fill_bytes;
        }

        // Append only the bytes that were actually read, directly
        // below the bits already buffered. Because
        // bits + 8 * bytes <= 32, every shift count here lies in the
        // range 0..24; shifting the unused (zero) bytes as well would
        // need out-of-range shift counts, which is undefined behaviour.
        for (i = 0; i < bytes; ++i) {
            reader->bit_buffer |= (uint32_t) buf[i]
                               << (24 - reader->bits - 8 * i);
        }

        reader->bits += (unsigned int) bytes * 8;
    }

    return (signed int) (reader->bit_buffer >> (32 - n));
}
// Read n bits from the input stream, removing them from the buffer.
// Returns the bits as a non-negative value, or -1 for failure (in
// which case nothing is consumed).
static int read_bits(BitStreamReader *reader,
                     unsigned int n)
{
    int value = peek_bits(reader, n);

    if (value < 0) {
        return value;
    }

    // Drop the bits that were just handed out.
    reader->bit_buffer <<= n;
    reader->bits -= n;

    return value;
}
// Read a single bit from the input stream.
// Returns 0 or 1, or -1 for failure.
static int read_bit(BitStreamReader *reader)
{
    int bit = read_bits(reader, 1);

    return bit;
}

View file

@ -0,0 +1,73 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "crc16.h"
// Lookup table for the byte-at-a-time CRC-16 update: entry i is the
// CRC contribution of the byte value i. Read-only, 256 entries.
static const uint16_t crc16_table[256] = {
    0x0000, 0xc0c1, 0xc181, 0x0140, 0xc301, 0x03c0, 0x0280, 0xc241,
    0xc601, 0x06c0, 0x0780, 0xc741, 0x0500, 0xc5c1, 0xc481, 0x0440,
    0xcc01, 0x0cc0, 0x0d80, 0xcd41, 0x0f00, 0xcfc1, 0xce81, 0x0e40,
    0x0a00, 0xcac1, 0xcb81, 0x0b40, 0xc901, 0x09c0, 0x0880, 0xc841,
    0xd801, 0x18c0, 0x1980, 0xd941, 0x1b00, 0xdbc1, 0xda81, 0x1a40,
    0x1e00, 0xdec1, 0xdf81, 0x1f40, 0xdd01, 0x1dc0, 0x1c80, 0xdc41,
    0x1400, 0xd4c1, 0xd581, 0x1540, 0xd701, 0x17c0, 0x1680, 0xd641,
    0xd201, 0x12c0, 0x1380, 0xd341, 0x1100, 0xd1c1, 0xd081, 0x1040,
    0xf001, 0x30c0, 0x3180, 0xf141, 0x3300, 0xf3c1, 0xf281, 0x3240,
    0x3600, 0xf6c1, 0xf781, 0x3740, 0xf501, 0x35c0, 0x3480, 0xf441,
    0x3c00, 0xfcc1, 0xfd81, 0x3d40, 0xff01, 0x3fc0, 0x3e80, 0xfe41,
    0xfa01, 0x3ac0, 0x3b80, 0xfb41, 0x3900, 0xf9c1, 0xf881, 0x3840,
    0x2800, 0xe8c1, 0xe981, 0x2940, 0xeb01, 0x2bc0, 0x2a80, 0xea41,
    0xee01, 0x2ec0, 0x2f80, 0xef41, 0x2d00, 0xedc1, 0xec81, 0x2c40,
    0xe401, 0x24c0, 0x2580, 0xe541, 0x2700, 0xe7c1, 0xe681, 0x2640,
    0x2200, 0xe2c1, 0xe381, 0x2340, 0xe101, 0x21c0, 0x2080, 0xe041,
    0xa001, 0x60c0, 0x6180, 0xa141, 0x6300, 0xa3c1, 0xa281, 0x6240,
    0x6600, 0xa6c1, 0xa781, 0x6740, 0xa501, 0x65c0, 0x6480, 0xa441,
    0x6c00, 0xacc1, 0xad81, 0x6d40, 0xaf01, 0x6fc0, 0x6e80, 0xae41,
    0xaa01, 0x6ac0, 0x6b80, 0xab41, 0x6900, 0xa9c1, 0xa881, 0x6840,
    0x7800, 0xb8c1, 0xb981, 0x7940, 0xbb01, 0x7bc0, 0x7a80, 0xba41,
    0xbe01, 0x7ec0, 0x7f80, 0xbf41, 0x7d00, 0xbdc1, 0xbc81, 0x7c40,
    0xb401, 0x74c0, 0x7580, 0xb541, 0x7700, 0xb7c1, 0xb681, 0x7640,
    0x7200, 0xb2c1, 0xb381, 0x7340, 0xb101, 0x71c0, 0x7080, 0xb041,
    0x5000, 0x90c1, 0x9181, 0x5140, 0x9301, 0x53c0, 0x5280, 0x9241,
    0x9601, 0x56c0, 0x5780, 0x9741, 0x5500, 0x95c1, 0x9481, 0x5440,
    0x9c01, 0x5cc0, 0x5d80, 0x9d41, 0x5f00, 0x9fc1, 0x9e81, 0x5e40,
    0x5a00, 0x9ac1, 0x9b81, 0x5b40, 0x9901, 0x59c0, 0x5880, 0x9841,
    0x8801, 0x48c0, 0x4980, 0x8941, 0x4b00, 0x8bc1, 0x8a81, 0x4a40,
    0x4e00, 0x8ec1, 0x8f81, 0x4f40, 0x8d01, 0x4dc0, 0x4c80, 0x8c41,
    0x4400, 0x84c1, 0x8581, 0x4540, 0x8701, 0x47c0, 0x4680, 0x8641,
    0x8201, 0x42c0, 0x4380, 0x8341, 0x4100, 0x81c1, 0x8081, 0x4040
};

// Update a running CRC-16 with the contents of a buffer.
//
// crc      In/out: the CRC accumulated so far; overwritten with the
//          value after all of 'buf' has been processed. Passing the
//          result back in with the next buffer continues the same CRC.
// buf      Bytes to process (not modified).
// buf_len  Number of bytes in 'buf'; zero leaves *crc unchanged.
void lha_crc16_buf(uint16_t *crc, uint8_t *buf, size_t buf_len)
{
    uint16_t tmp = *crc;
    size_t i;

    // The counter is a size_t so that it can cover every possible
    // buf_len without wrapping.
    for (i = 0; i < buf_len; ++i) {
        unsigned int index = (tmp ^ buf[i]) & 0xff;

        tmp = (uint16_t) ((tmp >> 8) ^ crc16_table[index]);
    }

    *crc = tmp;
}

View file

@ -0,0 +1,30 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_LHA_CRC16_H
#define LHASA_LHA_CRC16_H
#include <inttypes.h>
#include <stdlib.h>
/**
 * Update a running CRC-16 with the contents of a buffer.
 *
 * @param crc      In/out: the CRC value accumulated so far; replaced
 *                 with the value after processing the buffer.
 * @param buf      Pointer to the bytes to process.
 * @param buf_len  Number of bytes in the buffer.
 */
void lha_crc16_buf(uint16_t *crc, uint8_t *buf, size_t buf_len);
#endif /* #ifndef LHASA_LHA_CRC16_H */

View file

@ -0,0 +1,437 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include "ext_header.h"
#include "lha_endian.h"
//
// Extended header parsing.
//
// Extended headers were introduced with LHA v2 - various different
// tools support different extended headers. Some are operating system
// specific.
//
// Extended header types.
//
// Each value is the one-byte code that identifies a kind of extended
// header block. LHA_EXT_HEADER_MULTI_DISC and LHA_EXT_HEADER_COMMENT
// are defined here but have no entry in the ext_header_types[] table,
// so no decoder in this file handles them.
#define LHA_EXT_HEADER_COMMON 0x00
#define LHA_EXT_HEADER_FILENAME 0x01
#define LHA_EXT_HEADER_PATH 0x02
#define LHA_EXT_HEADER_MULTI_DISC 0x39
#define LHA_EXT_HEADER_COMMENT 0x3f
#define LHA_EXT_HEADER_WINDOWS_TIMESTAMPS 0x41
#define LHA_EXT_HEADER_UNIX_PERMISSION 0x50
#define LHA_EXT_HEADER_UNIX_UID_GID 0x51
#define LHA_EXT_HEADER_UNIX_GROUP 0x52
#define LHA_EXT_HEADER_UNIX_USER 0x53
#define LHA_EXT_HEADER_UNIX_TIMESTAMP 0x54
#define LHA_EXT_HEADER_OS9 0xcc
/**
 * Structure representing an extended header type.
 *
 * One instance exists per supported header type; the instances are
 * collected in the ext_header_types[] table and looked up by number.
 */
typedef struct {
/**
* Header number.
*
* Each extended header type has a unique byte value that represents
* it.
*/
uint8_t num;
/**
* Callback function for parsing an extended header block.
*
* @param header The file header structure in which to store
* decoded data.
* @param data Pointer to the header data to decode.
* @param data_len Size of the header data, in bytes.
* @return Non-zero if successful, or zero for failure.
*/
int (*decoder)(LHAFileHeader *header, uint8_t *data, size_t data_len);
/**
* Minimum length for a header of this type.
*
* lha_ext_header_decode() rejects data shorter than this before the
* decoder is invoked, so a decoder may read this many bytes without
* checking data_len itself.
*/
size_t min_len;
} LHAExtHeaderType;
// Common header (0x00).
//
// This contains a 16-bit CRC of the entire LHA header.
static int ext_header_common_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
header->extra_flags |= LHA_FILE_COMMON_CRC;
header->common_crc = lha_decode_uint16(data);
// There is a catch-22 in calculating the CRC, because the field
// containing the CRC is part of the data being CRC'd. The solution
// is that the CRC is calculated with the CRC field set to zero.
// Therefore, now that the CRC has been read, set the field to
// zero in the raw_data array so that the CRC can be calculated
// correctly.
data[0] = 0x00;
data[1] = 0x00;
// TODO: Some platforms (OS/2, Unix) put extra data in the common
// header which might also be decoded.
return 1;
}
static LHAExtHeaderType lha_ext_header_common = {
LHA_EXT_HEADER_COMMON,
ext_header_common_decoder,
2
};
// Filename header (0x01).
//
// This stores the filename for the file. This is essential on level 2/3
// headers, as the filename field is no longer part of the standard
// header.
static int ext_header_filename_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
char *new_filename;
unsigned int i;
new_filename = malloc(data_len + 1);
if (new_filename == NULL) {
return 0;
}
memcpy(new_filename, data, data_len);
new_filename[data_len] = '\0';
// Sanitize the filename that was read. It is not allowed to
// contain a path separator, which could potentially be used
// to do something malicious.
for (i = 0; new_filename[i] != '\0'; ++i) {
if (new_filename[i] == '/') {
new_filename[i] = '_';
}
}
free(header->filename);
header->filename = new_filename;
return 1;
}
static LHAExtHeaderType lha_ext_header_filename = {
LHA_EXT_HEADER_FILENAME,
ext_header_filename_decoder,
1
};
// Path header (0x02).
//
// This stores the directory path of the file. A value of 0xff is used
// as the path separator. It is supposed to include a terminating path
// separator as the last character.
static int ext_header_path_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
unsigned int i;
uint8_t *new_path;
new_path = malloc(data_len + 2);
if (new_path == NULL) {
return 0;
}
memcpy(new_path, data, data_len);
new_path[data_len] = '\0';
// Amiga LHA v1.22 generates path headers without a path
// separator at the end of the string. This is broken (and
// was fixed in a later version), but handle it correctly.
if (new_path[data_len - 1] != 0xff) {
new_path[data_len] = 0xff;
new_path[data_len + 1] = '\0';
++data_len;
}
free(header->path);
header->path = (char *) new_path;
for (i = 0; i < data_len; ++i) {
if (new_path[i] == 0xff) {
new_path[i] = '/';
}
}
return 1;
}
static LHAExtHeaderType lha_ext_header_path = {
LHA_EXT_HEADER_PATH,
ext_header_path_decoder,
1
};
// Windows timestamp header (0x41).
//
// This is a Windows-specific header that stores 64-bit timestamps in
// Windows FILETIME format. The timestamps have 100ns accuracy, which is
// much more accurate than the normal Unix time_t format.
static int ext_header_windows_timestamps(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
header->extra_flags |= LHA_FILE_WINDOWS_TIMESTAMPS;
header->win_creation_time = lha_decode_uint64(data);
header->win_modification_time = lha_decode_uint64(data + 8);
header->win_access_time = lha_decode_uint64(data + 16);
return 1;
}
static LHAExtHeaderType lha_ext_header_windows_timestamps = {
LHA_EXT_HEADER_WINDOWS_TIMESTAMPS,
ext_header_windows_timestamps,
24
};
// Unix permissions header (0x50).
static int ext_header_unix_perms_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
header->extra_flags |= LHA_FILE_UNIX_PERMS;
header->unix_perms = lha_decode_uint16(data);
return 1;
}
static LHAExtHeaderType lha_ext_header_unix_perms = {
LHA_EXT_HEADER_UNIX_PERMISSION,
ext_header_unix_perms_decoder,
2
};
// Unix UID/GID header (0x51).
static int ext_header_unix_uid_gid_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
header->extra_flags |= LHA_FILE_UNIX_UID_GID;
header->unix_gid = lha_decode_uint16(data);
header->unix_uid = lha_decode_uint16(data + 2);
return 1;
}
static LHAExtHeaderType lha_ext_header_unix_uid_gid = {
LHA_EXT_HEADER_UNIX_UID_GID,
ext_header_unix_uid_gid_decoder,
4
};
// Unix username header (0x53).
//
// This stores a string containing the username. There don't seem to be
// any tools that actually generate archives containing this header.
static int ext_header_unix_username_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
char *username;
username = malloc(data_len + 1);
if (username == NULL) {
return 0;
}
memcpy(username, data, data_len);
username[data_len] = '\0';
free(header->unix_username);
header->unix_username = username;
return 1;
}
static LHAExtHeaderType lha_ext_header_unix_username = {
LHA_EXT_HEADER_UNIX_USER,
ext_header_unix_username_decoder,
1
};
// Unix group header (0x52).
//
// This stores a string containing the Unix group name. As with the
// username header, there don't seem to be any tools that actually
// generate archives containing this header.
static int ext_header_unix_group_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
char *group;
group = malloc(data_len + 1);
if (group == NULL) {
return 0;
}
memcpy(group, data, data_len);
group[data_len] = '\0';
free(header->unix_group);
header->unix_group = group;
return 1;
}
static LHAExtHeaderType lha_ext_header_unix_group = {
LHA_EXT_HEADER_UNIX_GROUP,
ext_header_unix_group_decoder,
1
};
// Unix timestamp header (0x54).
//
// This stores a 32-bit Unix time_t timestamp representing the
// modification time of the file.
static int ext_header_unix_timestamp_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
header->timestamp = lha_decode_uint32(data);
return 1;
}
static LHAExtHeaderType lha_ext_header_unix_timestamp = {
LHA_EXT_HEADER_UNIX_TIMESTAMP,
ext_header_unix_timestamp_decoder,
4
};
// OS-9 (6809) header (0xcc)
//
// This stores OS-9 filesystem metadata.
static int ext_header_os9_decoder(LHAFileHeader *header,
uint8_t *data,
size_t data_len)
{
// TODO: The OS-9 extended header contains various data, but
// it's not clear what it's all for. Just extract the
// permissions for now.
header->os9_perms = lha_decode_uint16(data + 7);
header->extra_flags |= LHA_FILE_OS9_PERMS;
return 1;
}
static LHAExtHeaderType lha_ext_header_os9 = {
LHA_EXT_HEADER_OS9,
ext_header_os9_decoder,
12
};
// Table of extended headers.
//
// Lists every extended header type this file can decode. The table is
// searched linearly by ext_header_for_num(); a header number that does
// not appear here is reported as not decoded by
// lha_ext_header_decode().
static const LHAExtHeaderType *ext_header_types[] = {
&lha_ext_header_common,
&lha_ext_header_filename,
&lha_ext_header_path,
&lha_ext_header_unix_perms,
&lha_ext_header_unix_uid_gid,
&lha_ext_header_unix_username,
&lha_ext_header_unix_group,
&lha_ext_header_unix_timestamp,
&lha_ext_header_windows_timestamps,
&lha_ext_header_os9,
};
// Number of entries in the ext_header_types[] table.
#define NUM_HEADER_TYPES (sizeof(ext_header_types) / sizeof(*ext_header_types))
/**
* Look up the extended header parser for the specified header code.
*
* @param num Extended header type.
* @return Matching @ref LHAExtHeaderType structure, or NULL if
* not found for this header type.
*/
static const LHAExtHeaderType *ext_header_for_num(uint8_t num)
{
unsigned int i;
for (i = 0; i < NUM_HEADER_TYPES; ++i) {
if (ext_header_types[i]->num == num) {
return ext_header_types[i];
}
}
return NULL;
}
// Decode one extended header block into 'header'.
//
// Returns zero when the header type is unknown or when the block is
// shorter than the minimum for its type; otherwise returns whatever
// the type-specific decoder reports (non-zero for success).
int lha_ext_header_decode(LHAFileHeader *header,
                          uint8_t num,
                          uint8_t *data,
                          size_t data_len)
{
    const LHAExtHeaderType *htype = ext_header_for_num(num);

    // Unknown type, or not enough data for the decoder to read safely.
    if (htype == NULL || data_len < htype->min_len) {
        return 0;
    }

    return htype->decoder(header, data, data_len);
}

View file

@ -0,0 +1,44 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_EXT_HEADER_H
#define LHASA_EXT_HEADER_H
#include <stdlib.h>
#include "lha_file_header.h"
/**
* Decode the specified extended header.
*
* The data buffer is not const: decoding the common header (type 0x00)
* overwrites the first two bytes of the data with zero.
*
* @param header The file header in which to store decoded data.
* @param num Extended header type.
* @param data Pointer to the data to decode.
* @param data_len Size of the data to decode, in bytes.
* @return Non-zero for success, or zero if not decoded (the type
* is not recognized, the data is shorter than the
* minimum for that type, or the decoder failed).
*/
int lha_ext_header_decode(LHAFileHeader *header,
uint8_t num,
uint8_t *data,
size_t data_len);
#endif /* #ifndef LHASA_EXT_HEADER_H */

View file

@ -0,0 +1,724 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "lha_decoder.h"
#include "bit_stream_reader.c"
// Size of the ring buffer used to hold history:
#define RING_BUFFER_SIZE 4096 /* bytes */

// When the root frequency reaches this limit, the code tree is
// reordered. The expansion is parenthesized so that the macro behaves
// as a single value inside any larger expression.
#define TREE_REORDER_LIMIT (32 * 1024) /* 32 kB */

// Number of codes ('byte' codes + 'copy' codes):
#define NUM_CODES 314

// Number of nodes in the code tree: one leaf per code, plus the
// branch nodes needed to join them into a binary tree.
#define NUM_TREE_NODES (NUM_CODES * 2 - 1)

// Number of possible offsets:
#define NUM_OFFSETS 64

// Minimum length of the offset top bits:
#define MIN_OFFSET_LENGTH 3 /* bits */

// Threshold for copying. The first copy code starts from here.
#define COPY_THRESHOLD 3 /* bytes */

// Required size of the output buffer. At most, a single call to read()
// might result in a copy of the entire ring buffer.
#define OUTPUT_BUFFER_SIZE RING_BUFFER_SIZE
// One node of the adaptive code tree. Nodes live in the
// LHALH1Decoder nodes[] array and refer to each other by array index.
typedef struct {
// If true, this node is a leaf node.
unsigned int leaf :1;
// If this is a leaf node, child_index is the code represented by
// this node. Otherwise, nodes[child_index] and nodes[child_index-1]
// are the children of this node.
// (15 bits; together with 'leaf' the two bit-fields use 16 bits.)
unsigned int child_index :15;
// Index of the parent node of this node.
uint16_t parent;
// Frequency count for this node - number of times that it has
// received a hit.
uint16_t freq;
// Group that this node belongs to. All nodes in a group have the
// same frequency.
uint16_t group;
} Node;
// Complete state of an LH1 decoder instance.
typedef struct {
// Input bit stream.
BitStreamReader bit_stream_reader;
// Ring buffer of past data. Used for position-based copies.
// Filled with spaces on initialization.
uint8_t ringbuf[RING_BUFFER_SIZE];
unsigned int ringbuf_pos;
// Array of tree nodes. nodes[0] is the root node. The array
// is maintained in order by frequency.
Node nodes[NUM_TREE_NODES];
// Indices of leaf nodes of the tree (map from code to leaf
// node index)
uint16_t leaf_nodes[NUM_CODES];
// Groups list. Every node belongs to a group. All nodes within
// a group have the same frequency. There can be at most
// NUM_TREE_NODES groups (one for each node). num_groups is used
// to allocate and deallocate groups as needed: the array is used
// as a stack, where entries at index num_groups and above hold
// the group numbers that are currently free.
uint16_t groups[NUM_TREE_NODES];
unsigned int num_groups;
// Index of the "leader" of a group within the nodes[] array.
// The leader is the left-most node within a span of nodes with
// the same frequency. Indexed by group number.
uint16_t group_leader[NUM_TREE_NODES];
// Offset lookup table. Maps from a byte value (sequence of next
// 8 bits from input stream) to an offset value.
uint8_t offset_lookup[256];
// Length of offsets, in bits. Indexed by offset value.
uint8_t offset_lengths[NUM_OFFSETS];
} LHALH1Decoder;
// Frequency distribution used to calculate the offset codes.
//
// Entry i is the number of offset codes that are
// (i + MIN_OFFSET_LENGTH) bits long. The entries add up to 64
// (NUM_OFFSETS), and the codes together cover all 256 possible values
// of an 8-bit lookup index.
static const unsigned int offset_fdist[] = {
1, // 3 bits
3, // 4 bits
8, // 5 bits
12, // 6 bits
24, // 7 bits
16, // 8 bits
};
// Take the next unused group number off the free groups array and
// return it.
static uint16_t alloc_group(LHALH1Decoder *decoder)
{
    // groups[num_groups] is the first free entry; claiming it
    // advances the count of groups in use.
    return decoder->groups[decoder->num_groups++];
}
// Hand back a group number that is no longer in use, so that a later
// alloc_group() call can give it out again.
static void free_group(LHALH1Decoder *decoder, uint16_t group)
{
    // Shrink the in-use region by one and store the released number
    // in the slot that has just become free.
    decoder->groups[--decoder->num_groups] = group;
}
// Reset the groups array: no group is in use, and the free list holds
// every group number in ascending order.
static void init_groups(LHALH1Decoder *decoder)
{
    unsigned int slot = 0;

    decoder->num_groups = 0;

    while (slot < NUM_TREE_NODES) {
        decoder->groups[slot] = (uint16_t) slot;
        ++slot;
    }
}
// Initialize the tree with its basic initial configuration.
//
// Every code gets a leaf with frequency 1, and all leaves share a
// single group. The leaves fill the last NUM_CODES slots of nodes[],
// with code 0 in the very last slot. The remaining slots, down to
// nodes[0] (the root), are then filled with branch nodes, each taking
// the next adjacent pair of already-initialized nodes as children.
//
// Groups are obtained with alloc_group(), so init_groups() must have
// been called beforehand.
static void init_tree(LHALH1Decoder *decoder)
{
unsigned int i, child;
int node_index;
uint16_t leaf_group;
Node *node;
// Leaf nodes are placed at the end of the table. Start by
// initializing these, and working backwards.
node_index = NUM_TREE_NODES - 1;
leaf_group = alloc_group(decoder);
for (i = 0; i < NUM_CODES; ++i) {
node = &decoder->nodes[node_index];
node->leaf = 1;
node->child_index = (unsigned short) i;
node->freq = 1;
node->group = leaf_group;
// Overwritten on every iteration: once the loop has finished, the
// leader is the left-most (lowest-index) leaf.
decoder->group_leader[leaf_group] = (uint16_t) node_index;
decoder->leaf_nodes[i] = (uint16_t) node_index;
--node_index;
}
// Now build up the intermediate nodes, up to the root. Each
// node gets two nodes as children.
child = NUM_TREE_NODES - 1;
while (node_index >= 0) {
node = &decoder->nodes[node_index];
node->leaf = 0;
// Set child pointer and update the parent pointers of the
// children.
node->child_index = child;
decoder->nodes[child].parent = (uint16_t) node_index;
decoder->nodes[child - 1].parent = (uint16_t) node_index;
// The node's frequency is equal to the sum of the frequencies
// of its children.
node->freq = (uint16_t) (decoder->nodes[child].freq
+ decoder->nodes[child - 1].freq);
// Is the frequency the same as the last node we processed?
// if so, we are in the same group. If not, we must
// allocate a new group. Either way, this node is now the
// leader of its group.
if (node->freq == decoder->nodes[node_index + 1].freq) {
node->group = decoder->nodes[node_index + 1].group;
} else {
node->group = alloc_group(decoder);
}
decoder->group_leader[node->group] = (uint16_t) node_index;
// Process next node.
--node_index;
child -= 2;
}
}
// Point a range of offset_lookup entries at 'offset'. The range
// consists of every index whose high bits are those of 'code' and
// whose low bits are a value that fits within 'mask'.
static void fill_offset_range(LHALH1Decoder *decoder, uint8_t code,
                              unsigned int mask, unsigned int offset)
{
    const uint8_t target = (uint8_t) offset;
    unsigned int low = 0;

    // Count upwards through the low-bit values, stopping at the first
    // one that has a bit set outside the mask.
    while ((low & ~mask) == 0) {
        decoder->offset_lookup[code | low] = target;
        ++low;
    }
}
// Build the offset_lookup and offset_lengths tables from the
// offset_fdist frequency distribution.
static void init_offset_table(LHALH1Decoder *decoder)
{
    const unsigned int num_lengths =
        sizeof(offset_fdist) / sizeof(*offset_fdist);
    uint8_t next_code = 0;
    uint8_t next_offset = 0;
    unsigned int idx;

    // offset_fdist[0] gives the number of codes that are
    // MIN_OFFSET_LENGTH bits long; each following entry is for codes
    // one bit longer than the previous.
    for (idx = 0; idx < num_lengths; ++idx) {
        unsigned int bit_len = idx + MIN_OFFSET_LENGTH;

        // Distance between successive codes of this length within the
        // 8-bit lookup index. As codes get longer, the step moves to
        // progressively lower bits.
        uint8_t step = (uint8_t) (1 << (8 - bit_len));
        unsigned int remaining = offset_fdist[idx];

        while (remaining > 0) {
            // (step - 1) is the mask of the low bits that do not
            // belong to the code; every combination of them maps to
            // the same offset.
            fill_offset_range(decoder, next_code,
                              (uint8_t) (step - 1), next_offset);
            decoder->offset_lengths[next_offset] = (uint8_t) bit_len;

            // Advance to the next code and the next offset value.
            next_code = (uint8_t) (next_code + step);
            ++next_offset;
            --remaining;
        }
    }
}
// Reset the history ring buffer: position at the start, contents all
// space characters.
static void init_ring_buffer(LHALH1Decoder *decoder)
{
    decoder->ringbuf_pos = 0;
    memset(decoder->ringbuf, ' ', sizeof(decoder->ringbuf));
}
// Set up an LH1 decoder in the memory pointed to by 'data'. Input is
// subsequently read through 'callback', which is passed
// 'callback_data'. Always returns 1.
static int lha_lh1_init(void *data, LHADecoderCallback callback,
                        void *callback_data)
{
    LHALH1Decoder *decoder = data;

    // History and offset tables.
    init_ring_buffer(decoder);
    init_offset_table(decoder);

    // Code tree. The groups free list has to be ready before the tree
    // is built, because building the tree allocates groups.
    init_groups(decoder);
    init_tree(decoder);

    // Input stream reader.
    bit_stream_reader_init(&decoder->bit_stream_reader,
                           callback, callback_data);

    return 1;
}
// Move a node to the left-most position of its group by exchanging
// its contents with those of the group's current leader. Returns the
// index at which the node's contents end up (the leader's index), or
// the index unchanged if the node already is the leader.
static uint16_t make_group_leader(LHALH1Decoder *decoder,
                                  uint16_t node_index)
{
    uint16_t front_index =
        decoder->group_leader[decoder->nodes[node_index].group];
    Node *current = &decoder->nodes[node_index];
    Node *front = &decoder->nodes[front_index];
    unsigned int held_leaf, held_child;

    // Nothing to do if the node already leads its group.
    if (front_index == node_index) {
        return node_index;
    }

    // Exchange what the two slots represent (leaf flag and child
    // index). Frequency and group are equal for both, and the parent
    // links stay with the slots.
    held_leaf = front->leaf;
    held_child = front->child_index;
    front->leaf = current->leaf;
    front->child_index = current->child_index;
    current->leaf = held_leaf;
    current->child_index = held_child;

    // Whatever now sits in the node's old slot must refer back to it:
    // either the code-to-leaf map, or the parent links of its children.
    if (current->leaf) {
        decoder->leaf_nodes[current->child_index] = node_index;
    } else {
        decoder->nodes[current->child_index].parent = node_index;
        decoder->nodes[current->child_index - 1].parent = node_index;
    }

    // Likewise for the contents that moved into the leader slot.
    if (front->leaf) {
        decoder->leaf_nodes[front->child_index] = front_index;
    } else {
        decoder->nodes[front->child_index].parent = front_index;
        decoder->nodes[front->child_index - 1].parent = front_index;
    }

    return front_index;
}
// Add one to a node's frequency count and update group membership to
// match: the node leaves its old group and either joins the group of
// its left-hand neighbour or forms a group of its own.
static void increment_node_freq(LHALH1Decoder *decoder, uint16_t node_index)
{
    Node *self = &decoder->nodes[node_index];
    Node *left = &decoder->nodes[node_index - 1];
    int shares_group;

    ++self->freq;

    // Does the node to the right belong to the same group?
    shares_group = node_index < NUM_TREE_NODES - 1
                && self->group == decoder->nodes[node_index + 1].group;

    if (!shares_group) {
        // Single-node group. If the node now matches its left-hand
        // neighbour's frequency it merges into that group, and its
        // own group is released.
        if (self->freq == left->freq) {
            free_group(decoder, self->group);
            self->group = left->group;
        }
        return;
    }

    // The node leaves a group that has other members: the next node
    // along takes over as that group's leader.
    ++decoder->group_leader[self->group];

    // Join the group on the left if the frequencies match, otherwise
    // start a new group led by this node.
    if (self->freq == left->freq) {
        self->group = left->group;
    } else {
        self->group = alloc_group(decoder);
        decoder->group_leader[self->group] = node_index;
    }
}
// Reconstruct the code huffman tree to be more evenly distributed.
// Invoked periodically as data is processed.
//
// Works in three phases: (1) compact all leaves to the front of
// nodes[], halving their frequencies; (2) rebuild the tree from the
// end of the array backwards, interleaving leaves and new branch
// nodes; (3) reassign groups from scratch.
static void reconstruct_tree(LHALH1Decoder *decoder)
{
Node *leaf;
unsigned int child;
unsigned int freq;
unsigned int group;
int i;
// Gather all leaf nodes at the start of the table.
leaf = decoder->nodes;
for (i = 0; i < NUM_TREE_NODES; ++i) {
if (decoder->nodes[i].leaf) {
leaf->leaf = 1;
leaf->child_index = decoder->nodes[i].child_index;
// Frequency of the nodes in the new tree is halved,
// this acts as a running average each time the
// tree is reconstructed.
// (The cast applies to freq + 1 before the division, so the
// halving rounds up.)
leaf->freq = (uint16_t) (decoder->nodes[i].freq + 1) / 2;
++leaf;
}
}
// The leaf nodes are now all at the start of the table. Now
// reconstruct the tree, starting from the end of the table and
// working backwards, inserting branch nodes between the leaf
// nodes. Each branch node inherits the sum of the frequencies
// of its children, and must be placed to maintain the ordering
// within the table by decreasing frequency.
leaf = &decoder->nodes[NUM_CODES - 1];
child = NUM_TREE_NODES - 1;
i = NUM_TREE_NODES - 1;
while (i >= 0) {
// Before we can add a new branch node, we need at least
// two nodes to use as children. If we don't have this
// then we need to copy some from the leaves.
while ((int) child - i < 2) {
decoder->nodes[i] = *leaf;
decoder->leaf_nodes[leaf->child_index] = (uint16_t) i;
--i;
--leaf;
}
// Now that we have at least two nodes to take as children
// of the new branch node, we can calculate the branch
// node's frequency.
freq = (unsigned int) (decoder->nodes[child].freq
+ decoder->nodes[child - 1].freq);
// Now copy more leaf nodes until the correct place to
// insert the new branch node presents itself.
// NOTE(review): once the last leaf has been copied, 'leaf' has
// been decremented to one element before nodes[]; the
// 'leaf >= decoder->nodes' test relies on that comparison.
while (leaf >= decoder->nodes && freq >= leaf->freq) {
decoder->nodes[i] = *leaf;
decoder->leaf_nodes[leaf->child_index] = (uint16_t) i;
--i;
--leaf;
}
// The new branch node can now be inserted.
decoder->nodes[i].leaf = 0;
decoder->nodes[i].freq = (uint16_t) freq;
decoder->nodes[i].child_index = (uint16_t) child;
decoder->nodes[child].parent = (uint16_t) i;
decoder->nodes[child - 1].parent = (uint16_t) i;
--i;
// Process the next pair of children.
child -= 2;
}
// Reconstruct the group data. Start by resetting group data.
init_groups(decoder);
// Assign a group to the first node.
group = alloc_group(decoder);
decoder->nodes[0].group = (uint16_t) group;
decoder->group_leader[group] = 0;
// Assign a group number to each node, nodes having the same
// group if they have the same frequency, and allocating new
// groups when a new frequency is found.
for (i = 1; i < NUM_TREE_NODES; ++i) {
if (decoder->nodes[i].freq == decoder->nodes[i - 1].freq) {
decoder->nodes[i].group = decoder->nodes[i - 1].group;
} else {
group = alloc_group(decoder);
decoder->nodes[i].group = (uint16_t) group;
// First node with a particular frequency is leader.
decoder->group_leader[group] = (uint16_t) i;
}
}
}
// Register one more occurrence of `code` in the adaptive code tree.
//
// If the root node (index 0) has reached TREE_REORDER_LIMIT, the tree is
// rebuilt first. The root count is then bumped, and every node on the path
// from the code's leaf up to (but not including) the root is promoted to
// the head of its group and has its frequency incremented.
static void increment_for_code(LHALH1Decoder *decoder, uint16_t code)
{
	uint16_t idx;

	// Rebuild the tree once the root frequency hits the limit.
	if (decoder->nodes[0].freq >= TREE_REORDER_LIMIT) {
		reconstruct_tree(decoder);
	}

	decoder->nodes[0].freq++;

	// Climb from the leaf towards the root; index 0 terminates the walk.
	for (idx = decoder->leaf_nodes[code]; idx != 0;
	     idx = decoder->nodes[idx].parent) {
		// Becoming group leader may move the node, so continue
		// with the index that make_group_leader() hands back.
		idx = make_group_leader(decoder, idx);
		increment_node_freq(decoder, idx);
	}
}
// Decode the next code from the input stream.
//
// Walks the tree from the root (node 0), consuming one input bit per
// branch node, until a leaf is reached. The leaf's code is stored in
// *result and the tree statistics are updated for that code.
//
// Returns 1 on success, or 0 if the input ran out before a leaf was
// reached (*result is left untouched in that case).
static int read_code(LHALH1Decoder *decoder, uint16_t *result)
{
	unsigned int pos = 0;

	for (;;) {
		int bit;

		if (decoder->nodes[pos].leaf) {
			break;
		}

		bit = read_bit(&decoder->bit_stream_reader);

		if (bit < 0) {
			return 0;
		}

		// The two children sit at child_index and child_index - 1;
		// the bit selects between them.
		pos = decoder->nodes[pos].child_index - (unsigned int) bit;
	}

	// For a leaf node, child_index holds the decoded code.
	*result = decoder->nodes[pos].child_index;

	increment_for_code(decoder, *result);

	return 1;
}
// Decode a history offset from the input stream.
//
// The upper part of the offset is a variable-length code of at most 8
// bits, resolved through a lookup table indexed by the next 8 input
// bits. It is followed by 6 literal low-order bits.
//
// Returns 1 and stores the offset in *result on success, 0 on failure.
static int read_offset(LHALH1Decoder *decoder, unsigned int *result)
{
	unsigned int high;
	int lookahead, low;

	// Peek rather than read: the code is usually shorter than 8 bits.
	lookahead = peek_bits(&decoder->bit_stream_reader, 8);

	if (lookahead < 0) {
		return 0;
	}

	high = decoder->offset_lookup[lookahead];

	// Consume exactly the bits that made up the variable-length code.
	read_bits(&decoder->bit_stream_reader,
	          decoder->offset_lengths[high]);

	// Then the six low-order bits.
	low = read_bits(&decoder->bit_stream_reader, 6);

	if (low < 0) {
		return 0;
	}

	*result = (high << 6) | (unsigned int) low;

	return 1;
}
// Emit one decoded byte: record it in the history ring buffer (advancing
// the write position with wrap-around) and append it to the output
// buffer, incrementing *buf_len.
static void output_byte(LHALH1Decoder *decoder, uint8_t *buf,
                        size_t *buf_len, uint8_t b)
{
	decoder->ringbuf[decoder->ringbuf_pos] = b;
	decoder->ringbuf_pos = (decoder->ringbuf_pos + 1) % RING_BUFFER_SIZE;

	buf[(*buf_len)++] = b;
}
// Decoder read callback: decode one command from the input stream and
// write the bytes it produces to buf.
//
// Codes below 0x100 are literal bytes. Higher codes request a copy of
// (code - 0x100 + COPY_THRESHOLD) bytes from the history ring buffer,
// starting at an offset read from the stream.
//
// Returns the number of bytes written to buf, or 0 if a code or offset
// could not be read.
static size_t lha_lh1_read(void *data, uint8_t *buf)
{
	LHALH1Decoder *decoder = data;
	size_t written = 0;
	unsigned int remaining, src, offset;
	uint16_t code;

	if (!read_code(decoder, &code)) {
		return 0;
	}

	// Literal byte?
	if (code < 0x100) {
		output_byte(decoder, buf, &written, (uint8_t) code);
		return written;
	}

	// Otherwise a copy from history; find out where it starts.
	if (!read_offset(decoder, &offset)) {
		return 0;
	}

	remaining = code - 0x100U + COPY_THRESHOLD;
	src = decoder->ringbuf_pos - offset + RING_BUFFER_SIZE - 1;

	// Copy byte by byte: each output_byte() call also feeds the ring
	// buffer, so source and destination ranges may overlap.
	while (remaining > 0) {
		output_byte(decoder, buf, &written,
		            decoder->ringbuf[src % RING_BUFFER_SIZE]);
		++src;
		--remaining;
	}

	return written;
}
// Decoder type descriptor for this decoder. The initializers are
// positional; the field declarations live with LHADecoderType (not in
// this file), so the per-field notes below are inferred from the values.
LHADecoderType lha_lh1_decoder = {
lha_lh1_init,                  // initialization callback
NULL,                          // NOTE(review): presumably an optional cleanup callback - confirm
lha_lh1_read,                  // read callback: decodes one command per call
sizeof(LHALH1Decoder),         // size of the decoder state structure
OUTPUT_BUFFER_SIZE,            // output buffer size passed to the read callback's caller
RING_BUFFER_SIZE               // NOTE(review): presumably the block size - confirm
};

View file

@ -0,0 +1,49 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Decoder for the -lh5- algorithm.
//
// This is the "new" algorithm that appeared in LHA v2, replacing
// the older -lh1-. -lh4- seems to be identical to -lh5-.
//
// 16 KiB history ring buffer:
#define HISTORY_BITS 14 /* 2^14 = 16384 */
// Width, in bits, of the field that gives the number of offset codes
// (enough bits to encode values up to HISTORY_BITS):
#define OFFSET_BITS 4
// Name of the variable for the decoder:
#define DECODER_NAME lha_lh5_decoder
// Generate a second decoder for lh4 that just has a different
// block size.
#define DECODER2_NAME lha_lh4_decoder
// The actual algorithm code is contained in lh_new_decoder.c, which
// acts as a template for -lh4-, -lh5-, -lh6- and -lh7-. The macros
// above must be defined before it is included.
#include "lh_new_decoder.c"

View file

@ -0,0 +1,43 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Decoder for the -lh6- algorithm.
//
// -lh6- is an "extended" version of -lh5- introduced in LHA v2.66.
//
// 64 KiB history ring buffer:
#define HISTORY_BITS 16 /* 2^16 = 65536 */
// Width, in bits, of the field that gives the number of offset codes
// (enough bits to encode values up to HISTORY_BITS):
#define OFFSET_BITS 5
// Name of the variable for the decoder:
#define DECODER_NAME lha_lh6_decoder
// The actual algorithm code is contained in lh_new_decoder.c, which
// acts as a template for -lh4-, -lh5-, -lh6- and -lh7-. The macros
// above must be defined before it is included.
#include "lh_new_decoder.c"

View file

@ -0,0 +1,44 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Decoder for the -lh7- algorithm.
//
// -lh7- is an extension of the -lh5- algorithm introduced in
// LHA 2.67 beta.
//
// 128 KiB history ring buffer:
#define HISTORY_BITS 17 /* 2^17 = 131072 */
// Width, in bits, of the field that gives the number of offset codes
// (enough bits to encode values up to HISTORY_BITS):
#define OFFSET_BITS 5
// Name of the variable for the decoder:
#define DECODER_NAME lha_lh7_decoder
// The actual algorithm code is contained in lh_new_decoder.c, which
// acts as a template for -lh4-, -lh5-, -lh6- and -lh7-. The macros
// above must be defined before it is included.
#include "lh_new_decoder.c"

View file

@ -0,0 +1,569 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Decoder for "new-style" LHA algorithms, used with LHA v2 and onwards
// (-lh4-, -lh5-, -lh6-, -lh7-).
//
// This file is designed to be a template. It is #included by other
// files to generate an optimized decoder.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "lha_decoder.h"
#include "bit_stream_reader.c"
// Include tree decoder.
typedef uint16_t TreeElement;
#include "tree_decode.c"
// Threshold for copying: the shortest copy command (code 256) copies
// this many bytes, and each higher code copies one byte more.
#define COPY_THRESHOLD 3 /* bytes */
// Ring buffer containing history has a size that is a power of two.
// The number of bits is specified by HISTORY_BITS, which the including
// file must define.
#define RING_BUFFER_SIZE (1 << HISTORY_BITS)
// Required size of the output buffer. At most, a single call to read()
// might result in a copy of the entire ring buffer.
#define OUTPUT_BUFFER_SIZE RING_BUFFER_SIZE
// Number of different command codes. 0-255 range are literal byte
// values, while higher values indicate copy from history.
#define NUM_CODES 510
// Number of possible codes in the "temporary table" used to encode the
// codes table.
#define MAX_TEMP_CODES 20
// Complete state of one "new-style" (-lh4- to -lh7-) decoder instance.
typedef struct {
// Input bit stream.
BitStreamReader bit_stream_reader;
// Ring buffer of past data. Used for position-based copies.
uint8_t ringbuf[RING_BUFFER_SIZE];
// Index in ringbuf at which the next output byte will be stored.
unsigned int ringbuf_pos;
// Number of commands remaining before we start a new block.
unsigned int block_remaining;
// Table used for the code tree.
TreeElement code_tree[NUM_CODES * 2];
// Table used for the offset tree, which decodes offsets into the
// history buffer. While a block header is being read, the same table
// temporarily holds the temp-table tree; the temp table is the
// bigger of the two, hence the size.
TreeElement offset_tree[MAX_TEMP_CODES * 2];
} LHANewDecoder;
// Reset the history ring buffer: the write position goes back to the
// start and the whole buffer is filled with space characters.
static void init_ring_buffer(LHANewDecoder *decoder)
{
	decoder->ringbuf_pos = 0;
	memset(decoder->ringbuf, ' ', sizeof(decoder->ringbuf));
}
// Decoder init callback: prepare a freshly allocated LHANewDecoder.
//
// data           The decoder state to initialize.
// callback       Function used by the bit stream reader to fetch input.
// callback_data  Opaque pointer passed through to the callback.
//
// Always returns 1.
static int lha_lh_new_init(void *data, LHADecoderCallback callback,
                           void *callback_data)
{
	LHANewDecoder *state = data;

	// A remaining count of zero makes the first read start a new block.
	state->block_remaining = 0;

	// Put both trees into a known state.
	init_tree(state->code_tree, NUM_CODES * 2);
	init_tree(state->offset_tree, MAX_TEMP_CODES * 2);

	// Empty history.
	init_ring_buffer(state);

	// Attach the input source.
	bit_stream_reader_init(&state->bit_stream_reader,
	                       callback, callback_data);

	return 1;
}
// Read a code length from the input stream.
//
// The length is a 3-bit value. The value 7 is an escape: it is followed
// by a run of '1' bits, each adding one to the length, terminated by a
// '0' bit.
//
// Returns the length, or -1 if the input ran out.
static int read_length_value(LHANewDecoder *decoder)
{
	int len, bit;

	len = read_bits(&decoder->bit_stream_reader, 3);

	if (len < 0) {
		return -1;
	}

	// Anything other than the escape value is the length itself.
	if (len != 7) {
		return len;
	}

	// Unary extension: count '1' bits up to the terminating '0'.
	while ((bit = read_bit(&decoder->bit_stream_reader)) > 0) {
		++len;
	}

	return bit < 0 ? -1 : len;
}
// Read the "temporary table" from the input stream and build its
// decode tree in decoder->offset_tree. The temp table is what the code
// table's lengths are subsequently encoded with.
//
// Returns 1 on success, 0 if the input ran out.
static int read_temp_table(LHANewDecoder *decoder)
{
	uint8_t code_lengths[MAX_TEMP_CODES];
	int count, i;

	// A 5-bit count of codes comes first.
	count = read_bits(&decoder->bit_stream_reader, 5);

	if (count < 0) {
		return 0;
	}

	// A count of zero means there is just one code, of zero length,
	// whose value follows directly.
	if (count == 0) {
		int code = read_bits(&decoder->bit_stream_reader, 5);

		if (code < 0) {
			return 0;
		}

		set_tree_single(decoder->offset_tree, code);
		return 1;
	}

	// Never accept more codes than the table can hold.
	if (count > MAX_TEMP_CODES) {
		count = MAX_TEMP_CODES;
	}

	i = 0;

	while (i < count) {
		int len = read_length_value(decoder);

		if (len < 0) {
			return 0;
		}

		code_lengths[i++] = (uint8_t) len;

		// Directly after the third length there is a 2-bit field
		// giving how many of the following lengths (0-3) are zero
		// and therefore not stored.
		if (i == 3) {
			int skip = read_bits(&decoder->bit_stream_reader, 2);

			if (skip < 0) {
				return 0;
			}

			while (skip-- > 0) {
				code_lengths[i++] = 0;
			}
		}
	}

	build_tree(decoder->offset_tree, MAX_TEMP_CODES * 2,
	           code_lengths, count);

	return 1;
}
// Work out how many code table entries a "skip" code covers.
//
// skiprange 0 means a single entry and reads nothing from the stream.
// skiprange 1 reads a 4-bit value and yields 3-18 entries.
// Any other skiprange reads a 9-bit value and yields 20 or more entries.
//
// Returns the number of entries to skip, or -1 if the input ran out.
static int read_skip_count(LHANewDecoder *decoder, int skiprange)
{
	unsigned int width;
	int base, extra;

	if (skiprange == 0) {
		return 1;
	}

	if (skiprange == 1) {
		width = 4;
		base = 3;
	} else {
		width = 9;
		base = 20;
	}

	extra = read_bits(&decoder->bit_stream_reader, width);

	if (extra < 0) {
		return -1;
	}

	return extra + base;
}
// Read the code table from the input stream and build
// decoder->code_tree from it.
//
// The code lengths are themselves encoded with the temp table, which
// must already have been read into decoder->offset_tree.
//
// Returns 1 on success, 0 if the input ran out.
static int read_code_table(LHANewDecoder *decoder)
{
	uint8_t code_lengths[NUM_CODES];
	int count, filled, sym;

	// A 9-bit count of codes comes first.
	count = read_bits(&decoder->bit_stream_reader, 9);

	if (count < 0) {
		return 0;
	}

	// A count of zero means a single zero-length code: every input
	// decodes to the 9-bit value that follows.
	if (count == 0) {
		sym = read_bits(&decoder->bit_stream_reader, 9);

		if (sym < 0) {
			return 0;
		}

		set_tree_single(decoder->code_tree, sym);
		return 1;
	}

	if (count > NUM_CODES) {
		count = NUM_CODES;
	}

	for (filled = 0; filled < count; ) {
		sym = read_from_tree(&decoder->bit_stream_reader,
		                     decoder->offset_tree);

		if (sym < 0) {
			return 0;
		}

		// Symbols above two carry a code length, biased by two.
		if (sym > 2) {
			code_lengths[filled++] = (uint8_t) (sym - 2);
			continue;
		}

		// Symbols 0-2 mark a run of unused (zero-length) codes.
		// The run is clipped to the end of the table.
		{
			int run = read_skip_count(decoder, sym);

			if (run < 0) {
				return 0;
			}

			while (run-- > 0 && filled < count) {
				code_lengths[filled++] = 0;
			}
		}
	}

	build_tree(decoder->code_tree, NUM_CODES * 2, code_lengths, count);

	return 1;
}
// Read the offset table from the input stream and build
// decoder->offset_tree from it (replacing the temp table that was held
// there while the code table was being read).
//
// Returns 1 on success, 0 if the input ran out.
static int read_offset_table(LHANewDecoder *decoder)
{
	uint8_t code_lengths[HISTORY_BITS];
	int count, idx;

	// The count of codes is an OFFSET_BITS-wide field.
	count = read_bits(&decoder->bit_stream_reader, OFFSET_BITS);

	if (count < 0) {
		return 0;
	}

	// A count of zero means there is just one code, of zero length,
	// whose value follows directly.
	if (count == 0) {
		int code = read_bits(&decoder->bit_stream_reader, OFFSET_BITS);

		if (code < 0) {
			return 0;
		}

		set_tree_single(decoder->offset_tree, code);
		return 1;
	}

	// Never accept more codes than code_lengths can hold.
	if (count > HISTORY_BITS) {
		count = HISTORY_BITS;
	}

	for (idx = 0; idx < count; ++idx) {
		int len = read_length_value(decoder);

		if (len < 0) {
			return 0;
		}

		code_lengths[idx] = (uint8_t) len;
	}

	build_tree(decoder->offset_tree, MAX_TEMP_CODES * 2,
	           code_lengths, count);

	return 1;
}
// Begin a new block: read its header from the input stream.
//
// The header is a 16-bit command count followed by three tables, which
// must be read in this order: the temp table (held in offset_tree), the
// code table (encoded with the temp table), and finally the offset
// table (which takes over offset_tree).
//
// Returns 1 on success, 0 if any part of the header could not be read.
static int start_new_block(LHANewDecoder *decoder)
{
	int commands;

	commands = read_bits(&decoder->bit_stream_reader, 16);

	if (commands < 0) {
		return 0;
	}

	decoder->block_remaining = (unsigned int) commands;

	return read_temp_table(decoder)
	    && read_code_table(decoder)
	    && read_offset_table(decoder);
}
// Decode the next command code using the code tree.
// Returns whatever read_from_tree() returns: the code, or a negative
// value if an error occurred.
static int read_code(LHANewDecoder *decoder)
{
	BitStreamReader *reader = &decoder->bit_stream_reader;

	return read_from_tree(reader, decoder->code_tree);
}
// Read an offset distance from the input stream.
//
// The offset tree yields the length of the offset in bits. Lengths of
// 0 and 1 stand for the offsets 0 and 1. For a longer length the top
// bit is implicitly set and the remaining (length - 1) bits are read
// from the stream:
//
//   bits = 2 -> 1x
//   bits = 3 -> 1xx
//   bits = 4 -> 1xxx, and so on.
//
// Returns the offset, or -1 if an error occurred.
static int read_offset_code(LHANewDecoder *decoder)
{
	int bits, low;

	bits = read_from_tree(&decoder->bit_stream_reader,
	                      decoder->offset_tree);

	if (bits < 0) {
		return -1;
	}

	// Lengths 0 and 1 are the offset itself.
	if (bits <= 1) {
		return bits;
	}

	low = read_bits(&decoder->bit_stream_reader, bits - 1);

	if (low < 0) {
		return -1;
	}

	return (1 << (bits - 1)) + low;
}
// Emit one decoded byte: record it in the history ring buffer (advancing
// the write position with wrap-around) and append it to the output
// buffer, incrementing *buf_len.
static void output_byte(LHANewDecoder *decoder, uint8_t *buf,
                        size_t *buf_len, uint8_t b)
{
	decoder->ringbuf[decoder->ringbuf_pos] = b;
	decoder->ringbuf_pos = (decoder->ringbuf_pos + 1) % RING_BUFFER_SIZE;

	buf[(*buf_len)++] = b;
}
// Copy `count` bytes from the history ring buffer to the output.
//
// The distance back into history is read from the input stream. If it
// cannot be read, nothing is output and *buf_len is left unchanged.
static void copy_from_history(LHANewDecoder *decoder, uint8_t *buf,
                              size_t *buf_len, size_t count)
{
	unsigned int first, n, src;
	int distance;

	distance = read_offset_code(decoder);

	if (distance < 0) {
		return;
	}

	// A distance of zero refers to the most recently written byte.
	// RING_BUFFER_SIZE is added so the subtraction stays in range.
	first = decoder->ringbuf_pos + RING_BUFFER_SIZE - 1
	      - (unsigned int) distance;

	// Copy byte by byte: each output_byte() call also feeds the ring
	// buffer, so source and destination ranges may overlap.
	for (n = 0; n < count; ++n) {
		src = (first + n) % RING_BUFFER_SIZE;
		output_byte(decoder, buf, buf_len, decoder->ringbuf[src]);
	}
}
// Decoder read callback: decode one command from the input stream and
// write the bytes it produces to buf.
//
// A new block header is read first whenever the current block has no
// commands left. Codes below 256 are literal bytes; higher codes copy
// (code - 256 + COPY_THRESHOLD) bytes from history.
//
// Returns the number of bytes written to buf, or 0 if a block header
// or command could not be read.
static size_t lha_lh_new_read(void *data, uint8_t *buf)
{
	LHANewDecoder *state = data;
	size_t written = 0;
	int cmd;

	// Loop, because a block may declare zero commands.
	while (state->block_remaining == 0) {
		if (!start_new_block(state)) {
			return 0;
		}
	}

	// This command is charged to the block before it is decoded.
	state->block_remaining -= 1;

	cmd = read_code(state);

	if (cmd < 0) {
		return 0;
	}

	if (cmd >= 256) {
		copy_from_history(state, buf, &written,
		                  cmd - 256 + COPY_THRESHOLD);
	} else {
		output_byte(state, buf, &written, (uint8_t) cmd);
	}

	return written;
}
// Decoder type descriptor, named by the including file via DECODER_NAME.
// The initializers are positional; the field declarations live with
// LHADecoderType (not in this file), so the per-field notes below are
// inferred from the values.
LHADecoderType DECODER_NAME = {
lha_lh_new_init,               // initialization callback
NULL,                          // NOTE(review): presumably an optional cleanup callback - confirm
lha_lh_new_read,               // read callback: decodes one command per call
sizeof(LHANewDecoder),         // size of the decoder state structure
OUTPUT_BUFFER_SIZE,            // output buffer size needed by the read callback
RING_BUFFER_SIZE / 2           // block size (the -lh4- variant differs only here)
};
// This is a hack for -lh4-: when the including file defines
// DECODER2_NAME, a second descriptor is generated that shares all the
// code above and differs from DECODER_NAME only in its last field.
#ifdef DECODER2_NAME
LHADecoderType DECODER2_NAME = {
lha_lh_new_init,               // initialization callback
NULL,                          // NOTE(review): presumably an optional cleanup callback - confirm
lha_lh_new_read,               // read callback: decodes one command per call
sizeof(LHANewDecoder),         // size of the decoder state structure
OUTPUT_BUFFER_SIZE,            // output buffer size needed by the read callback
RING_BUFFER_SIZE / 4           // block size: half that of DECODER_NAME
};
#endif

View file

@ -0,0 +1,160 @@
/*
Copyright (c) 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_LHA_ARCH_H
#define LHASA_LHA_ARCH_H
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#define LHA_ARCH_UNIX 1
#define LHA_ARCH_WINDOWS 2
#ifdef _WIN32
#define LHA_ARCH LHA_ARCH_WINDOWS
#else
#define LHA_ARCH LHA_ARCH_UNIX
#endif
/**
 * Result of lha_arch_exists(): what, if anything, exists at a path.
 */
typedef enum {
LHA_FILE_NONE,       /* Nothing exists at the path. */
LHA_FILE_FILE,       /* Something other than a directory exists. */
LHA_FILE_DIRECTORY,  /* A directory exists at the path. */
LHA_FILE_ERROR,      /* The path could not be examined. */
} LHAFileType;
/**
 * Cross-platform version of vasprintf().
 *
 * The formatted string is written to a newly allocated buffer.
 *
 * @param result  Pointer to a variable to store the resulting string.
 * @param fmt     Format string.
 * @param args    Additional arguments for printf().
 * @return        Number of characters in resulting string, or -1 if
 *                an error occurred in generating the string.
 */
int lha_arch_vasprintf(char **result, char *fmt, va_list args);
/**
 * Change the mode of the specified FILE handle to be binary mode.
 *
 * This is a no-op on Unix, where there is no distinction between text
 * and binary streams.
 *
 * @param handle  The FILE handle.
 */
void lha_arch_set_binary(FILE *handle);
/**
 * Create a directory.
 *
 * @param path        Path to the directory to create.
 * @param unix_perms  Unix permissions for the directory to create.
 *                    Ignored by the Windows implementation.
 * @return            Non-zero if the directory was created successfully.
 */
int lha_arch_mkdir(char *path, unsigned int unix_perms);
/**
 * Change the Unix ownership of the specified file or directory.
 * If this is not a Unix system, do nothing and report success.
 *
 * @param filename  Path to the file or directory.
 * @param unix_uid  The UID to set.
 * @param unix_gid  The GID to set.
 * @return          Non-zero if set successfully.
 */
int lha_arch_chown(char *filename, int unix_uid, int unix_gid);
/**
 * Change the Unix permissions on the specified file or directory.
 * The Windows implementation does nothing and reports success.
 *
 * @param filename    Path to the file or directory.
 * @param unix_perms  The permissions to set.
 * @return            Non-zero if set successfully.
 */
int lha_arch_chmod(char *filename, int unix_perms);
/**
 * Set the timestamps on the specified file or directory from a single
 * Unix timestamp.
 *
 * The Unix implementation sets the access and modification times; the
 * Windows implementation sets the creation, access and modification
 * times, all to the same value.
 *
 * @param filename   Path to the file or directory.
 * @param timestamp  The Unix timestamp to set.
 * @return           Non-zero if set successfully.
 */
int lha_arch_utime(char *filename, unsigned int timestamp);
/**
 * Set the file creation, modification and access times for the
 * specified file or directory, using 64-bit Windows timestamps.
 *
 * @param filename           Path to the file or directory.
 * @param creation_time      64-bit Windows FILETIME value for the
 *                           creation time of the file.
 * @param modification_time  Modification time of the file, as a 64-bit
 *                           Windows FILETIME value.
 * @param access_time        Last access time of the file, as a 64-bit
 *                           Windows FILETIME value.
 * @return                   Non-zero if set successfully.
 */
int lha_arch_set_windows_timestamps(char *filename,
uint64_t creation_time,
uint64_t modification_time,
uint64_t access_time);
/**
 * Open a new file for writing, in binary mode.
 *
 * On Unix, any existing file at the path is removed first. The Windows
 * implementation ignores the ownership and permission arguments.
 *
 * @param filename    Path to the file to create.
 * @param unix_uid    Unix UID to set for the new file, or -1 to not set.
 * @param unix_gid    Unix GID to set for the new file, or -1 to not set.
 * @param unix_perms  Unix permissions to set for the new file, or -1 to
 *                    not set.
 * @return            Standard C file handle, or NULL if the file could
 *                    not be opened.
 */
FILE *lha_arch_fopen(char *filename, int unix_uid,
int unix_gid, int unix_perms);
/**
 * Query whether the specified file exists.
 *
 * @param filename  Path to the file.
 * @return          The type of file: LHA_FILE_NONE if nothing exists at
 *                  the path, LHA_FILE_DIRECTORY for a directory,
 *                  LHA_FILE_FILE for anything else, or LHA_FILE_ERROR if
 *                  the path could not be examined. The Windows
 *                  implementation reports LHA_FILE_NONE for any failure.
 */
LHAFileType lha_arch_exists(char *filename);
/**
 * Create a symbolic link.
 *
 * If a file already exists at the location of the link to be created, it is
 * overwritten.
 *
 * The Windows implementation creates nothing and reports success.
 *
 * @param path    Path to the symbolic link to create.
 * @param target  Target for the symbolic link.
 * @return        Non-zero for success.
 */
int lha_arch_symlink(char *path, char *target);
#endif /* ifndef LHASA_LHA_ARCH_H */

View file

@ -0,0 +1,173 @@
/*
Copyright (c) 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Architecture-specific files for compilation on Unix.
//
#define _GNU_SOURCE
#include "lha_arch.h"
#if LHA_ARCH == LHA_ARCH_UNIX
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
#include <sys/types.h>
// Unix implementation: a direct pass-through to vasprintf().
//
// TODO: vasprintf() is a non-standard function (hence _GNU_SOURCE at
// the top of this file). Most modern Unix systems provide it, but a
// compatible fallback is still needed for those that do not.
int lha_arch_vasprintf(char **result, char *fmt, va_list args)
{
	int length;

	length = vasprintf(result, fmt, args);

	return length;
}
// Unix implementation: nothing to do, since Unix systems make no
// distinction between "text" and "binary" files.
void lha_arch_set_binary(FILE *handle)
{
	(void) handle;
}
// Create a directory with the given permissions via mkdir().
// Returns non-zero on success, zero on failure.
int lha_arch_mkdir(char *path, unsigned int unix_perms)
{
	int status = mkdir(path, unix_perms);

	return status == 0;
}
// Change the owner and group of a file or directory via chown().
// Returns non-zero on success, zero on failure.
int lha_arch_chown(char *filename, int unix_uid, int unix_gid)
{
	int status = chown(filename, unix_uid, unix_gid);

	return status == 0;
}
// Change the permissions of a file or directory via chmod().
// Returns non-zero on success, zero on failure.
int lha_arch_chmod(char *filename, int unix_perms)
{
	int status = chmod(filename, unix_perms);

	return status == 0;
}
// Set both the access time and the modification time of a file or
// directory to the given Unix timestamp.
// Returns non-zero on success, zero on failure.
int lha_arch_utime(char *filename, unsigned int timestamp)
{
	struct utimbuf stamp;
	time_t when = (time_t) timestamp;

	stamp.modtime = when;
	stamp.actime = when;

	if (utime(filename, &stamp) != 0) {
		return 0;
	}

	return 1;
}
// Create a new file for writing and return a stdio stream for it.
//
// Any existing file at the path is removed first. The file is created
// readable and writable by the current user only; ownership (if
// unix_uid >= 0) and then permissions (if unix_perms >= 0) are applied
// afterwards. Failure to change ownership is tolerated; any other
// failure after creation removes the file again.
//
// Returns the stream, or NULL on failure.
FILE *lha_arch_fopen(char *filename, int unix_uid, int unix_gid, int unix_perms)
{
	FILE *stream;
	int fd;

	// O_EXCL makes open() fail if the path already exists, so clear
	// the way first.
	unlink(filename);

	// O_EXCL also means a symlink at the path is never followed,
	// which stops a malicious symlink from redirecting the write to
	// an arbitrary location. Permissions start out minimal (0600)
	// and are widened only once ownership has been settled.
	fd = open(filename, O_CREAT|O_WRONLY|O_EXCL, 0600);

	if (fd < 0) {
		return NULL;
	}

	// Owner and group come first; otherwise the requested permissions
	// could briefly be granted to the wrong group.
	if (unix_uid >= 0 && fchown(fd, unix_uid, unix_gid) != 0) {
		// On most Unix systems only root may change ownership,
		// so this is not treated as fatal: carry on regardless.
		// TODO: Implement some kind of alternate handling here?
	}

	if (unix_perms >= 0 && fchmod(fd, unix_perms) != 0) {
		goto fail;
	}

	// Wrap the descriptor in a stdc FILE handle.
	stream = fdopen(fd, "wb");

	if (stream == NULL) {
		goto fail;
	}

	return stream;

fail:
	close(fd);
	remove(filename);
	return NULL;
}
// Classify what exists at the given path, using stat().
//
// Returns LHA_FILE_DIRECTORY for a directory and LHA_FILE_FILE for
// anything else that exists. If stat() fails, returns LHA_FILE_NONE
// when errno is ENOENT and LHA_FILE_ERROR otherwise.
LHAFileType lha_arch_exists(char *filename)
{
	struct stat info;

	if (stat(filename, &info) == 0) {
		return S_ISDIR(info.st_mode) ? LHA_FILE_DIRECTORY
		                             : LHA_FILE_FILE;
	}

	return errno == ENOENT ? LHA_FILE_NONE : LHA_FILE_ERROR;
}
// Create a symbolic link at `path` pointing to `target`, replacing
// whatever was at `path` before.
// Returns non-zero on success, zero on failure.
int lha_arch_symlink(char *path, char *target)
{
	int status;

	// Remove any existing entry; symlink() will not overwrite one.
	unlink(path);

	status = symlink(target, path);

	return status == 0;
}
#endif /* LHA_ARCH_UNIX */

View file

@ -0,0 +1,204 @@
/*
Copyright (c) 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Architecture-specific files for compilation on Windows.
// A work in progress.
//
#include "lha_arch.h"
#if LHA_ARCH == LHA_ARCH_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <fcntl.h>
#include <io.h>
#include <stdlib.h>
#include <stdint.h>
// Cached FILETIME value of the Unix epoch (Jan 1, 1970), i.e. the
// offset between the Windows and Unix epochs in FILETIME units.
// Zero means "not yet computed"; lha_arch_utime() fills it in on
// first use.
static uint64_t unix_epoch_offset = 0;
// Windows replacement for vasprintf(): format into a newly malloc()ed
// buffer.
//
// result  Receives the allocated string on success, or NULL on failure.
// fmt     printf-style format string.
// args    Arguments for the format string.
//
// Returns the number of characters in the formatted string, or -1 if
// the string could not be generated.
int lha_arch_vasprintf(char **result, char *fmt, va_list args)
{
	va_list measure_args;
	char *buf;
	int len;

	*result = NULL;

	// The argument list is walked twice: once to measure, once to
	// format. After a v*printf() call the va_list it was given is
	// indeterminate, so the measuring pass must work on a copy.
	va_copy(measure_args, args);
	len = vsnprintf(NULL, 0, fmt, measure_args);
	va_end(measure_args);

	if (len < 0) {
		return -1;
	}

	buf = malloc((size_t) len + 1);

	if (buf == NULL) {
		return -1;
	}

	len = vsnprintf(buf, (size_t) len + 1, fmt, args);

	// Do not hand back a buffer together with an error result.
	if (len < 0) {
		free(buf);
		return -1;
	}

	*result = buf;

	return len;
}
// Switch the stream's underlying file descriptor to binary mode, so
// that no newline translation is performed on it.
void lha_arch_set_binary(FILE *handle)
{
	int fd = _fileno(handle);

	_setmode(fd, _O_BINARY);
}
// Create a directory with default security attributes. The Unix mode
// is not used on Windows.
// Returns non-zero on success, zero on failure.
int lha_arch_mkdir(char *path, unsigned int unix_mode)
{
	(void) unix_mode;

	if (!CreateDirectoryA(path, NULL)) {
		return 0;
	}

	return 1;
}
// Unix ownership is not applied on Windows: do nothing and report
// success.
int lha_arch_chown(char *filename, int unix_uid, int unix_gid)
{
	(void) filename;
	(void) unix_uid;
	(void) unix_gid;

	return 1;
}
// Unix permissions are not applied on Windows: do nothing and report
// success.
int lha_arch_chmod(char *filename, int unix_perms)
{
	(void) filename;
	(void) unix_perms;

	return 1;
}
// Apply the given creation, modification and access times to a file or
// directory.
// Returns non-zero on success, zero if the path could not be opened or
// the times could not be set.
static int set_timestamps(char *filename,
                          FILETIME *creation_time,
                          FILETIME *modification_time,
                          FILETIME *access_time)
{
	HANDLE handle;
	BOOL ok;

	// Only attribute-write access is needed. The
	// FILE_FLAG_BACKUP_SEMANTICS flag is what makes it possible to
	// obtain a handle to a directory as well as to a file.
	handle = CreateFileA(filename,
	                     FILE_WRITE_ATTRIBUTES,
	                     FILE_SHARE_READ | FILE_SHARE_WRITE,
	                     NULL,
	                     OPEN_EXISTING,
	                     FILE_FLAG_BACKUP_SEMANTICS,
	                     NULL);

	if (handle == INVALID_HANDLE_VALUE) {
		return 0;
	}

	// Note the argument order of SetFileTime(): creation, last
	// access, last write.
	ok = SetFileTime(handle, creation_time,
	                 access_time, modification_time);

	CloseHandle(handle);

	return ok ? 1 : 0;
}
// Split a 64-bit Windows timestamp into the two 32-bit halves of a
// FILETIME structure.
static FILETIME filetime_from_uint64(uint64_t value)
{
	FILETIME ft;

	ft.dwHighDateTime = (uint32_t) ((value >> 32) & 0xffffffff);
	ft.dwLowDateTime = (uint32_t) (value & 0xffffffff);

	return ft;
}

// Set the creation, modification and access times of a file or
// directory from 64-bit Windows FILETIME values.
// Returns non-zero on success, zero on failure.
int lha_arch_set_windows_timestamps(char *filename,
                                    uint64_t creation_time,
                                    uint64_t modification_time,
                                    uint64_t access_time)
{
	FILETIME created = filetime_from_uint64(creation_time);
	FILETIME modified = filetime_from_uint64(modification_time);
	FILETIME accessed = filetime_from_uint64(access_time);

	return set_timestamps(filename, &created, &modified, &accessed);
}
// Set the creation, modification and access times of a file or
// directory to the given Unix timestamp.
//
// filename   Path to the file or directory.
// timestamp  Seconds since the Unix epoch (Jan 1, 1970).
//
// Returns non-zero on success; zero if the epoch offset could not be
// computed or the timestamps could not be set.
int lha_arch_utime(char *filename, unsigned int timestamp)
{
	SYSTEMTIME unix_epoch;
	FILETIME filetime;
	uint64_t ts_scaled;

	// Calculate the offset between the Windows FILETIME epoch
	// (Jan 1, 1601) and the Unix epoch (Jan 1, 1970). This is done
	// once and cached in unix_epoch_offset.

	if (unix_epoch_offset == 0) {
		unix_epoch.wYear = 1970;
		unix_epoch.wMonth = 1;
		unix_epoch.wDayOfWeek = 4;  // Thursday
		unix_epoch.wDay = 1;
		unix_epoch.wHour = 0;       // 00:00:00.0
		unix_epoch.wMinute = 0;
		unix_epoch.wSecond = 0;
		unix_epoch.wMilliseconds = 0;

		// On failure filetime is left unset; bail out rather than
		// cache and use an indeterminate offset.
		if (!SystemTimeToFileTime(&unix_epoch, &filetime)) {
			return 0;
		}

		unix_epoch_offset = ((uint64_t) filetime.dwHighDateTime << 32)
		                  + filetime.dwLowDateTime;
	}

	// Convert the Unix timestamp to a Windows FILETIME: scale seconds
	// to 100-nanosecond units, then shift to the Windows epoch.

	ts_scaled = (uint64_t) timestamp * 10000000 + unix_epoch_offset;

	filetime.dwHighDateTime = (uint32_t) ((ts_scaled >> 32) & 0xffffffff);
	filetime.dwLowDateTime = (uint32_t) (ts_scaled & 0xffffffff);

	// Set all timestamps to the same value:

	return set_timestamps(filename, &filetime, &filetime, &filetime);
}
// Open a file for writing in binary mode. Unix ownership and
// permissions are not applied on Windows.
// Returns the stream, or NULL if the file could not be opened.
FILE *lha_arch_fopen(char *filename, int unix_uid, int unix_gid, int unix_perms)
{
	FILE *stream;

	(void) unix_uid;
	(void) unix_gid;
	(void) unix_perms;

	stream = fopen(filename, "wb");

	return stream;
}
// Classify what exists at the given path from its file attributes.
//
// Returns LHA_FILE_DIRECTORY for a directory and LHA_FILE_FILE for
// anything else. If the attributes cannot be read, the path is assumed
// not to exist and LHA_FILE_NONE is returned.
LHAFileType lha_arch_exists(char *filename)
{
	WIN32_FILE_ATTRIBUTE_DATA info;

	if (!GetFileAttributesExA(filename, GetFileExInfoStandard, &info)) {
		return LHA_FILE_NONE;
	}

	if ((info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) {
		return LHA_FILE_DIRECTORY;
	}

	return LHA_FILE_FILE;
}
// Symbolic links are not created on Windows: do nothing and report
// success.
int lha_arch_symlink(char *path, char *target)
{
	(void) path;
	(void) target;

	return 1;
}
#endif /* LHA_ARCH_WINDOWS */

View file

@ -0,0 +1,159 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "crc16.h"
#include "lha_decoder.h"
#include "lha_basic_reader.h"
// State of a basic reader: iterates over the file headers in an input
// stream and gives access to each file's compressed data.
struct _LHABasicReader {
// Input stream that headers and compressed data are read from.
// Not freed by lha_basic_reader_free().
LHAInputStream *stream;
// Header of the current file, or NULL if there is none. Owned by
// the reader.
LHAFileHeader *curr_file;
// Bytes of the current file's compressed data not yet read.
size_t curr_file_remaining;
// Set to non-zero once a read, skip or header read on the stream
// has failed; no further reads are attempted after that.
int eof;
};
// Allocate a basic reader bound to the given input stream.
// Returns NULL if the allocation fails.
LHABasicReader *lha_basic_reader_new(LHAInputStream *stream)
{
	LHABasicReader *result = calloc(1, sizeof(LHABasicReader));

	if (result != NULL) {
		// No file has been read yet.
		result->stream = stream;
		result->curr_file = NULL;
		result->curr_file_remaining = 0;
		result->eof = 0;
	}

	return result;
}
// Release a basic reader, together with the current file header
// if one is held. The input stream itself is not touched here.
void lha_basic_reader_free(LHABasicReader *reader)
{
	LHAFileHeader *header = reader->curr_file;

	if (header != NULL) {
		lha_file_header_free(header);
	}

	free(reader);
}
// Return the header of the current file (NULL if there is none).
LHAFileHeader *lha_basic_reader_curr_file(LHABasicReader *reader)
{
	LHAFileHeader *current = reader->curr_file;

	return current;
}
// Advance to the next file in the stream and return its header.
// Returns NULL once the end of the stream has been reached or a
// header could not be read.
LHAFileHeader *lha_basic_reader_next_file(LHABasicReader *reader)
{
	LHAFileHeader *header;

	// Drop the previous header, then step over whatever part of its
	// compressed data the caller left unread.
	if (reader->curr_file != NULL) {
		lha_file_header_free(reader->curr_file);
		reader->curr_file = NULL;

		if (!lha_input_stream_skip(reader->stream,
		                           reader->curr_file_remaining)) {
			reader->eof = 1;
		}
	}

	if (reader->eof) {
		return NULL;
	}

	// Parse the next header; failure marks the end of the stream.
	header = lha_file_header_read(reader->stream);
	reader->curr_file = header;

	if (header == NULL) {
		reader->eof = 1;
		return NULL;
	}

	reader->curr_file_remaining = header->compressed_length;

	return header;
}
// Read compressed data belonging to the current file into buf.
// At most buf_len bytes are read, and never more than the amount of
// compressed data still remaining for the file. Returns the number
// of bytes read, or zero if nothing remains or the read failed.
size_t lha_basic_reader_read_compressed(LHABasicReader *reader, void *buf,
                                        size_t buf_len)
{
	size_t want;

	if (reader->eof) {
		return 0;
	}
	if (reader->curr_file_remaining == 0) {
		return 0;
	}

	// Clamp the request to the data left in this file.
	want = buf_len > reader->curr_file_remaining
	     ? reader->curr_file_remaining
	     : buf_len;

	if (lha_input_stream_read(reader->stream, buf, want)) {
		reader->curr_file_remaining -= want;
		return want;
	}

	// A failed read ends the stream for all later calls.
	reader->eof = 1;
	return 0;
}
static size_t decoder_callback(void *buf, size_t buf_len, void *user_data)
{
return lha_basic_reader_read_compressed(user_data, buf, buf_len);
}
// Build a decoder for the current file. Returns NULL if there is no
// current file, if its compression method has no known decoder, or
// if the decoder cannot be created.
LHADecoder *lha_basic_reader_decode(LHABasicReader *reader)
{
	LHAFileHeader *header = reader->curr_file;
	LHADecoderType *dtype;

	if (header == NULL) {
		return NULL;
	}

	// The compression method name selects the algorithm.
	dtype = lha_decoder_for_name(header->compress_method);

	if (dtype == NULL) {
		return NULL;
	}

	// The decoder pulls its input through decoder_callback and is
	// told the decompressed length recorded in the header.
	return lha_decoder_new(dtype, decoder_callback, reader,
	                       header->length);
}

View file

@ -0,0 +1,99 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_LHA_BASIC_READER_H
#define LHASA_LHA_BASIC_READER_H
#include "lha_input_stream.h"
#include "lha_file_header.h"
#include "lha_decoder.h"
/**
* Basic LHA stream reader.
*
* The basic reader structure just reads @ref LHAFileHeader structures
* from an input stream and decompresses files. The more elaborate
* @ref LHAReader builds upon this to offer more complicated functionality.
*/
typedef struct _LHABasicReader LHABasicReader;
/**
* Create a new LHA reader to read data from an input stream.
*
* @param stream The input stream to read from.
* @return Pointer to an LHABasicReader structure, or NULL for error.
*/
LHABasicReader *lha_basic_reader_new(LHAInputStream *stream);
/**
* Free an LHA reader.
*
* @param reader The LHABasicReader structure.
*/
void lha_basic_reader_free(LHABasicReader *reader);
/**
* Return the last file read by @ref lha_basic_reader_next_file.
*
* @param reader The LHABasicReader structure.
* @return Last file returned by @ref lha_basic_reader_next_file,
* or NULL if no file has been read yet.
*/
LHAFileHeader *lha_basic_reader_curr_file(LHABasicReader *reader);
/**
* Read the header of the next archived file from the input stream.
*
* @param reader The LHABasicReader structure.
* @return Pointer to an LHAFileHeader structure, or NULL if
* an error occurred. This pointer is only valid until
* the next time that lha_basic_reader_next_file is called.
*/
LHAFileHeader *lha_basic_reader_next_file(LHABasicReader *reader);
/**
* Read some of the compressed data for the current archived file.
*
* @param reader The LHABasicReader structure.
* @param buf Pointer to the buffer in which to store the data.
* @param buf_len Size of the buffer, in bytes.
* @return Number of bytes stored in the buffer, or zero if no
* compressed data remains for the current file or
* the read failed.
*/
size_t lha_basic_reader_read_compressed(LHABasicReader *reader, void *buf,
size_t buf_len);
/**
* Create a decoder object to decompress the compressed data in the
* current file.
*
* @param reader The LHABasicReader structure.
* @return Pointer to a @ref LHADecoder structure to decompress
* the current file, or NULL for failure.
*/
LHADecoder *lha_basic_reader_decode(LHABasicReader *reader);
#endif /* #ifndef LHASA_LHA_BASIC_READER_H */

View file

@ -0,0 +1,256 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "crc16.h"
#include "lha_decoder.h"
// Null decoder, used for -lz4-, -lh0-, -pm0-:
extern LHADecoderType lha_null_decoder;
// LArc compression algorithms:
extern LHADecoderType lha_lz5_decoder;
extern LHADecoderType lha_lzs_decoder;
// LHarc compression algorithms:
extern LHADecoderType lha_lh1_decoder;
extern LHADecoderType lha_lh4_decoder;
extern LHADecoderType lha_lh5_decoder;
extern LHADecoderType lha_lh6_decoder;
extern LHADecoderType lha_lh7_decoder;
extern LHADecoderType lha_lhx_decoder;
// PMarc compression algorithms:
extern LHADecoderType lha_pm1_decoder;
extern LHADecoderType lha_pm2_decoder;
// Table mapping compression method names to the decoder type that
// handles them; searched by lha_decoder_for_name().
// The -lz4-, -lh0- and -pm0- methods all map to the null decoder.
static struct {
char *name;
LHADecoderType *dtype;
} decoders[] = {
{ "-lz4-", &lha_null_decoder },
{ "-lz5-", &lha_lz5_decoder },
{ "-lzs-", &lha_lzs_decoder },
{ "-lh0-", &lha_null_decoder },
{ "-lh1-", &lha_lh1_decoder },
{ "-lh4-", &lha_lh4_decoder },
{ "-lh5-", &lha_lh5_decoder },
{ "-lh6-", &lha_lh6_decoder },
{ "-lh7-", &lha_lh7_decoder },
{ "-lhx-", &lha_lhx_decoder },
{ "-pm0-", &lha_null_decoder },
{ "-pm1-", &lha_pm1_decoder },
{ "-pm2-", &lha_pm2_decoder },
};
// Create a decoder of the given type.
// callback/callback_data are handed to the type's init function and
// stream_length is the length at which output is truncated.
// Returns NULL if allocation or the type's init function fails.
LHADecoder *lha_decoder_new(LHADecoderType *dtype,
                            LHADecoderCallback callback,
                            void *callback_data,
                            size_t stream_length)
{
	LHADecoder *result;
	uint8_t *private_area;
	size_t alloc_size;

	// One allocation holds, in order: the LHADecoder structure, the
	// algorithm's private data area, and the output buffer.
	alloc_size = sizeof(LHADecoder) + dtype->extra_size
	           + dtype->max_read;
	result = calloc(1, alloc_size);

	if (result == NULL) {
		return NULL;
	}

	// The private data area starts directly after the structure and
	// the output buffer directly after the private data area.
	private_area = (uint8_t *) (result + 1);

	result->dtype = dtype;
	result->progress_callback = NULL;
	result->last_block = UINT_MAX;
	result->stream_pos = 0;
	result->stream_length = stream_length;
	result->outbuf_pos = 0;
	result->outbuf_len = 0;
	result->outbuf = private_area + dtype->extra_size;
	result->decoder_failed = 0;
	result->crc = 0;

	// Types without an init function need no further setup.
	if (dtype->init != NULL
	 && !dtype->init(private_area, callback, callback_data)) {
		free(result);
		return NULL;
	}

	return result;
}
// Look up the decoder type registered under the given compression
// method name. Returns NULL if the name is not in the table.
LHADecoderType *lha_decoder_for_name(char *name)
{
	const size_t count = sizeof(decoders) / sizeof(decoders[0]);
	size_t idx = 0;

	while (idx < count) {
		if (strcmp(name, decoders[idx].name) == 0) {
			return decoders[idx].dtype;
		}
		++idx;
	}

	// No entry with this name.
	return NULL;
}
// Free a decoder, first giving the decoder type the chance to clean
// up its private data area if it supplies a free function.
void lha_decoder_free(LHADecoder *decoder)
{
	// The private data area starts directly after the structure.
	void *extra_data = decoder + 1;

	if (decoder->dtype->free != NULL) {
		decoder->dtype->free(extra_data);
	}

	free(decoder);
}
// Invoke the progress callback for every block boundary the stream
// has passed since the callback was last invoked.
static void check_progress_callback(LHADecoder *decoder)
{
	size_t block_size = decoder->dtype->block_size;
	unsigned int target;

	// Block that the current stream position belongs to, rounding up.
	target = (unsigned int)
	         ((decoder->stream_pos + block_size - 1) / block_size);

	// Announce one block at a time until caught up, so the callback
	// is invoked once per block even if several were passed at once.
	for (; decoder->last_block != target; ) {
		++decoder->last_block;
		decoder->progress_callback(decoder->last_block,
		                           decoder->total_blocks,
		                           decoder->progress_callback_data);
	}
}
// Register a progress callback on the decoder, compute the total
// number of blocks in the stream, and report the current position
// immediately.
void lha_decoder_monitor(LHADecoder *decoder,
                         LHADecoderProgressCallback callback,
                         void *callback_data)
{
	size_t block_size = decoder->dtype->block_size;

	// Total block count, rounding a partial final block up.
	decoder->total_blocks
	    = (decoder->stream_length + block_size - 1) / block_size;

	decoder->progress_callback_data = callback_data;
	decoder->progress_callback = callback;

	check_progress_callback(decoder);
}
// Read up to buf_len bytes of decompressed data into buf.
//
// The output is truncated at exactly stream_length bytes, the CRC and
// stream position are updated for the bytes returned, and the
// progress callback (if one is set) is given a chance to run.
//
// Returns the number of bytes stored in buf. This may be less than
// buf_len at the end of the stream or if the decoder stops producing
// data.
size_t lha_decoder_read(LHADecoder *decoder, uint8_t *buf, size_t buf_len)
{
	size_t filled, bytes, remaining;

	// When we reach the end of the stream, we must truncate the
	// decompressed data at exactly the right point (stream_length),
	// or we may read a few extra false byte(s) by mistake.
	// The limit is computed by subtraction rather than by testing
	// stream_pos + buf_len, because that sum can wrap around for a
	// very large buf_len and let the read run past stream_length.

	if (decoder->stream_pos < decoder->stream_length) {
		remaining = decoder->stream_length - decoder->stream_pos;
	} else {
		remaining = 0;
	}

	if (buf_len > remaining) {
		buf_len = remaining;
	}

	// Try to fill up the buffer that has been passed with as much
	// data as possible. Each call to read() will fill up outbuf
	// with some data; this is then copied into buf, with some
	// data left at the end for the next call.

	filled = 0;

	while (filled < buf_len) {

		// Try to empty out some of the output buffer first.

		bytes = decoder->outbuf_len - decoder->outbuf_pos;

		if (buf_len - filled < bytes) {
			bytes = buf_len - filled;
		}

		memcpy(buf + filled, decoder->outbuf + decoder->outbuf_pos,
		       bytes);
		decoder->outbuf_pos += bytes;
		filled += bytes;

		// If we previously encountered a failure reading from
		// the decoder, don't try to call the read function again.

		if (decoder->decoder_failed) {
			break;
		}

		// If outbuf is now empty, we can process another run to
		// re-fill it.

		if (decoder->outbuf_pos >= decoder->outbuf_len) {
			decoder->outbuf_len
			    = decoder->dtype->read(decoder + 1,
			                           decoder->outbuf);
			decoder->outbuf_pos = 0;
		}

		// No more data to be read? Remember this so that read()
		// is not called again.

		if (decoder->outbuf_len == 0) {
			decoder->decoder_failed = 1;
			break;
		}
	}

	// Update CRC.

	lha_crc16_buf(&decoder->crc, buf, filled);

	// Track stream position.

	decoder->stream_pos += filled;

	// Check progress callback, if one is set:

	if (decoder->progress_callback != NULL) {
		check_progress_callback(decoder);
	}

	return filled;
}
// Return the CRC accumulated over all data returned so far by
// lha_decoder_read().
uint16_t lha_decoder_get_crc(LHADecoder *decoder)
{
	uint16_t crc = decoder->crc;

	return crc;
}
// Return the number of decompressed bytes returned so far by
// lha_decoder_read() (the current stream position).
size_t lha_decoder_get_length(LHADecoder *decoder)
{
	size_t position = decoder->stream_pos;

	return position;
}

View file

@ -0,0 +1,114 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_LHA_DECODER_H
#define LHASA_LHA_DECODER_H
#include "public/lha_decoder.h"
/**
* Description of a decoder algorithm: the callbacks that implement it
* and the sizes used when allocating and driving a decoder of this type.
*/
struct _LHADecoderType {
/**
* Callback function to initialize the decoder.
* May be NULL, in which case no initialization is performed.
*
* @param extra_data Pointer to the extra data area allocated for
* the decoder.
* @param callback Callback function to invoke to read more
* compressed data.
* @param callback_data Extra pointer to pass to the callback.
* @return Non-zero for success.
*/
int (*init)(void *extra_data,
LHADecoderCallback callback,
void *callback_data);
/**
* Callback function to free the decoder.
* May be NULL, in which case nothing is called when the decoder
* is freed.
*
* @param extra_data Pointer to the extra data area allocated for
* the decoder.
*/
void (*free)(void *extra_data);
/**
* Callback function to read (ie. decompress) data from the
* decoder.
*
* @param extra_data Pointer to the decoder's custom data.
* @param buf Pointer to the buffer in which to store
* the decompressed data. The buffer is
* at least 'max_read' bytes in size.
* @return Number of bytes decompressed. A return value
* of zero is treated as "no more data" and the
* function is not called again.
*/
size_t (*read)(void *extra_data, uint8_t *buf);
/** Number of bytes of extra data to allocate for the decoder. */
size_t extra_size;
/** Maximum number of bytes that might be put into the buffer by
a single call to read() */
size_t max_read;
/** Block size. Used for calculating number of blocks for
progress bar. */
size_t block_size;
};
/**
* State of a decoder instance. The decoder type's private data area
* and the output buffer are allocated directly after this structure.
*/
struct _LHADecoder {
/** Type of decoder (algorithm) */
LHADecoderType *dtype;
/** Callback function to monitor decoder progress. */
LHADecoderProgressCallback progress_callback;
/** Extra pointer passed to the progress callback. */
void *progress_callback_data;
/** Last announced block position, for progress callback, and the
total number of blocks in the stream. */
unsigned int last_block, total_blocks;
/** Current position in the decode stream, and total length. */
size_t stream_pos, stream_length;
/** Output buffer, containing decoded data not yet returned.
outbuf_pos is the offset of the next byte to return and
outbuf_len the number of bytes currently in the buffer. */
unsigned int outbuf_pos, outbuf_len;
uint8_t *outbuf;
/** If true, the decoder read() function returned zero. */
unsigned int decoder_failed;
/** Current CRC of the output stream. */
uint16_t crc;
};
#endif /* #ifndef LHASA_LHA_DECODER_H */

View file

@ -0,0 +1,60 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "lha_endian.h"
// Decode a 16-bit little-endian value: buf[0] is the low byte and
// buf[1] the high byte.
uint16_t lha_decode_uint16(uint8_t *buf)
{
	unsigned int low = buf[0];
	unsigned int high = buf[1];

	return (uint16_t) ((high << 8) | low);
}
// Decode a 32-bit little-endian value from the four bytes at buf.
uint32_t lha_decode_uint32(uint8_t *buf)
{
	uint32_t value = 0;
	int idx;

	// Start from the most significant byte (the last one) and shift
	// the lower bytes in behind it.
	for (idx = 3; idx >= 0; --idx) {
		value = (value << 8) | (uint32_t) buf[idx];
	}

	return value;
}
// Decode a 64-bit little-endian value from the eight bytes at buf.
uint64_t lha_decode_uint64(uint8_t *buf)
{
	uint64_t value = 0;
	int idx;

	// Start from the most significant byte (the last one) and shift
	// the lower bytes in behind it.
	for (idx = 7; idx >= 0; --idx) {
		value = (value << 8) | (uint64_t) buf[idx];
	}

	return value;
}
// Decode a 16-bit big-endian value: buf[0] is the high byte and
// buf[1] the low byte.
uint16_t lha_decode_be_uint16(uint8_t *buf)
{
	unsigned int high = buf[0];
	unsigned int low = buf[1];

	return (uint16_t) (low | (high << 8));
}
// Decode a 32-bit big-endian value from the four bytes at buf.
uint32_t lha_decode_be_uint32(uint8_t *buf)
{
	uint32_t value = 0;
	int idx;

	// The first byte is the most significant, so bytes are shifted
	// in from first to last.
	for (idx = 0; idx < 4; ++idx) {
		value = (value << 8) | (uint32_t) buf[idx];
	}

	return value;
}

View file

@ -0,0 +1,72 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_LHA_ENDIAN_H
#define LHASA_LHA_ENDIAN_H
#include <inttypes.h>
/**
* Decode a 16-bit little-endian unsigned integer.
*
* @param buf Pointer to buffer containing value to decode.
* @return Decoded value.
*/
uint16_t lha_decode_uint16(uint8_t *buf);
/**
* Decode a 32-bit little-endian unsigned integer.
*
* @param buf Pointer to buffer containing value to decode.
* @return Decoded value.
*/
uint32_t lha_decode_uint32(uint8_t *buf);
/**
* Decode a 64-bit little-endian unsigned integer.
*
* @param buf Pointer to buffer containing value to decode.
* @return Decoded value.
*/
uint64_t lha_decode_uint64(uint8_t *buf);
/**
* Decode a 16-bit big-endian unsigned integer.
*
* @param buf Pointer to buffer containing value to decode.
* @return Decoded value.
*/
uint16_t lha_decode_be_uint16(uint8_t *buf);
/**
* Decode a 32-bit big-endian unsigned integer.
*
* @param buf Pointer to buffer containing value to decode.
* @return Decoded value.
*/
uint32_t lha_decode_be_uint32(uint8_t *buf);
#endif /* #ifndef LHASA_LHA_ENDIAN_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,68 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_LHA_FILE_HEADER_H
#define LHASA_LHA_FILE_HEADER_H
#include "public/lha_file_header.h"
#include "lha_input_stream.h"
/**
* Read a file header from the input stream.
*
* @param stream The input stream to read from.
* @return Pointer to a new LHAFileHeader structure, or NULL
* if an error occurred or a valid header could not
* be read.
*/
LHAFileHeader *lha_file_header_read(LHAInputStream *stream);
/**
* Free a file header structure.
*
* @param header The file header to free.
*/
void lha_file_header_free(LHAFileHeader *header);
/**
* Add a reference to the specified file header, to stop it from being
* freed.
*
* @param header The file header to add a reference to.
*/
void lha_file_header_add_ref(LHAFileHeader *header);
/**
* Get the full path for the given file header.
*
* @param header Pointer to the file header structure.
* @return Pointer to an allocated string containing the full
* file or directory path, or NULL for failure. The
* string must be freed by the caller.
*/
char *lha_file_header_full_path(LHAFileHeader *header);
#endif /* #ifndef LHASA_LHA_FILE_HEADER_H */

View file

@ -0,0 +1,403 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include "lha_arch.h"
#include "lha_input_stream.h"
// Maximum length of the self-extractor header.
// If we don't find an LHA file header after this many bytes, give up.
#define MAX_SFX_HEADER_LEN 65536
// Size of the lead-in buffer used to skip the self-extractor.
#define LEADIN_BUFFER_LEN 24
// Magic string to detect an Amiga LhASFX self-extracting file.
// This type of self-extractor is special because the program itself
// contains a mini-LHA file that must be skipped over to get to
// the real one.
#define AMIGA_LHASFX_ID "LhASFX V1.2,"
// State of an input stream with respect to locating the first
// file header.
typedef enum {
// Newly created stream: the search for the first file header is
// performed on the first call to lha_input_stream_read.
LHA_INPUT_STREAM_INIT,
// A file header was found; data is read normally.
LHA_INPUT_STREAM_READING,
// No file header was found; all reads return zero.
LHA_INPUT_STREAM_FAIL
} LHAInputStreamState;
// An input stream: a set of callbacks plus the handle they operate
// on, with a small lead-in buffer used while searching for the
// first file header.
struct _LHAInputStream {
// Callbacks (read, skip, close) used to access the data source.
const LHAInputStreamType *type;
// Opaque handle passed to each of the callbacks.
void *handle;
// Whether the first file header has been searched for and found.
LHAInputStreamState state;
// Bytes already read from the source while searching for the first
// header but not yet returned to the caller.
uint8_t leadin[LEADIN_BUFFER_LEN];
// Number of valid bytes at the start of leadin.
size_t leadin_len;
};
// Create an input stream that reads through the given callbacks,
// passing them the given handle. Returns NULL if allocation fails.
LHAInputStream *lha_input_stream_new(const LHAInputStreamType *type,
                                     void *handle)
{
	LHAInputStream *stream = calloc(1, sizeof(LHAInputStream));

	if (stream != NULL) {
		stream->type = type;
		stream->handle = handle;

		// Nothing buffered yet; the search for the first header
		// happens on the first read.
		stream->state = LHA_INPUT_STREAM_INIT;
		stream->leadin_len = 0;
	}

	return stream;
}
// Free an input stream. If the stream type provides a close
// callback, it is invoked on the handle first.
void lha_input_stream_free(LHAInputStream *stream)
{
	const LHAInputStreamType *type = stream->type;

	if (type->close != NULL) {
		type->close(stream->handle);
	}

	free(stream);
}
// Test whether buf looks like the start of a file header, i.e.
// whether bytes 2..6 hold a recognised "-xxx-" compression method.
// Returns 1 on a match, 0 otherwise.
static int file_header_match(uint8_t *buf)
{
	uint8_t family, kind, variant;

	// The method name is always delimited by dashes.
	if (buf[2] != '-' || buf[6] != '-') {
		return 0;
	}

	family = buf[3];
	kind = buf[4];
	variant = buf[5];

	switch (family) {
	case 'l':
		// LHA algorithm: any -lh?- method.
		if (kind == 'h') {
			return 1;
		}
		// LArc algorithm: only -lz4-, -lz5- and -lzs-.
		if (kind == 'z') {
			return variant == '4' || variant == '5'
			    || variant == 's';
		}
		return 0;

	case 'p':
		// PMarc algorithm: any -pm?- method except -pms-, which
		// is a string found in PMarc SFX archives and must be
		// ignored.
		return kind == 'm' && variant != 's';

	default:
		return 0;
	}
}
// Discard the first 'bytes' bytes of the lead-in buffer, moving the
// rest of its contents down to the start.
static void empty_leadin(LHAInputStream *stream, size_t bytes)
{
	size_t kept = stream->leadin_len - bytes;

	memmove(stream->leadin, stream->leadin + bytes, kept);
	stream->leadin_len = kept;
}
// Invoke the stream type's read callback on the stream's handle and
// return its result unchanged.
static int do_read(LHAInputStream *stream, void *buf, size_t buf_len)
{
	const LHAInputStreamType *type = stream->type;

	return type->read(stream->handle, buf, buf_len);
}
// Scan forward from the start of the stream until the first file
// header is found, discarding anything before it (such as a
// self-extractor program). On success the lead-in buffer begins at
// the header.
// Returns non-zero if a header was found, or zero if the stream ended
// or MAX_SFX_HEADER_LEN bytes were scanned without finding one.
static int skip_sfx(LHAInputStream *stream)
{
	const size_t id_len = strlen(AMIGA_LHASFX_ID);
	size_t scanned = 0;
	int pending_skips = 0;
	unsigned int off;
	uint8_t *window;
	int got;

	while (scanned < MAX_SFX_HEADER_LEN) {

		// Top up the lead-in buffer from the input.
		got = do_read(stream, stream->leadin + stream->leadin_len,
		              LEADIN_BUFFER_LEN - stream->leadin_len);

		if (got <= 0) {
			return 0;
		}

		stream->leadin_len += (unsigned int) got;

		// Examine every offset that still has more than 12 bytes
		// of buffered data after it.
		off = 0;

		while (off + 12 < stream->leadin_len) {
			window = stream->leadin + off;

			if (file_header_match(window)) {
				if (pending_skips == 0) {
					empty_leadin(stream, off);
					return 1;
				}

				--pending_skips;
			}

			// The Amiga self-extractor embeds a mini-LHA file
			// of its own, so the next header found after its
			// ID string has to be passed over.
			if (memcmp(window, AMIGA_LHASFX_ID, id_len) == 0) {
				pending_skips = 1;
			}

			++off;
		}

		// Drop what has been examined; the unexamined tail stays
		// in the buffer for the next pass.
		empty_leadin(stream, off);
		scanned += off;
	}

	return 0;
}
// Read exactly buf_len bytes from the stream into buf.
// On the first call the stream is scanned for the first file header;
// if none is found, this and all later reads fail.
// Returns non-zero only if the whole buffer was filled.
int lha_input_stream_read(LHAInputStream *stream, void *buf, size_t buf_len)
{
	uint8_t *out = buf;
	size_t done = 0;
	size_t from_leadin;
	int got;

	// First use of the stream: skip any self-extract header.
	if (stream->state == LHA_INPUT_STREAM_INIT) {
		stream->state = skip_sfx(stream) ? LHA_INPUT_STREAM_READING
		                                 : LHA_INPUT_STREAM_FAIL;
	}

	if (stream->state == LHA_INPUT_STREAM_FAIL) {
		return 0;
	}

	// Data buffered in the lead-in buffer is returned first.
	if (stream->leadin_len > 0) {
		from_leadin = stream->leadin_len < buf_len
		            ? stream->leadin_len
		            : buf_len;

		memcpy(out, stream->leadin, from_leadin);
		empty_leadin(stream, from_leadin);
		done = from_leadin;
	}

	// Whatever is still missing comes from the input itself.
	if (done < buf_len) {
		got = do_read(stream, out + done, buf_len - done);

		if (got > 0) {
			done += (unsigned int) got;
		}
	}

	// A partially filled buffer counts as failure.
	return done == buf_len;
}
// Skip over the given number of bytes in the stream.
//
// If the stream type provides a dedicated skip callback, its result
// is returned directly. Otherwise the bytes are read and discarded
// using the read callback.
//
// Returns non-zero for success, zero for failure. In the read-based
// path, reaching the end of the input before all of the bytes have
// been skipped is a failure.
int lha_input_stream_skip(LHAInputStream *stream, size_t bytes)
{
	uint8_t data[32];
	size_t len;
	int result;

	// If we have a dedicated skip function, use it; otherwise,
	// the read function can be used to perform a skip.

	if (stream->type->skip != NULL) {
		return stream->type->skip(stream->handle, bytes);
	}

	while (bytes > 0) {

		// Read as many bytes left as possible to fit in
		// the buffer:

		if (bytes > sizeof(data)) {
			len = sizeof(data);
		} else {
			len = bytes;
		}

		result = do_read(stream, data, len);

		// A negative result is an error. A zero result means no
		// progress was made (end of input); it must also stop the
		// loop, as otherwise 'bytes' never decreases and the loop
		// would spin forever.

		if (result <= 0) {
			return 0;
		}

		bytes -= (size_t) result;
	}

	return 1;
}
// Read callback for FILE * sources.
// Returns the number of bytes read (zero at end of file), or -1 if
// nothing could be read and the stream is not at end of file.
static int file_source_read(void *handle, void *buf, size_t buf_len)
{
	FILE *fh = handle;
	size_t got = fread(buf, 1, buf_len, fh);

	// fread() returns zero both for errors and at end of file; only
	// the end-of-file indicator tells the two apart.
	if (got > 0 || feof(fh)) {
		return (int) got;
	}

	return -1;
}
// Skip for FILE * sources that cannot seek: the bytes are read with
// fread() into a scratch buffer and thrown away.
// Returns 1 if all bytes were skipped, 0 if a read came up short.
static int file_source_skip_fallback(FILE *handle, size_t bytes)
{
	uint8_t scratch[32];
	unsigned int chunk;
	int got;

	for (; bytes > 0; bytes -= chunk) {

		// Never request more than the scratch buffer holds.
		if (bytes > sizeof(scratch)) {
			chunk = sizeof(scratch);
		} else {
			chunk = bytes;
		}

		got = fread(scratch, 1, chunk, handle);

		if (got != (int) chunk) {
			return 0;
		}
	}

	return 1;
}
// Seek forward in a FILE * input stream.
//
// Skip callback for FILE * sources. Streams for which ftell() fails
// are skipped by reading and discarding; otherwise fseek() is used.
//
// fseek() takes a 'long' offset, which may be narrower than size_t.
// Casting a large byte count directly to long could therefore produce
// a truncated or negative offset, so the distance is covered in steps
// of at most LONG_MAX bytes.
//
// Returns non-zero for success, zero for failure.
static int file_source_skip(void *handle, size_t bytes)
{
	FILE *fh = handle;
	long step;

	// If this is an unseekable stream of some kind, always use the
	// fallback behavior, as at least this is guaranteed to work.
	// This is to work around problems on Windows, where fseek() can
	// seek half-way on a stream and *then* fail, leaving us in an
	// unworkable situation.

	if (ftell(fh) < 0) {
		return file_source_skip_fallback(fh, bytes);
	}

	while (bytes > 0) {
		if (bytes > (size_t) LONG_MAX) {
			step = LONG_MAX;
		} else {
			step = (long) bytes;
		}

		if (fseek(fh, step, SEEK_CUR) < 0) {
			// Unseekable after all: read and discard whatever
			// has not been skipped yet.
			if (errno == EBADF || errno == ESPIPE) {
				return file_source_skip_fallback(fh, bytes);
			}

			return 0;
		}

		bytes -= (size_t) step;
	}

	return 1;
}
// Close callback for FILE * sources: closes the underlying stream.
static void file_source_close(void *handle)
{
	FILE *fh = handle;

	fclose(fh);
}
// "Owned" file source - the stream will be closed when the input
// stream is freed: lha_input_stream_free() invokes the close callback
// given here, which calls fclose() on the handle.
// Used by lha_input_stream_from(), which opens the file itself.
static const LHAInputStreamType file_source_owned = {
file_source_read,
file_source_skip,
file_source_close
};
// "Unowned" file source - the stream is owned by the calling code.
// No close callback is given (NULL), so lha_input_stream_free()
// leaves the FILE * open.
// Used by lha_input_stream_from_FILE().
static const LHAInputStreamType file_source_unowned = {
file_source_read,
file_source_skip,
NULL
};
// Open the named file for binary reading and wrap it in an input
// stream that owns the file handle.
// Returns NULL if the file cannot be opened or the stream cannot be
// allocated; in the latter case the file is closed again.
LHAInputStream *lha_input_stream_from(char *filename)
{
	LHAInputStream *stream;
	FILE *fh;

	fh = fopen(filename, "rb");

	if (fh == NULL) {
		return NULL;
	}

	stream = lha_input_stream_new(&file_source_owned, fh);

	if (stream != NULL) {
		return stream;
	}

	// Nothing took ownership of the handle, so close it here.
	fclose(fh);

	return NULL;
}
// Wrap an already-open FILE * in an input stream. lha_arch_set_binary()
// is called on the stream first, and the resulting input stream does
// not close the FILE * when it is freed.
// Returns NULL if the input stream cannot be allocated.
LHAInputStream *lha_input_stream_from_FILE(FILE *stream)
{
	LHAInputStream *result;

	lha_arch_set_binary(stream);

	result = lha_input_stream_new(&file_source_unowned, stream);

	return result;
}

View file

@ -0,0 +1,51 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_LHA_INPUT_STREAM_H
#define LHASA_LHA_INPUT_STREAM_H
#include <inttypes.h>
#include "public/lha_input_stream.h"
/**
* Read a block of data from the LHA stream, of the specified number
* of bytes.
*
* @param stream The input stream.
* @param buf Pointer to buffer in which to store read data.
* @param buf_len Size of buffer, in bytes.
* @return Non-zero if buffer was filled, or zero if an
* error occurred, or end of file was reached.
*/
int lha_input_stream_read(LHAInputStream *stream, void *buf, size_t buf_len);
/**
* Skip over the specified number of bytes.
*
* @param stream The input stream.
* @param bytes Number of bytes to skip.
* @return Non-zero for success, zero for failure.
*/
int lha_input_stream_skip(LHAInputStream *stream, size_t bytes);
#endif /* #ifndef LHASA_LHA_INPUT_STREAM_H */

View file

@ -0,0 +1,886 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lha_arch.h"
#include "lha_decoder.h"
#include "lha_basic_reader.h"
#include "public/lha_reader.h"
#include "macbinary.h"
// Kind of entry that the reader's current file refers to
// (stored in the curr_file_type field of LHAReader).
typedef enum {
// Initial state at start of stream:
CURR_FILE_START,
// Current file is a "normal" file (or directory) read from
// the input stream.
CURR_FILE_NORMAL,
// Current file is a directory that has been popped from the
// directory stack.
CURR_FILE_FAKE_DIR,
// Current file is a deferred symbolic link that has been left
// to the end of the input stream to be created.
CURR_FILE_DEFERRED_SYMLINK,
// End of input stream has been reached.
CURR_FILE_EOF,
} CurrFileType;
// State of the full reader, which is layered on a basic reader.
struct _LHAReader {
// Basic reader created in lha_reader_new; decoders for the
// current file are obtained from it.
LHABasicReader *reader;
// The current file that we are processing (last file returned
// by lha_reader_next_file).
LHAFileHeader *curr_file;
// What kind of entry curr_file is; see CurrFileType.
CurrFileType curr_file_type;
// Pointer to decoder being used to decompress the current file,
// or NULL if we have not yet started decompression.
LHADecoder *decoder;
// Pointer to "inner" decoder. Most of the time,
// decoder == inner_decoder, but when decoding an archive
// generated by MacLHA, inner_decoder points to the actual
// decompressor.
LHADecoder *inner_decoder;
// Policy used to extract directories.
LHAReaderDirPolicy dir_policy;
// Directories that have been created by lha_reader_extract but
// have not yet had their metadata set. This is a linked list
// using the _next field in LHAFileHeader.
// In the case of LHA_READER_DIR_END_OF_DIR this is a stack;
// in the case of LHA_READER_DIR_END_OF_FILE it is a list.
LHAFileHeader *dir_stack;
// Symbolic links containing absolute paths or '..' are not
// created immediately - instead, "placeholder" files are created
// in their place, and the symbolic links created at the end
// of extraction.
LHAFileHeader *deferred_symlinks;
};
/**
 * Release whatever decoders the reader currently holds.
 *
 * Both the decoder and the inner decoder are freed if present (only
 * once when they are the same object), and both pointers are reset
 * to NULL.
 *
 * @param reader         Pointer to the LHA reader structure.
 */
static void close_decoder(LHAReader *reader)
{
	LHADecoder *outer = reader->decoder;
	LHADecoder *inner = reader->inner_decoder;

	if (outer != NULL) {
		// When both pointers name the same decoder, it must
		// only be freed once.
		if (inner == outer) {
			inner = NULL;
		}

		lha_decoder_free(outer);
	}

	if (inner != NULL) {
		lha_decoder_free(inner);
	}

	reader->decoder = NULL;
	reader->inner_decoder = NULL;
}
/**
 * Set up the decoder chain used to decompress the current file.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param callback       Progress callback to attach to the decoder,
 *                       or NULL for none.
 * @param callback_data  Extra pointer to pass to the callback function.
 * @return               Non-zero for success, zero for failure.
 */
static int open_decoder(LHAReader *reader,
                        LHADecoderProgressCallback callback,
                        void *callback_data)
{
    LHADecoder *inner;

    // Only entries read from the input stream carry compressed data.
    if (reader->curr_file_type != CURR_FILE_NORMAL) {
        return 0;
    }

    inner = lha_basic_reader_decode(reader->reader);
    reader->inner_decoder = inner;

    if (inner == NULL) {
        return 0;
    }

    // Progress is tracked on the real decompressor.
    if (callback != NULL) {
        lha_decoder_monitor(inner, callback, callback_data);
    }

    // The common case: no wrapping decoder is needed.
    if (reader->curr_file->os_type != LHA_OS_TYPE_MACOS) {
        reader->decoder = inner;
        return 1;
    }

    // Files written by MacLHA may start with a MacBinary header
    // holding MacOS-specific metadata. A "passthrough" decoder is
    // layered on top of the real one to detect and strip it.
    reader->decoder = lha_macbinary_passthrough(inner, reader->curr_file);

    if (reader->decoder == NULL) {
        return 0;
    }

    return 1;
}
/**
 * Allocate a new reader that reads from the given input stream.
 *
 * @param stream         Input stream to read the archive from.
 * @return               Pointer to the new reader, or NULL if either
 *                       the reader or its basic reader could not be
 *                       created.
 */
LHAReader *lha_reader_new(LHAInputStream *stream)
{
    LHAReader *result;
    LHABasicReader *basic;

    result = calloc(1, sizeof(LHAReader));

    if (result == NULL) {
        return NULL;
    }

    basic = lha_basic_reader_new(stream);

    if (basic == NULL) {
        free(result);
        return NULL;
    }

    // Explicit initial state: nothing read yet, no decoder open,
    // no pending directories or symbolic links.
    result->reader = basic;
    result->curr_file_type = CURR_FILE_START;
    result->curr_file = NULL;
    result->inner_decoder = NULL;
    result->decoder = NULL;
    result->dir_policy = LHA_READER_DIR_END_OF_DIR;
    result->dir_stack = NULL;
    result->deferred_symlinks = NULL;

    return result;
}
/**
 * Free a reader and everything it owns.
 *
 * Besides the decoder and the basic reader, the reader holds
 * references (taken with lha_file_header_add_ref) on headers queued
 * in the directory stack and in the deferred symbolic link list, and
 * on a current file that was popped from either of them. All of those
 * references are dropped here so that an extraction that is abandoned
 * part-way does not leak headers.
 *
 * @param reader         Pointer to the LHA reader structure.
 */
void lha_reader_free(LHAReader *reader)
{
    LHAFileHeader *header;

    // Shut down the current decoder, if there is one.
    close_decoder(reader);

    // A fake directory or deferred symlink that is the current file
    // has already been unlinked from its list, so the reference the
    // reader took on it must be dropped separately.
    if (reader->curr_file != NULL
     && (reader->curr_file_type == CURR_FILE_FAKE_DIR
      || reader->curr_file_type == CURR_FILE_DEFERRED_SYMLINK)) {
        lha_file_header_free(reader->curr_file);
        reader->curr_file = NULL;
    }

    // Free any file headers in the stack.
    while (reader->dir_stack != NULL) {
        header = reader->dir_stack;
        reader->dir_stack = header->_next;
        lha_file_header_free(header);
    }

    // Free any symbolic links that were deferred but never created.
    while (reader->deferred_symlinks != NULL) {
        header = reader->deferred_symlinks;
        reader->deferred_symlinks = header->_next;
        lha_file_header_free(header);
    }

    lha_basic_reader_free(reader->reader);
    free(reader);
}
/**
 * Select how directory metadata is applied during extraction.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param policy         New directory policy, stored in the reader.
 */
void lha_reader_set_dir_policy(LHAReader *reader, LHAReaderDirPolicy policy)
{
    reader->dir_policy = policy;
}
/**
 * Decide whether the directory on top of the stack is finished.
 *
 * Directories are extracted in two stages: the directory is created
 * and pushed onto the directory stack, and later it is popped so that
 * its metadata can be applied. This function answers whether that
 * second stage is due for the top entry.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @return               Non-zero if there is a directory at the top of
 *                       the stack that should be popped.
 */
static int end_of_top_dir(LHAReader *reader)
{
    LHAFileHeader *upcoming;
    const char *top_path;

    // Nothing queued, nothing to pop.
    if (reader->dir_stack == NULL) {
        return 0;
    }

    // At the end of the input stream every remaining directory
    // is popped, whatever the policy.
    upcoming = lha_basic_reader_curr_file(reader->reader);

    if (upcoming == NULL) {
        return 1;
    }

    // END_OF_FILE: hold everything back until the stream ends.
    if (reader->dir_policy == LHA_READER_DIR_END_OF_FILE) {
        return 0;
    }

    // PLAIN (which should not leave anything on the stack) and any
    // unknown policy: pop straight away.
    if (reader->dir_policy != LHA_READER_DIR_END_OF_DIR) {
        return 1;
    }

    // END_OF_DIR: the top directory is finished as soon as the next
    // input file's path no longer starts with the directory's path.
    if (upcoming->path == NULL) {
        return 1;
    }

    top_path = reader->dir_stack->path;

    return strncmp(upcoming->path, top_path, strlen(top_path)) != 0;
}
/**
 * Advance to the next entry and return its header.
 *
 * Entries come from three places, in this order of priority:
 * directories popped from the directory stack ("fake" directories),
 * files read from the input stream, and finally symbolic links whose
 * creation was deferred to the end of the stream.
 *
 * Headers of fake directories and deferred symbolic links are owned
 * by the reader; the reference on the previously returned one is
 * dropped here, so such a header is only valid until the next call.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @return               Header of the new current file, or NULL once
 *                       the end of the stream has been reached.
 */
LHAFileHeader *lha_reader_next_file(LHAReader *reader)
{
    // Free the current decoder if there is one.
    close_decoder(reader);

    // No point continuing once the end of the input stream has
    // been reached.
    if (reader->curr_file_type == CURR_FILE_EOF) {
        return NULL;
    }

    // Advance to the next file from the input stream?
    // Don't advance until we've done the fake directories first.
    if (reader->curr_file_type == CURR_FILE_START
     || reader->curr_file_type == CURR_FILE_NORMAL) {
        lha_basic_reader_next_file(reader->reader);
    }

    // If the last file we returned was a 'fake' directory or a
    // deferred symbolic link, the reader holds the reference that was
    // taken when it was queued, and must now drop it. (Previously
    // only fake directories were released, leaking every deferred
    // symbolic link header.)
    if (reader->curr_file_type == CURR_FILE_FAKE_DIR
     || reader->curr_file_type == CURR_FILE_DEFERRED_SYMLINK) {
        lha_file_header_free(reader->curr_file);
        reader->curr_file = NULL;
    }

    // Pop off all appropriate directories from the stack first.
    if (end_of_top_dir(reader)) {
        reader->curr_file = reader->dir_stack;
        reader->dir_stack = reader->dir_stack->_next;
        reader->curr_file_type = CURR_FILE_FAKE_DIR;
    } else {
        reader->curr_file = lha_basic_reader_curr_file(reader->reader);
        reader->curr_file_type = CURR_FILE_NORMAL;
    }

    // Once we reach the end of the file, there may be deferred
    // symbolic links still to extract, so process those before
    // giving up and declaring end of file.
    if (reader->curr_file == NULL) {
        if (reader->deferred_symlinks != NULL) {
            reader->curr_file = reader->deferred_symlinks;
            reader->curr_file_type = CURR_FILE_DEFERRED_SYMLINK;

            reader->deferred_symlinks =
                reader->deferred_symlinks->_next;
            reader->curr_file->_next = NULL;
        } else {
            reader->curr_file_type = CURR_FILE_EOF;
        }
    }

    return reader->curr_file;
}
/**
 * Read decompressed data from the current file.
 *
 * The decoder is created lazily on the first read of each file.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param buf            Buffer to receive the decompressed data.
 * @param buf_len        Size of the buffer, in bytes.
 * @return               Value returned by the decoder's read, or zero
 *                       if the decoder could not be opened.
 */
size_t lha_reader_read(LHAReader *reader, void *buf, size_t buf_len)
{
    if (reader->decoder == NULL && !open_decoder(reader, NULL, NULL)) {
        return 0;
    }

    return lha_decoder_read(reader->decoder, buf, buf_len);
}
/**
 * Run the current file through the decoder to the end.
 *
 * Expects open_decoder() to have been called already to start the
 * decode process.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param output         FILE handle to write decompressed data, or NULL
 *                       if the decompressed data should be discarded.
 * @return               Non-zero if the file decompressed successfully,
 *                       zero on a short write or if the decoded length
 *                       or CRC does not match the file header.
 */
static int do_decode(LHAReader *reader, FILE *output)
{
    uint8_t chunk[64];
    unsigned int chunk_len;

    // Pull data through the decoder one chunk at a time until a
    // read comes back empty.
    for (;;) {
        chunk_len = lha_reader_read(reader, chunk, sizeof(chunk));

        if (output != NULL
         && fwrite(chunk, 1, chunk_len, output) < chunk_len) {
            return 0;
        }

        if (chunk_len == 0) {
            break;
        }
    }

    // The inner decoder tracks the number of bytes produced and a
    // running CRC; both must agree with the values in the header.
    if (lha_decoder_get_length(reader->inner_decoder)
     != reader->curr_file->length) {
        return 0;
    }

    return lha_decoder_get_crc(reader->inner_decoder)
        == reader->curr_file->crc;
}
/**
 * Verify the current file by decompressing it and discarding the
 * output.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param callback       Callback function to invoke to track progress.
 * @param callback_data  Extra pointer to pass to the callback function.
 * @return               Non-zero if the file decoded and matched its
 *                       header, zero otherwise (including when the
 *                       current entry is not a normal file).
 */
int lha_reader_check(LHAReader *reader,
                     LHADecoderProgressCallback callback,
                     void *callback_data)
{
    if (reader->curr_file_type != CURR_FILE_NORMAL) {
        return 0;
    }

    // Directories carry no data, so there is nothing to check.
    if (strcmp(reader->curr_file->compress_method,
               LHA_COMPRESS_TYPE_DIR) == 0) {
        return 1;
    }

    if (!open_decoder(reader, callback, callback_data)) {
        return 0;
    }

    return do_decode(reader, NULL);
}
/**
 * Create the output file for the current entry.
 *
 * Unix ownership and permissions are passed on to lha_arch_fopen only
 * when the header actually carries them; -1 is passed otherwise.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param filename       Name of the file to open.
 * @return               FILE handle of the opened file, or NULL in
 *                       case of failure.
 */
static FILE *open_output_file(LHAReader *reader, char *filename)
{
    LHAFileHeader *hdr = reader->curr_file;
    int uid = -1;
    int gid = -1;
    int perms = -1;

    if (LHA_FILE_HAVE_EXTRA(hdr, LHA_FILE_UNIX_PERMS)) {
        perms = hdr->unix_perms;
    }

    if (LHA_FILE_HAVE_EXTRA(hdr, LHA_FILE_UNIX_UID_GID)) {
        uid = hdr->unix_uid;
        gid = hdr->unix_gid;
    }

    return lha_arch_fopen(filename, uid, gid, perms);
}
/**
 * Apply the timestamps stored in a header to a path.
 *
 * On Windows builds the Windows timestamps are used when the header
 * has them. Otherwise the Unix timestamp is applied, unless it is
 * zero, in which case nothing is done.
 *
 * @param path     Path to the file or directory to set.
 * @param header   Pointer to file header structure containing the
 *                 timestamps to set.
 * @return         Non-zero if the timestamps were set successfully
 *                 (or there was nothing to set), or zero for failure.
 */
static int set_timestamps_from_header(char *path, LHAFileHeader *header)
{
#if LHA_ARCH == LHA_ARCH_WINDOWS
    if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_WINDOWS_TIMESTAMPS)) {
        return lha_arch_set_windows_timestamps(
            path,
            header->win_creation_time,
            header->win_modification_time,
            header->win_access_time
        );
    }
#endif

    // A zero timestamp means there is nothing to apply.
    if (header->timestamp == 0) {
        return 1;
    }

    return lha_arch_utime(path, header->timestamp);
}
/**
 * Apply metadata to an already-created directory.
 *
 * This is the second stage of directory extraction: timestamps,
 * ownership and permissions are set on a directory after its contents
 * have been extracted. Only a failure to set permissions is reported;
 * timestamp and ownership failures are ignored.
 *
 * @param header   Pointer to file header structure containing the
 *                 metadata to set.
 * @param path     Path to the directory on which to set the metadata.
 * @return         Non-zero for success, or zero for failure.
 */
static int set_directory_metadata(LHAFileHeader *header, char *path)
{
    // Timestamp (result deliberately not checked):
    (void) set_timestamps_from_header(path, header);

    // Owner and group. On most Unix systems only root can change
    // ownership, so a failure here is not treated as fatal and is
    // ignored.
    // TODO: Implement some kind of alternate handling here?
    if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_UID_GID)) {
        (void) lha_arch_chown(path, header->unix_uid, header->unix_gid);
    }

    // Permissions:
    if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)
     && !lha_arch_chmod(path, header->unix_perms)) {
        return 0;
    }

    return 1;
}
/**
 * Create the directory described by the current file.
 *
 * This is the first stage of directory extraction. After a successful
 * creation the header is either queued on the directory stack, so that
 * its metadata is applied later, or (LHA_READER_DIR_PLAIN policy) the
 * metadata is applied immediately.
 *
 * @param reader     Pointer to the LHA reader structure.
 * @param path       Path to the directory, or NULL to use the path from
 *                   the file header.
 * @return           Non-zero for success, or zero for failure.
 */
static int extract_directory(LHAReader *reader, char *path)
{
    LHAFileHeader *dir = reader->curr_file;
    unsigned int create_mode;

    if (path == NULL) {
        path = dir->path;
    }

    // When permissions will be applied afterwards, start with a mode
    // restricted to the running user; otherwise leave it to the umask.
    create_mode = LHA_FILE_HAVE_EXTRA(dir, LHA_FILE_UNIX_PERMS)
                ? 0700 : 0777;

    if (!lha_arch_mkdir(path, create_mode)) {
        // Creation may have failed simply because the directory is
        // already there, which is not an error.
        return lha_arch_exists(path) == LHA_FILE_DIRECTORY;
    }

    if (reader->dir_policy != LHA_READER_DIR_PLAIN) {
        // Keep the header alive and push it for the metadata stage.
        lha_file_header_add_ref(dir);
        dir->_next = reader->dir_stack;
        reader->dir_stack = dir;
    } else {
        set_directory_metadata(dir, path);
    }

    return 1;
}
/**
 * Extract the current file.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param filename       Filename into which to extract the file, or NULL
 *                       to use the filename from the file header.
 * @param callback       Callback function to invoke to track progress.
 * @param callback_data  Extra pointer to pass to the callback function.
 * @return               Non-zero if the file was successfully extracted,
 *                       or zero for failure (decoder or output file
 *                       could not be opened, decode/CRC failure, or the
 *                       output could not be fully written).
 */
static int extract_file(LHAReader *reader, char *filename,
                        LHADecoderProgressCallback callback,
                        void *callback_data)
{
    FILE *fstream;
    char *tmp_filename = NULL;
    int result;

    // Construct filename?
    if (filename == NULL) {
        tmp_filename = lha_file_header_full_path(reader->curr_file);

        if (tmp_filename == NULL) {
            return 0;
        }

        filename = tmp_filename;
    }

    // Create decoder. If the decoder cannot be created, there is no
    // need to even create an output file. If successful, open the
    // output file and decode.
    result = 0;

    if (open_decoder(reader, callback, callback_data)) {
        fstream = open_output_file(reader, filename);

        if (fstream != NULL) {
            result = do_decode(reader, fstream);

            // fclose() flushes any buffered output. If that fails
            // (eg. the disk is full), the file on disk is incomplete
            // even though decoding itself succeeded.
            if (fclose(fstream) != 0) {
                result = 0;
            }
        }
    }

    // Set timestamp on file:
    if (result) {
        set_timestamps_from_header(filename, reader->curr_file);
    }

    free(tmp_filename);

    return result;
}
/**
 * Check whether a symbolic link target is potentially dangerous.
 *
 * A target that is an absolute path (starts with '/') or that has a
 * '..' path component could be used to overwrite arbitrary files on
 * the filesystem, so such links need special treatment.
 *
 * @param header         Pointer to a header structure defining a symbolic
 *                       link.
 * @return               Non-zero if the symbolic link is potentially
 *                       dangerous.
 */
static int is_dangerous_symlink(LHAFileHeader *header)
{
    const char *component;
    const char *end;

    if (header->symlink_target == NULL) {
        return 0;
    }

    // Absolute targets can point anywhere on the filesystem.
    if (header->symlink_target[0] == '/') {
        return 1;
    }

    // Walk the target one '/'-separated component at a time, looking
    // for a component that is exactly "..". The final component has
    // no terminating '/' and is checked the same way.
    component = header->symlink_target;

    for (;;) {
        end = component;

        while (*end != '\0' && *end != '/') {
            ++end;
        }

        if ((end - component) == 2
         && component[0] == '.' && component[1] == '.') {
            return 1;
        }

        if (*end == '\0') {
            return 0;
        }

        component = end + 1;
    }
}
/**
 * Compute the combined length of a header's path and filename.
 *
 * Either part may be absent (NULL), in which case it counts as zero.
 *
 * @param header         The file header structure.
 * @return               strlen(path) + strlen(filename), in bytes.
 */
static size_t file_header_path_len(LHAFileHeader *header)
{
    size_t dir_len = header->path != NULL ? strlen(header->path) : 0;
    size_t name_len = header->filename != NULL
                    ? strlen(header->filename) : 0;

    return dir_len + name_len;
}
/**
 * Stand in for a "dangerous" symbolic link with a placeholder file.
 *
 * Rather than creating the link now, an empty file is created where
 * it would go, and the header is queued in the deferred_symlinks list
 * so that the real link can be created later.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param filename       Filename into which to extract the symlink.
 * @return               Non-zero if the symlink was extracted successfully,
 *                       or zero for failure.
 */
static int extract_placeholder_symlink(LHAReader *reader, char *filename)
{
    FILE *placeholder;
    LHAFileHeader **link;
    size_t new_len;

    placeholder = lha_arch_fopen(filename, -1, -1, 0600);

    if (placeholder == NULL) {
        return 0;
    }

    fclose(placeholder);

    // The deferred list is kept sorted by decreasing path length, so
    // that one symbolic link cannot depend on another. For example:
    //
    //    etc         ->  /etc
    //    etc/passwd  ->  /malicious_path/passwd
    //
    // Find the first entry that is not strictly longer than the new
    // one and insert in front of it.
    new_len = file_header_path_len(reader->curr_file);

    for (link = &reader->deferred_symlinks;
         *link != NULL && file_header_path_len(*link) > new_len;
         link = &(*link)->_next) {
        // Keep walking.
    }

    reader->curr_file->_next = *link;
    *link = reader->curr_file;

    // The list now holds its own reference to the header, so it is
    // not freed when the input stream moves on.
    lha_file_header_add_ref(reader->curr_file);

    return 1;
}
/**
 * Extract a Unix symbolic link.
 *
 * A potentially dangerous link read from the input stream is not
 * created directly; a placeholder is created and the link is deferred
 * (see extract_placeholder_symlink). Deferred links that are being
 * processed at the end of the stream are always created for real.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param filename       Filename into which to extract the symlink, or NULL
 *                       to use the filename from the file header.
 * @return               Non-zero if the symlink was extracted successfully,
 *                       or zero for failure.
 */
static int extract_symlink(LHAReader *reader, char *filename)
{
    char *tmp_filename = NULL;
    int result;

    // Construct filename?
    if (filename == NULL) {
        tmp_filename = lha_file_header_full_path(reader->curr_file);

        if (tmp_filename == NULL) {
            return 0;
        }

        filename = tmp_filename;
    }

    // Both paths fall through to the free() below, so that the
    // constructed filename is not leaked when the link is deferred.
    if (reader->curr_file_type == CURR_FILE_NORMAL
     && is_dangerous_symlink(reader->curr_file)) {
        result = extract_placeholder_symlink(reader, filename);
    } else {
        result = lha_arch_symlink(filename,
                                  reader->curr_file->symlink_target);
    }

    // TODO: Set symlink timestamp.

    free(tmp_filename);

    return result;
}
/**
 * Extract an entry that came from the input stream.
 *
 * Dispatches on what the header describes: anything whose compression
 * method is not the directory type is a regular file; a directory-type
 * entry with a symlink target is a symbolic link; any other
 * directory-type entry is a directory.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param filename       Filename into which to extract the file, or NULL
 *                       to use the filename from the file header.
 * @param callback       Callback function to invoke to track progress.
 * @param callback_data  Extra pointer to pass to the callback function.
 * @return               Non-zero if the file was successfully extracted,
 *                       or zero for failure.
 */
static int extract_normal(LHAReader *reader,
                          char *filename,
                          LHADecoderProgressCallback callback,
                          void *callback_data)
{
    LHAFileHeader *entry = reader->curr_file;
    int is_dir_type = strcmp(entry->compress_method,
                             LHA_COMPRESS_TYPE_DIR) == 0;

    if (!is_dir_type) {
        return extract_file(reader, filename, callback, callback_data);
    }

    if (entry->symlink_target != NULL) {
        return extract_symlink(reader, filename);
    }

    return extract_directory(reader, filename);
}
/**
 * Extract the current entry, whatever kind it is.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @param filename       Filename to extract to, or NULL to use the name
 *                       from the file header.
 * @param callback       Callback function to invoke to track progress.
 * @param callback_data  Extra pointer to pass to the callback function.
 * @return               Non-zero for success, zero for failure or when
 *                       there is no current entry to extract.
 */
int lha_reader_extract(LHAReader *reader,
                       char *filename,
                       LHADecoderProgressCallback callback,
                       void *callback_data)
{
    int result = 0;

    switch (reader->curr_file_type) {

        case CURR_FILE_NORMAL:
            result = extract_normal(reader, filename, callback,
                                    callback_data);
            break;

        // A directory popped from the stack already exists on disk;
        // all that is left is to apply its metadata. This always
        // reports success.
        case CURR_FILE_FAKE_DIR:
            set_directory_metadata(reader->curr_file,
                                   filename != NULL
                                       ? filename
                                       : reader->curr_file->path);
            result = 1;
            break;

        case CURR_FILE_DEFERRED_SYMLINK:
            result = extract_symlink(reader, filename);
            break;

        // Nothing to extract before the first file or after the last.
        case CURR_FILE_START:
        case CURR_FILE_EOF:
            break;
    }

    return result;
}
/**
 * Report whether the current entry was generated by the reader (a
 * directory popped from the stack, or a deferred symbolic link) rather
 * than read directly from the input stream.
 *
 * @param reader         Pointer to the LHA reader structure.
 * @return               Non-zero if the current entry is "fake".
 */
int lha_reader_current_is_fake(LHAReader *reader)
{
    switch (reader->curr_file_type) {
        case CURR_FILE_FAKE_DIR:
        case CURR_FILE_DEFERRED_SYMLINK:
            return 1;
        default:
            return 0;
    }
}

View file

@ -0,0 +1,45 @@
/*
Copyright (c) 2011, 2012, 2013, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Decoder for the -lhx- algorithm. Provided by Multi.
//
// -lhx- is Unlha32.dll's original extension. Some unique archivers
// support it.
//
// 1 MiB history ring buffer (2^HISTORY_BITS bytes):
// -lhx-'s maximum dictionary size is 2^19. 2x ring buffer is required.
#define HISTORY_BITS 20 /* 2^20 = 1048576. */
// Number of bits to encode HISTORY_BITS:
#define OFFSET_BITS 5
// Name of the variable for the decoder:
#define DECODER_NAME lha_lhx_decoder
// The actual algorithm code is contained in lh_new_decoder.c, which
// acts as a template for -lh4-, -lh5-, -lh6-, -lh7- and -lhx-.
// The three macros above must be defined before it is included.
#include "lh_new_decoder.c"

View file

@ -0,0 +1,198 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "lha_decoder.h"
// Parameters for ring buffer, used for storing history. This acts
// as the dictionary for copy operations. Decoding starts writing at
// position RING_BUFFER_SIZE - START_OFFSET.
#define RING_BUFFER_SIZE 4096
#define START_OFFSET 18
// Threshold offset. In the copy operation, the copy length is a 4-bit
// value, giving a range 0..15. The threshold offsets this so that it
// is interpreted as 3..18 - a more useful range.
#define THRESHOLD 3
// Size of output buffer. Must be large enough to hold the results of
// a complete "run" (see below): eight commands, each producing at most
// 15 + THRESHOLD bytes. The expansion is fully parenthesized so that
// it evaluates as a single value wherever the macro is used.
#define OUTPUT_BUFFER_SIZE ((15 + THRESHOLD) * 8)
// Decoder for the -lz5- compression method used by LArc.
//
// This processes "runs" of eight commands, each of which is either
// "output a character" or "copy block". The result of that run
// is written into the output buffer.
typedef struct {
// History of the most recently output bytes; the dictionary that
// "copy block" commands read from.
uint8_t ringbuf[RING_BUFFER_SIZE];
// Index in ringbuf where the next output byte will be stored.
unsigned int ringbuf_pos;
// Callback used to read compressed input bytes, and the opaque
// pointer passed to it on each call.
LHADecoderCallback callback;
void *callback_data;
} LHALZ5Decoder;
// Populate the ring buffer with the fixed initial dictionary that
// -lz5- streams assume is present before any data has been decoded.
// The regions written below add up to exactly 4096 bytes.
static void fill_initial(LHALZ5Decoder *decoder)
{
    uint8_t *cursor = decoder->ringbuf;
    unsigned int value;

    // 256 runs of 13 identical bytes, one run per byte value. Handy
    // for eg. text files containing long runs (eg. ===========).
    for (value = 0; value < 256; ++value) {
        memset(cursor, (int) value, 13);
        cursor += 13;
    }

    // Every byte value in ascending order ...
    for (value = 0; value < 256; ++value) {
        *cursor++ = (uint8_t) value;
    }

    // ... and then in descending order.
    for (value = 0; value < 256; ++value) {
        *cursor++ = (uint8_t) (255 - value);
    }

    // A block of zeros followed by ASCII spaces. These sit towards
    // the end of the buffer, perhaps because they are the most likely
    // to be useful and so should be the last to get overwritten.
    memset(cursor, 0, 128);
    cursor += 128;

    memset(cursor, ' ', 110);
    cursor += 110;

    // The last 18 bytes are zeros, probably because of START_OFFSET.
    memset(cursor, 0, 18);
}
// Initialize -lz5- decoder state: remember the input callback, load
// the initial dictionary and set the starting write position.
// Always returns 1.
static int lha_lz5_init(void *data, LHADecoderCallback callback,
                        void *callback_data)
{
    LHALZ5Decoder *state = data;

    state->callback = callback;
    state->callback_data = callback_data;

    fill_initial(state);
    state->ringbuf_pos = RING_BUFFER_SIZE - START_OFFSET;

    return 1;
}
// Emit one byte: append it to the output buffer (advancing *buf_len)
// and record it in the history ring buffer, wrapping at the end.
static void output_byte(LHALZ5Decoder *decoder, uint8_t *buf,
                        size_t *buf_len, uint8_t b)
{
    size_t out_index = *buf_len;
    unsigned int ring_index = decoder->ringbuf_pos;

    buf[out_index] = b;
    *buf_len = out_index + 1;

    decoder->ringbuf[ring_index] = b;
    decoder->ringbuf_pos = (ring_index + 1) % RING_BUFFER_SIZE;
}
// Copy `len` bytes out of the ring buffer, beginning at index `start`
// (wrapping around the end), to the output. Bytes are copied one at a
// time because each emitted byte is also written back into the ring
// buffer and may be read again later in the same copy.
static void output_block(LHALZ5Decoder *decoder,
                         uint8_t *buf,
                         size_t *buf_len,
                         unsigned int start,
                         unsigned int len)
{
    unsigned int src = start;
    unsigned int remaining = len;

    while (remaining > 0) {
        output_byte(decoder, buf, buf_len,
                    decoder->ringbuf[src % RING_BUFFER_SIZE]);
        ++src;
        --remaining;
    }
}
// Decode one "run" of -lz5- data: a flag byte followed by up to eight
// commands, one per flag bit starting from the least significant bit.
// Returns the number of bytes written to buf. Decoding of the run
// stops early if the input runs out; zero is returned if not even the
// flag byte could be read.
static size_t lha_lz5_read(void *data, uint8_t *buf)
{
    LHALZ5Decoder *decoder = data;
    uint8_t flags;
    uint8_t operand[2];
    unsigned int i, offset, length;
    size_t produced = 0;

    if (!decoder->callback(&flags, 1, decoder->callback_data)) {
        return 0;
    }

    for (i = 0; i < 8; ++i) {

        // Flag bit set: the next input byte is a literal.
        if (((flags >> i) & 1) != 0) {
            if (!decoder->callback(&operand[0], 1,
                                   decoder->callback_data)) {
                break;
            }

            output_byte(decoder, buf, &produced, operand[0]);
            continue;
        }

        // Flag bit clear: a two-byte copy command. The ring buffer
        // position is 12 bits (low 8 in the first byte, high 4 in the
        // top nibble of the second); the low nibble of the second
        // byte is the length, offset by THRESHOLD.
        if (!decoder->callback(operand, 2, decoder->callback_data)) {
            break;
        }

        offset = (((unsigned int) operand[1] & 0xf0) << 4) | operand[0];
        length = ((unsigned int) operand[1] & 0x0f) + THRESHOLD;

        output_block(decoder, buf, &produced, offset, length);
    }

    return produced;
}
// Decoder type descriptor for the -lz5- algorithm.
// NOTE(review): the initializer is positional; the meaning of each slot
// is defined by LHADecoderType in lha_decoder.h - confirm the notes
// below against that declaration.
LHADecoderType lha_lz5_decoder = {
lha_lz5_init, // initializes an LHALZ5Decoder
NULL, // presumably the "free" hook; none is supplied
lha_lz5_read, // decodes one run into the output buffer
sizeof(LHALZ5Decoder), // size of the per-decoder state
OUTPUT_BUFFER_SIZE, // most bytes a single lha_lz5_read call can produce
RING_BUFFER_SIZE // size of the history ring buffer
};

View file

@ -0,0 +1,156 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "lha_decoder.h"
#include "bit_stream_reader.c"
// Parameters for ring buffer, used for storing history. This acts
// as the dictionary for copy operations. Decoding starts writing at
// position RING_BUFFER_SIZE - START_OFFSET (see lha_lzs_init).
#define RING_BUFFER_SIZE 2048
#define START_OFFSET 17
// Threshold offset. In the copy operation, the copy length is a 4-bit
// value, giving a range 0..15. The threshold offsets this so that it
// is interpreted as 2..17 - a more useful range.
#define THRESHOLD 2
// Size of output buffer. Must be large enough to hold the results of
// the maximum copy operation, since each call to lha_lzs_read
// processes exactly one command.
#define OUTPUT_BUFFER_SIZE (15 + THRESHOLD)
// Decoder for the -lzs- compression method used by old versions of LArc.
//
// The input stream consists of commands, each of which is either "output
// a literal byte value" or "copy block". A bit at the start of each
// command signals which command it is.
typedef struct {
// Bit-level reader over the compressed input.
BitStreamReader bit_stream_reader;
// History of the most recently output bytes; the dictionary that
// "copy block" commands read from. Initially filled with spaces.
uint8_t ringbuf[RING_BUFFER_SIZE];
// Index in ringbuf where the next output byte will be stored.
unsigned int ringbuf_pos;
} LHALZSDecoder;
// Initialize -lzs- decoder state: attach the bit stream reader to the
// input callback, fill the history with ASCII spaces and set the
// starting write position. Always returns 1.
static int lha_lzs_init(void *data, LHADecoderCallback callback,
                        void *callback_data)
{
    LHALZSDecoder *state = data;

    bit_stream_reader_init(&state->bit_stream_reader, callback,
                           callback_data);

    memset(state->ringbuf, ' ', RING_BUFFER_SIZE);
    state->ringbuf_pos = RING_BUFFER_SIZE - START_OFFSET;

    return 1;
}
// Emit one byte: append it to the output buffer (advancing *buf_len)
// and record it in the history ring buffer, wrapping at the end.
static void output_byte(LHALZSDecoder *decoder, uint8_t *buf,
                        size_t *buf_len, uint8_t b)
{
    size_t out_index = *buf_len;
    unsigned int ring_index = decoder->ringbuf_pos;

    buf[out_index] = b;
    *buf_len = out_index + 1;

    decoder->ringbuf[ring_index] = b;
    decoder->ringbuf_pos = (ring_index + 1) % RING_BUFFER_SIZE;
}
// Copy `len` bytes out of the ring buffer, beginning at index `start`
// (wrapping around the end), to the output. Bytes are copied one at a
// time because each emitted byte is also written back into the ring
// buffer and may be read again later in the same copy.
static void output_block(LHALZSDecoder *decoder,
                         uint8_t *buf,
                         size_t *buf_len,
                         unsigned int start,
                         unsigned int len)
{
    unsigned int src = start;
    unsigned int remaining = len;

    while (remaining > 0) {
        output_byte(decoder, buf, buf_len,
                    decoder->ringbuf[src % RING_BUFFER_SIZE]);
        ++src;
        --remaining;
    }
}
// Process a single command from the LZS input stream.
static size_t lha_lzs_read(void *data, uint8_t *buf)
{
LHALZSDecoder *decoder = data;
int bit;
size_t result;
// Start from an empty buffer.
result = 0;
// Each command starts with a bit that signals the type:
bit = read_bit(&decoder->bit_stream_reader);
if (bit < 0) {
return 0;
}
// What type of command is this?
if (bit) {
int b;
b = read_bits(&decoder->bit_stream_reader, 8);
if (b < 0) {
return 0;
}
output_byte(decoder, buf, &result, (uint8_t) b);
} else {
int pos, len;
pos = read_bits(&decoder->bit_stream_reader, 11);
len = read_bits(&decoder->bit_stream_reader, 4);
if (pos < 0 || len < 0) {
return 0;
}
output_block(decoder, buf, &result, (unsigned int) pos,
(unsigned int) len + THRESHOLD);
}
return result;
}
// Decoder type descriptor for the -lzs- algorithm.
// NOTE(review): the initializer is positional; the meaning of each slot
// is defined by LHADecoderType in lha_decoder.h - confirm the notes
// below against that declaration.
LHADecoderType lha_lzs_decoder = {
lha_lzs_init, // initializes an LHALZSDecoder
NULL, // presumably the "free" hook; none is supplied
lha_lzs_read, // decodes one command into the output buffer
sizeof(LHALZSDecoder), // size of the per-decoder state
OUTPUT_BUFFER_SIZE, // most bytes a single lha_lzs_read call can produce
RING_BUFFER_SIZE // size of the history ring buffer
};

View file

@ -0,0 +1,451 @@
/*
Copyright (c) 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Code for handling MacBinary headers.
//
// Classic Mac OS attaches more metadata to files than other operating
// systems. For example, each file has a file type that is used to
// determine the application to open it with. Files can also have both
// a data fork and a resource fork. Because of this design, when
// transferring a file between computers (eg. over a network), all of
// the data associated with the file must be bundled up together to
// preserve the file.
//
// MacLHA uses the MacBinary container format to do this. Within the
// compressed data, the file contents are preceded by a 128 byte
// header that contains the metadata. The data from the data fork can
// also be followed by the data from the resource fork.
//
// Because this is incompatible with .lzh archives from other operating
// systems, MacLHA has two menu items to create new archives - one
// creates a "Mac" archive, while the other creates a "non-Mac"
// (standard) archive that contains just the file contents. This quote
// from the documentation (MacLHAE.doc) describes what is stored when
// the latter option is used:
//
// > If a file has only either Data Fork or Resource Fork, it's stored
// > into archives. In case a file has both Data Fork and Resource Fork,
// > only the Data Fork is stored.
//
// --
//
// Mac OS X has essentially abandoned this practise of using filesystem
// metadata and other systems do not use it, either. It is therefore
// sensible and desirable to strip off the MacBinary header (if present)
// and extract just the normal file contents. It makes sense to use the
// same strategy quoted above.
//
// The possible presence of a MacBinary header can be inferred using the
// OS type field from the LHA header - a value of 'm' indicates that it
// was generated by MacLHA. However, there are some issues with this:
//
// 1. This type is set regardless of whether a MacBinary header is
// attached or not. There is no other field to indicate the
// difference, and MacBinary headers do not have a magic number, so
// the presence of one must be determined heuristically.
// Realistically, though, this can be done without too much
// difficulty, by strictly checking all the fields in the MacBinary
// header. If an invalid header is seen, it can be rejected and
// assumed to be a normal file.
//
// 2. MacBinary is a standard container format for transferring files
// between Macs and not used just by MacLHA. Therefore, it is
// plausible that a .lzh archive might "deliberately" contain a
// MacBinary file, in which case it would be a mistake to strip
// off the header.
//
// This is an unlikely but still a plausible scenario. It can be
// mitigated by comparing the MacBinary header values against the
// values from the .lzh header. A header added by MacLHA will have
// a filename that matches the .lzh header's filename (MacBinary
// files usually have a .bin extension appended, so the filenames
// would not match). Also, the modification timestamp should match
// the timestamp from the .lzh header.
//
// 3. Comparing the MacBinary header modification timestamp with the
// .lzh header modification timestamp is complicated by the fact
// that the former is stored as a Macintosh 1904-based timestamp
// in the local timezone, while the latter is stored as a Unix
// timestamp in UTC time. Although converting timestamp formats
// is trivial, the two do not compare exactly due to the timezone
// offset.
//
// --
//
// Summary of MacBinary header fields and policy for each
// (Z = check zero, C = check value, I = ignore):
//
// 0x00 - Z - "Old version number", must be zero for compatibility
// 0x01 - C - Filename length, must match .lzh header filename.
// 0x02-0x40 - C - Filename, must match .lzh header filename.
// Z - Remainder following filename contents must be zero
// 0x41-0x44 - I - File type
// 0x45-0x48 - I - File creator
// 0x49 - I - Finder flags
// 0x4a - Z - "Must be zero for compatibility"
// 0x4b-0x4c - I - Icon vertical position
// 0x4d-0x4e - I - Icon horizontal position
// 0x4f-0x50 - I - Window ID
// 0x51 - I - "Protected" flag
// 0x52 - Z - "Must be zero for compatibility"
// 0x53-0x56 - C - Data fork length }- added together with the 128-byte header,
// 0x57-0x5a - C - Resource fork length }- equal uncompressed length rounded up to 128
// 0x5b-0x5e - I - File creation date
// 0x5f-0x62 - C - File modification date - should match .lzh header
// 0x63-0x64 - Z - Finder "Get Info" comment length - unused by MacLHA
// 0x65-0x7f - Z - MacBinary II data - unused by MacLHA
#include <stdlib.h>
#include <string.h>
#include "lha_decoder.h"
#include "lha_endian.h"
#include "lha_file_header.h"
// Upper bound on the number of bytes produced by a single call to
// macbinary_decoder_read().
#define OUTPUT_BUFFER_SIZE 4096 /* bytes */
// Classic Mac OS represents time in seconds since 1904, instead of
// Unix time's 1970 epoch. This is the difference between the two.
#define MAC_TIME_OFFSET 2082844800 /* seconds */
// Size of the MacBinary header.
#define MBHDR_SIZE 128 /* bytes */
// Offsets of fields in MacBinary header (and their sizes):
#define MBHDR_OFF_VERSION 0x00
#define MBHDR_OFF_FILENAME_LEN 0x01
#define MBHDR_OFF_FILENAME 0x02
// The filename field occupies offsets 0x02-0x40 inclusive.
#define MBHDR_LEN_FILENAME 63
#define MBHDR_OFF_ZERO_COMPAT1 0x4a
#define MBHDR_OFF_ZERO_COMPAT2 0x52
#define MBHDR_OFF_DATA_FORK_LEN 0x53
#define MBHDR_OFF_RES_FORK_LEN 0x57
#define MBHDR_OFF_FILE_MOD_DATE 0x5f
#define MBHDR_OFF_COMMENT_LEN 0x63
#define MBHDR_OFF_MACBINARY2_DATA 0x65
// The MacBinary II area runs from its offset to the end of the header.
#define MBHDR_LEN_MACBINARY2_DATA (MBHDR_SIZE - MBHDR_OFF_MACBINARY2_DATA)
// Check that the given block of data contains only zero bytes.
//
// Returns 1 if all data_len bytes starting at data are zero (which is
// trivially the case when data_len is zero), or 0 as soon as a
// non-zero byte is found. The data is only read, never modified.
static int block_is_zero(const uint8_t *data, size_t data_len)
{
	size_t i;

	// The index has the same type as the length, so the loop cannot
	// wrap around for lengths beyond the range of unsigned int.
	for (i = 0; i < data_len; ++i) {
		if (data[i] != 0) {
			return 0;
		}
	}

	return 1;
}
// Decide whether a MacBinary modification time is close enough to the
// modification time stored in the .lzh file header.
//
// mod_time is the MacBinary timestamp, already rebased by the caller
// from the 1904 epoch to the Unix epoch. Returns 1 if it is accepted
// as matching header->timestamp, 0 otherwise.
static int check_modification_time(unsigned int mod_time,
                                   LHAFileHeader *header)
{
	// Largest timezone offset from UTC that is tolerated (UTC+14).
	const unsigned int max_tz_offset = 14 * 60 * 60;
	unsigned int delta;

	// Ideally the two values would be identical, but mod_time is in
	// local time whereas the .lzh timestamp is UTC, so they differ by
	// the timezone offset of the machine that made the archive. Work
	// with the absolute difference between the two.
	delta = header->timestamp > mod_time
	      ? header->timestamp - mod_time
	      : mod_time - header->timestamp;

	// A stricter policy would also require the difference to be a
	// multiple of 15 minutes, since real timezones are whole, half
	// or quarter hour offsets; that would distinguish MacLHA-added
	// headers from archived MacBinary files more reliably. However,
	// MacLHA does generate archives where the timestamps do not
	// line up that exactly, so only the range is checked.
	return delta <= max_tz_offset;
}
// Given the specified data buffer, check whether it has a MacBinary
// header with contents that match the specified .lzh header.
//
// data must point to at least MBHDR_SIZE bytes. Returns 1 if every
// checked field is consistent with a header added by MacLHA for the
// file described by 'header', or 0 if the data should be treated as
// ordinary file contents.
static int is_macbinary_header(uint8_t *data, LHAFileHeader *header)
{
	unsigned int filename_len;
	unsigned int data_fork_len, res_fork_len;
	unsigned int mod_time;
	uint64_t expected_len;

	// Check fields in the header that should be zero.

	if (data[MBHDR_OFF_VERSION] != 0
	 || data[MBHDR_OFF_ZERO_COMPAT1] != 0
	 || data[MBHDR_OFF_ZERO_COMPAT2] != 0
	 || !block_is_zero(&data[MBHDR_OFF_COMMENT_LEN], 2)
	 || !block_is_zero(&data[MBHDR_OFF_MACBINARY2_DATA],
	                   MBHDR_LEN_MACBINARY2_DATA)) {
		return 0;
	}

	// Check that the filename matches the filename from the
	// lzh header. The length is validated first so that the
	// comparison never reads beyond the filename field.

	filename_len = data[MBHDR_OFF_FILENAME_LEN];

	if (filename_len > MBHDR_LEN_FILENAME
	 || filename_len != strlen(header->filename)
	 || memcmp(&data[MBHDR_OFF_FILENAME],
	           header->filename, filename_len) != 0) {
		return 0;
	}

	// Data following the filename must be zero as well.

	if (!block_is_zero(data + MBHDR_OFF_FILENAME + filename_len,
	                   MBHDR_LEN_FILENAME - filename_len)) {
		return 0;
	}

	// Decode data fork / resource fork lengths. Their combined
	// lengths, plus the MacBinary header, should match the length
	// from the .lzh header (rounded up to the nearest 128).
	//
	// The sum is computed in 64 bits: both fork lengths come
	// straight from the (untrusted) data, and in 32-bit arithmetic
	// a pair of huge values could wrap around to a small total that
	// happens to equal header->length.

	data_fork_len = lha_decode_be_uint32(&data[MBHDR_OFF_DATA_FORK_LEN]);
	res_fork_len = lha_decode_be_uint32(&data[MBHDR_OFF_RES_FORK_LEN]);

	expected_len = (uint64_t) data_fork_len + res_fork_len + MBHDR_SIZE;
	expected_len = (expected_len + 0x7f) & ~(uint64_t) 0x7f;

	if (header->length != expected_len) {
		return 0;
	}

	// Check modification time. Values before the Unix epoch cannot
	// be rebased and are rejected outright.

	mod_time = lha_decode_be_uint32(&data[MBHDR_OFF_FILE_MOD_DATE]);

	if (mod_time < MAC_TIME_OFFSET
	 || !check_modification_time(mod_time - MAC_TIME_OFFSET, header)) {
		return 0;
	}

	return 1;
}
//
// MacBinary "decoder". This reuses the LHADecoder framework to provide
// a "pass-through" decoder that detects and strips the MacBinary header.
//
// State of one pass-through decoder instance.
typedef struct {
// When the decoder is initialized, the first 128 bytes of
// data are read into this buffer and analysed. If it is
// not a MacBinary header, the data must be kept so that it
// can be returned in the first call to .read().
// mb_header_bytes contains the number of buffered bytes that
// are still waiting to be returned; it is zero when a header
// was recognized (and therefore dropped) or once the buffered
// bytes have been handed out.
uint8_t mb_header[MBHDR_SIZE];
size_t mb_header_bytes;
// The "inner" decoder used to read the compressed data.
LHADecoder *decoder;
// Number of bytes still to read before decode should be
// terminated. Starts as the length from the file header and is
// replaced by a fork length when a MacBinary header is found.
size_t stream_remaining;
} MacBinaryDecoder;
// Structure used when initializing a MacBinaryDecoder. A pointer to
// one of these is passed as the callback data argument of
// macbinary_decoder_init().
typedef struct {
// The "inner" decoder that the new decoder will read from.
LHADecoder *decoder;
// File header that a MacBinary header is matched against.
LHAFileHeader *header;
} MacBinaryDecoderClosure;
// Read the first MBHDR_SIZE bytes from the inner decoder and decide
// whether they are a MacBinary header belonging to 'header'.
//
// If they are, the bytes are dropped and stream_remaining is set to
// the length of the fork that will be passed through. If not, the
// bytes are kept so that the first read can return them.
//
// Returns 1 for success, 0 if the inner stream ended before
// MBHDR_SIZE bytes could be read.
static int read_macbinary_header(MacBinaryDecoder *decoder,
                                 LHAFileHeader *header)
{
	unsigned int data_len, res_len;
	size_t filled, got;

	// The inner decoder may return short reads, so keep going
	// until the buffer is full.

	for (filled = 0; filled < MBHDR_SIZE; filled += got) {
		got = lha_decoder_read(decoder->decoder,
		                       decoder->mb_header + filled,
		                       MBHDR_SIZE - filled);

		// Unexpected EOF?

		if (got == 0) {
			return 0;
		}
	}

	// Not a MacBinary header matching the .lzh header? Then this
	// is ordinary data: remember how much is buffered and decode
	// as a normal stream.

	if (!is_macbinary_header(decoder->mb_header, header)) {
		decoder->mb_header_bytes = filled;
		return 1;
	}

	// We have a MacBinary header, so skip over it. Decide how
	// long the data stream is (see policy in comment at start
	// of file): the data fork if there is one, otherwise the
	// resource fork.

	decoder->mb_header_bytes = 0;

	data_len = lha_decode_be_uint32(
	    &decoder->mb_header[MBHDR_OFF_DATA_FORK_LEN]);
	res_len = lha_decode_be_uint32(
	    &decoder->mb_header[MBHDR_OFF_RES_FORK_LEN]);

	decoder->stream_remaining = data_len > 0 ? data_len : res_len;

	return 1;
}
// Initialization callback for the MacBinary pass-through decoder.
//
// _decoder points to the MacBinaryDecoder state to set up and _closure
// to a MacBinaryDecoderClosure; the 'callback' argument is not used,
// because data is read from the inner decoder instead.
//
// Returns 1 for success, 0 if the start of the stream could not be read.
static int macbinary_decoder_init(void *_decoder,
                                  LHADecoderCallback callback,
                                  void *_closure)
{
	MacBinaryDecoder *self = _decoder;
	MacBinaryDecoderClosure *args = _closure;
	LHAFileHeader *header = args->header;

	self->decoder = args->decoder;
	self->mb_header_bytes = 0;
	self->stream_remaining = header->length;

	// A file shorter than a MacBinary header cannot contain one,
	// so there is nothing to inspect.

	if (header->length < MBHDR_SIZE) {
		return 1;
	}

	return read_macbinary_header(self, header) ? 1 : 0;
}
// Run the given decoder until it reports end of stream, discarding
// everything it produces.
static void decode_to_end(LHADecoder *decoder)
{
	uint8_t scratch[128];
	size_t got;

	for (;;) {
		got = lha_decoder_read(decoder, scratch, sizeof(scratch));

		if (got == 0) {
			break;
		}
	}
}
// Read callback for the MacBinary pass-through decoder.
//
// Fills buf with up to OUTPUT_BUFFER_SIZE bytes: first any bytes held
// back in mb_header, then data from the inner decoder, limited to
// stream_remaining. Returns the number of bytes placed in buf.
static size_t macbinary_decoder_read(void *_decoder, uint8_t *buf)
{
	MacBinaryDecoder *self = _decoder;
	size_t filled = 0;
	size_t want, got;

	// Bytes that were read during initialization but turned out
	// not to be a MacBinary header go out first.

	if (self->mb_header_bytes > 0) {
		filled = self->mb_header_bytes;
		memcpy(buf, self->mb_header, filled);
		self->mb_header_bytes = 0;
	}

	// Use the rest of the buffer for stream data, but never read
	// past the end of the stream.

	want = OUTPUT_BUFFER_SIZE - filled;

	if (self->stream_remaining < want) {
		want = self->stream_remaining;
	}

	got = lha_decoder_read(self->decoder, buf + filled, want);
	self->stream_remaining -= got;
	filled += got;

	// Once the end of the stream is reached, there may still be
	// data from the inner decoder to decompress. When this happens,
	// run the decoder until the end.

	if (self->stream_remaining == 0) {
		decode_to_end(self->decoder);
	}

	return filled;
}
// Decoder type descriptor for the MacBinary pass-through decoder,
// handed to lha_decoder_new() by lha_macbinary_passthrough().
// NOTE(review): the field meanings below are inferred from the values
// used here; confirm against the LHADecoderType definition in
// lha_decoder.h.
static LHADecoderType macbinary_decoder_type = {
macbinary_decoder_init, // initialization callback
NULL, // no callback supplied (presumably the "free" hook)
macbinary_decoder_read, // read callback
sizeof(MacBinaryDecoder), // size of the per-decoder state
OUTPUT_BUFFER_SIZE, // most bytes one read call can produce
0, // same slot holds 2048 in the other decoders; meaning not visible here
};
// Create a pass-through decoder that reads from 'decoder' and strips
// a MacBinary header matching 'header', if one is present.
//
// Returns whatever lha_decoder_new() returns for the new decoder.
LHADecoder *lha_macbinary_passthrough(LHADecoder *decoder,
                                      LHAFileHeader *header)
{
	MacBinaryDecoderClosure closure;

	// The closure only needs to live for the duration of the call:
	// macbinary_decoder_init() copies what it needs out of it.

	closure.header = header;
	closure.decoder = decoder;

	return lha_decoder_new(&macbinary_decoder_type, NULL,
	                       &closure, header->length);
}

View file

@ -0,0 +1,49 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_MACBINARY_H
#define LHASA_MACBINARY_H
#include "lha_decoder.h"
#include "lha_file_header.h"
/**
* Create a passthrough decoder to handle MacBinary headers added
* by MacLHA.
*
* The new decoder reads from the specified decoder and filters
* out the header. The contents of the MacBinary header must match
* the details from the specified file header.
*
* @param decoder The "inner" decoder from which to read data.
* @param header The file header, that the contents of the
* MacBinary header must match.
* @return A new decoder, which passes through the
* contents of the inner decoder, stripping
* off the MacBinary header and truncating
* as appropriate. Both decoders must be freed
* by the caller.
*/
LHADecoder *lha_macbinary_passthrough(LHADecoder *decoder,
LHAFileHeader *header);
#endif /* #ifndef LHASA_MACBINARY_H */

View file

@ -0,0 +1,61 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Null decoder, for uncompressed files.
#include <stdlib.h>
#include <inttypes.h>
#include "lha_decoder.h"
#define BLOCK_READ_SIZE 1024
// State of the null decoder: just the input callback and its
// argument, saved by lha_null_init() for use by lha_null_read().
typedef struct {
// Callback invoked to read more input data.
LHADecoderCallback callback;
// Opaque pointer passed back to the callback on every call.
void *callback_data;
} LHANullDecoder;
// Initialize the null decoder by remembering where to read input from.
// Always succeeds (returns 1).
static int lha_null_init(void *data, LHADecoderCallback callback,
                         void *callback_data)
{
	LHANullDecoder *state = data;

	state->callback_data = callback_data;
	state->callback = callback;

	return 1;
}
// Read callback for the null decoder: the data is not compressed, so
// up to BLOCK_READ_SIZE bytes of input are read straight into the
// output buffer. Returns the number of bytes the input callback gave.
static size_t lha_null_read(void *data, uint8_t *buf)
{
	LHANullDecoder *state = data;
	size_t nread;

	nread = state->callback(buf, BLOCK_READ_SIZE, state->callback_data);

	return nread;
}
// Decoder type descriptor for uncompressed ("null") data.
// NOTE(review): the field meanings below are inferred from the values
// used here; confirm against the LHADecoderType definition in
// lha_decoder.h.
LHADecoderType lha_null_decoder = {
lha_null_init, // initialization callback
NULL, // no callback supplied (presumably the "free" hook)
lha_null_read, // read callback
sizeof(LHANullDecoder), // size of the per-decoder state
BLOCK_READ_SIZE, // most bytes one read call can produce
2048 // meaning not visible in this file
};

View file

@ -0,0 +1,714 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Decoder for -pm1- compressed files.
//
// This was difficult to put together. I can't find any versions of
// PMarc that will generate -pm1- encoded files (only -pm2-); however,
// the extraction tool, PMext, will extract them. I have therefore been
// able to reverse engineer the format and write a decoder. I am
// indebted to Alwin Henseler for publishing the Z80 assembly source to
// his UNPMA10 tool, which was apparently decompiled from the original
// PMarc and includes the -pm1- decoding code.
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "lha_decoder.h"
#include "bit_stream_reader.c"
#include "pma_common.c"
// Size of the ring buffer used to hold the history.
#define RING_BUFFER_SIZE 16384
// Maximum length of a command representing a block of bytes:
#define MAX_BYTE_BLOCK_LEN 216
// Maximum number of bytes that can be copied by a single copy command.
#define MAX_COPY_BLOCK_LEN 244
// Output buffer length. A single call to lha_pm1_read can perform one
// byte block output followed by a copy command.
#define OUTPUT_BUFFER_SIZE (MAX_BYTE_BLOCK_LEN + MAX_COPY_BLOCK_LEN)
// Complete state of a -pm1- decoder.
typedef struct {
// Reader used to pull bits from the compressed input.
BitStreamReader bit_stream_reader;
// Position in output stream, in bytes. Incremented for every
// byte produced; several decode tables depend on it.
unsigned int output_stream_pos;
// Pointer to the entry in byte_decode_table used to decode
// byte value indices. NULL until the stream's start header has
// been read.
const uint8_t *byte_decode_tree;
// History ring buffer.
uint8_t ringbuf[RING_BUFFER_SIZE];
// Index in ringbuf where the next output byte will be stored.
unsigned int ringbuf_pos;
// History linked list, for adaptively encoding byte values.
HistoryLinkedList history_list;
// Callback to read more compressed data from the input (see
// read_callback_wrapper below).
LHADecoderCallback callback;
void *callback_data;
} LHAPM1Decoder;
// Table used to decode distance into history buffer to copy data.
// Judging by the per-entry comments, each entry is a base offset and
// a number of bits to read and add to it (confirm against the
// VariableLengthTable definition in pma_common.c).
static const VariableLengthTable copy_ranges[] = {
{ 0, 6 }, // 0 + (1 << 6) = 64
{ 64, 8 }, // 64 + (1 << 8) = 320
{ 0, 6 }, // 0 + (1 << 6) = 64
{ 64, 9 }, // 64 + (1 << 9) = 576
{ 576, 11 }, // 576 + (1 << 11) = 2624
{ 2624, 13 }, // 2624 + (1 << 13) = 10816
// The above table entries are used after a certain number of
// bytes have been decoded.
// Early in the stream, some of the copy ranges are more limited
// in their range, so that fewer bits are needed. The above
// table entries are redirected to these entries instead.
// The "< N bytes" notes give the output stream position below
// which read_copy_command() selects that entry.
// Table entry #3 (64):
{ 64, 8 }, // < 320 bytes
// Table entry #4 (576):
{ 576, 8 }, // < 832 bytes
{ 576, 9 }, // < 1088 bytes
{ 576, 10 }, // < 1600 bytes
// Table entry #5 (2624):
{ 2624, 8 }, // < 2880 bytes
{ 2624, 9 }, // < 3136 bytes
{ 2624, 10 }, // < 3648 bytes
{ 2624, 11 }, // < 4672 bytes
{ 2624, 12 }, // < 6720 bytes
};
// Table used to decode byte values. The decoded value is a distance
// to walk along the history linked list (see read_byte()).
static const VariableLengthTable byte_ranges[] = {
{ 0, 4 }, // 0 + (1 << 4) = 16
{ 16, 4 }, // 16 + (1 << 4) = 32
{ 32, 5 }, // 32 + (1 << 5) = 64
{ 64, 6 }, // 64 + (1 << 6) = 128
{ 128, 6 }, // 128 + (1 << 6) = 192
{ 192, 6 }, // 192 + (1 << 6) = 256
};
// This table is a list of trees to decode indices into byte_ranges.
// Each line is actually a mini binary tree, starting with the first
// byte as the root node. Each nybble of the byte is one of the two
// branches: either a leaf value (a-f) or an offset to the child node.
// The high nybble is taken for a 0 bit and the low nybble for a 1 bit;
// offsets are relative to the current node (see
// read_byte_decode_index()). The tree to use is selected by the 5-bit
// header at the start of the stream, hence the 32 entries.
// Expanded representation is shown in comments below.
static const uint8_t byte_decode_trees[][5] = {
{ 0x12, 0x2d, 0xef, 0x1c, 0xab }, // ((((a b) c) d) (e f))
{ 0x12, 0x23, 0xde, 0xab, 0xcf }, // (((a b) (c f)) (d e))
{ 0x12, 0x2c, 0xd2, 0xab, 0xef }, // (((a b) c) (d (e f)))
{ 0x12, 0xa2, 0xd2, 0xbc, 0xef }, // ((a (b c)) (d (e f)))
{ 0x12, 0xa2, 0xc2, 0xbd, 0xef }, // ((a (b d)) (c (e f)))
{ 0x12, 0xa2, 0xcd, 0xb1, 0xef }, // ((a (b (e f))) (c d))
{ 0x12, 0xab, 0x12, 0xcd, 0xef }, // ((a b) ((c d) (e f)))
{ 0x12, 0xab, 0x1d, 0xc1, 0xef }, // ((a b) ((c (e f)) d))
{ 0x12, 0xab, 0xc1, 0xd1, 0xef }, // ((a b) (c (d (e f))))
{ 0xa1, 0x12, 0x2c, 0xde, 0xbf }, // (a (((b f) c) (d e)))
{ 0xa1, 0x1d, 0x1c, 0xb1, 0xef }, // (a (((b (e f)) c) d))
{ 0xa1, 0x12, 0x2d, 0xef, 0xbc }, // (a (((b c) d) (e f)))
{ 0xa1, 0x12, 0xb2, 0xde, 0xcf }, // (a ((b (c f)) (d e)))
{ 0xa1, 0x12, 0xbc, 0xd1, 0xef }, // (a ((b c) (d (e f))))
{ 0xa1, 0x1c, 0xb1, 0xd1, 0xef }, // (a ((b (d (e f))) c))
{ 0xa1, 0xb1, 0x12, 0xcd, 0xef }, // (a (b ((c d) (e f))))
{ 0xa1, 0xb1, 0xc1, 0xd1, 0xef }, // (a (b (c (d (e f)))))
// In the next entry both branches of the root end up at the
// (d e) node, so the final 0xab byte is never reached.
{ 0x12, 0x1c, 0xde, 0xab }, // (((d e) c) (d e)) <- BROKEN!
{ 0x12, 0xa2, 0xcd, 0xbe }, // ((a (b e)) (c d))
{ 0x12, 0xab, 0xc1, 0xde }, // ((a b) (c (d e)))
{ 0xa1, 0x1d, 0x1c, 0xbe }, // (a (((b e) c) d))
{ 0xa1, 0x12, 0xbc, 0xde }, // (a ((b c) (d e)))
{ 0xa1, 0x1c, 0xb1, 0xde }, // (a ((b (d e)) c))
{ 0xa1, 0xb1, 0xc1, 0xde }, // (a (b (c (d e))))
{ 0x1d, 0x1c, 0xab }, // (((a b) c) d)
{ 0x1c, 0xa1, 0xbd }, // ((a (b d)) c)
{ 0x12, 0xab, 0xcd }, // ((a b) (c d))
{ 0xa1, 0x1c, 0xbd }, // (a ((b d) c))
{ 0xa1, 0xb1, 0xcd }, // (a (b (c d)))
{ 0xa1, 0xbc }, // (a (b c))
{ 0xab }, // (a b)
{ 0x00 }, // -- special entry: 0, no tree
};
// Input callback given to the bit stream reader. It forwards to the
// real input callback, except that once that callback reports end of
// file (by returning zero) the buffer is filled with zero bytes and
// reported as fully read. There seem to be archive files that actually
// depend on this ability to read "beyond" the length of the compressed
// data.
static size_t read_callback_wrapper(void *buf, size_t buf_len, void *user_data)
{
	LHAPM1Decoder *decoder = user_data;
	size_t nread = decoder->callback(buf, buf_len, decoder->callback_data);

	if (nread != 0) {
		return nread;
	}

	// End of input: pretend that it continues with zeros.

	memset(buf, 0, buf_len);

	return buf_len;
}
// Initialize a -pm1- decoder. Always succeeds (returns 1).
static int lha_pm1_init(void *data, LHADecoderCallback callback,
                        void *callback_data)
{
	LHAPM1Decoder *state = data;

	memset(state, 0, sizeof(*state));

	// Unlike other decoders, the bit stream reader is not given the
	// input callback directly. It reads through the wrapper function
	// above, which finds the real callback in the decoder state.

	state->callback = callback;
	state->callback_data = callback_data;

	bit_stream_reader_init(&state->bit_stream_reader,
	                       read_callback_wrapper, state);

	// Nothing has been output yet, and the byte decode tree is
	// chosen when the start header is read on the first read call.

	state->output_stream_pos = 0;
	state->byte_decode_tree = NULL;
	state->ringbuf_pos = 0;

	init_history_list(&state->history_list);

	return 1;
}
// Read the 5-bit header found at the start of the input stream. Its
// value selects which entry of byte_decode_trees is used to decode
// byte values for the rest of the stream.
// Returns 1 for success, 0 for failure.
static int read_start_header(LHAPM1Decoder *decoder)
{
	int tree_index = read_bits(&decoder->bit_stream_reader, 5);

	if (tree_index >= 0) {
		decoder->byte_decode_tree = byte_decode_trees[tree_index];
		return 1;
	}

	return 0;
}
// Record a byte that has just been output, updating every data
// structure that tracks the output: the history ring buffer, the
// history linked list and the output stream position.
static void outputted_byte(LHAPM1Decoder *decoder, uint8_t b)
{
	unsigned int pos = decoder->ringbuf_pos;

	// Store in the ring buffer and advance, wrapping at the end.

	decoder->ringbuf[pos] = b;
	decoder->ringbuf_pos = (pos + 1) % RING_BUFFER_SIZE;

	update_history_list(&decoder->history_list, b);

	decoder->output_stream_pos += 1;
}
// Decode the number of bytes to copy in a copy command.
// Returns a length in the range 3..244, or -1 for failure.
//
// This is a form of static huffman encoding in which short lengths
// take fewer bits. Lengths start at 3: a length of 2 is implied by the
// copy range, in which case this function is not called at all.
static int read_copy_byte_count(LHAPM1Decoder *decoder)
{
	BitStreamReader *reader = &decoder->bit_stream_reader;
	int code;

	// Two bits: 0..2 give lengths 3..5, 3 means "longer".

	code = read_bits(reader, 2);

	if (code < 0) {
		return -1;
	}
	if (code < 3) {
		return code + 3;
	}

	// Three bits: 0..4 give lengths 6..10, 5..7 select one of
	// the longer groups.

	code = read_bits(reader, 3);

	if (code < 0) {
		return -1;
	}
	if (code < 5) {
		return code + 6;
	}

	// Lengths 11..14: two more bits.

	if (code == 5) {
		code = read_bits(reader, 2);
		return code < 0 ? -1 : code + 11;
	}

	// Lengths 15..22: three more bits.

	if (code == 6) {
		code = read_bits(reader, 3);
		return code < 0 ? -1 : code + 15;
	}

	// Otherwise six more bits: 0..61 give lengths 23..84, and
	// 62 and 63 select the two longest groups.

	code = read_bits(reader, 6);

	if (code < 0) {
		return -1;
	}
	if (code < 62) {
		return code + 23;
	}

	// Lengths 85..116: five more bits.

	if (code == 62) {
		code = read_bits(reader, 5);
		return code < 0 ? -1 : code + 85;
	}

	// Lengths 117..244: seven more bits.

	code = read_bits(reader, 7);

	return code < 0 ? -1 : code + 117;
}
// Read a single bit from the input stream, but only if the output
// stream has reached the given position. Until then no input is
// consumed and 'def' is returned in place of the bit.
// Returns -1 for error.
static int read_bit_after_threshold(LHAPM1Decoder *decoder,
                                    unsigned int threshold,
                                    int def)
{
	if (decoder->output_stream_pos < threshold) {
		return def;
	}

	return read_bit(&decoder->bit_stream_reader);
}
// Read the index of the copy range (0..5) used by a copy command.
// Returns -1 for failure.
//
// This is another static huffman tree, but one that grows as more
// data is decoded:
//  1. Initially, only '0' and '2' can be returned.
//  2. After 64 bytes, '1' and '3' can be returned as well.
//  3. After 576 bytes, '4' can be returned.
//  4. After 2624 bytes, '5' can be returned.
static int read_copy_type_range(LHAPM1Decoder *decoder)
{
	int bit;

	bit = read_bit(&decoder->bit_stream_reader);

	if (bit < 0) {
		return -1;
	}

	// First bit clear: range 4, or else 0 or 1.

	if (bit == 0) {
		bit = read_bit_after_threshold(decoder, 576, 0);

		if (bit < 0) {
			return -1;
		}
		if (bit != 0) {
			return 4;
		}

		// The last bit (if it is read at all) is the result
		// itself: 0, 1 or an error.

		return read_bit_after_threshold(decoder, 64, 0);
	}

	// First bit set: range 3, or else 2 or 5.

	bit = read_bit_after_threshold(decoder, 64, 1);

	if (bit < 0) {
		return -1;
	}
	if (bit == 0) {
		return 3;
	}

	bit = read_bit_after_threshold(decoder, 2624, 1);

	if (bit < 0) {
		return -1;
	}

	return bit != 0 ? 2 : 5;
}
// Read a copy command from the input stream and perform it, copying
// bytes from the history ring buffer into buf.
// Returns the number of bytes copied, or 0 for failure.
static size_t read_copy_command(LHAPM1Decoder *decoder, uint8_t *buf)
{
	int range, distance, length;
	int src, n;
	unsigned int pos;

	range = read_copy_type_range(decoder);

	if (range < 0) {
		return 0;
	}

	// The first two entries in the copy_ranges table are a
	// shorthand for copying two bytes. For the others, the
	// length follows in the stream.

	length = 2;

	if (range >= 2) {
		length = read_copy_byte_count(decoder);

		if (length < 0) {
			return 0;
		}
	}

	// Early in the output stream, part of the history covered by
	// ranges 3-5 does not exist yet, so fewer bits are used for
	// the distance. This is done by redirecting to the special
	// entries at the end of the copy_ranges table.

	pos = decoder->output_stream_pos;

	switch (range) {
	case 3:
		if (pos < 320) {
			range = 6;
		}
		break;

	case 4:
		if (pos < 832) {
			range = 7;
		} else if (pos < 1088) {
			range = 8;
		} else if (pos < 1600) {
			range = 9;
		}
		break;

	case 5:
		if (pos < 2880) {
			range = 10;
		} else if (pos < 3136) {
			range = 11;
		} else if (pos < 3648) {
			range = 12;
		} else if (pos < 4672) {
			range = 13;
		} else if (pos < 6720) {
			range = 14;
		}
		break;

	default:
		break;
	}

	// Decode how far back in the history to go. It is an error to
	// reach back beyond the start of the output.

	distance = decode_variable_length(&decoder->bit_stream_reader,
	                                  copy_ranges, range);

	if (distance < 0
	 || (unsigned) distance >= decoder->output_stream_pos) {
		return 0;
	}

	// Copy from the ring buffer one byte at a time. Every byte is
	// recorded as output before the next one is fetched, so a copy
	// may overlap the bytes that it is producing.

	src = (decoder->ringbuf_pos + RING_BUFFER_SIZE
	       - distance - 1) % RING_BUFFER_SIZE;

	for (n = 0; n < length; ++n) {
		uint8_t b = decoder->ringbuf[src];

		buf[n] = b;
		outputted_byte(decoder, b);
		src = (src + 1) % RING_BUFFER_SIZE;
	}

	return length;
}
// Read an index into the byte_ranges table, by walking the byte decode
// tree that was chosen at the start of the stream.
// Returns -1 for failure.
static int read_byte_decode_index(LHAPM1Decoder *decoder)
{
	const uint8_t *node = decoder->byte_decode_tree;
	unsigned int branch;
	int bit;

	// Special entry with no tree: the index is always zero and no
	// bits are read.

	if (node[0] == 0) {
		return 0;
	}

	// Walk down the tree. At each node one bit chooses between the
	// high nybble (0) and the low nybble (1) of the node's byte.

	while (1) {
		bit = read_bit(&decoder->bit_stream_reader);

		if (bit < 0) {
			return -1;
		}

		branch = (bit == 0) ? (*node >> 4) & 0x0f
		                    : *node & 0x0f;

		// Nybbles a-f are leaves; anything smaller is the
		// offset from this node to the child node.

		if (branch >= 10) {
			return branch - 10;
		}

		node += branch;
	}
}
// Read a single byte value from the input stream.
// Returns -1 for failure.
static int read_byte(LHAPM1Decoder *decoder)
{
	int range;
	int steps;

	// Which entry of the byte_ranges table applies?

	range = read_byte_decode_index(decoder);

	if (range < 0) {
		return -1;
	}

	// The value decoded with that entry is not the byte itself but
	// a distance to walk along the history linked list. This is
	// static huffman encoding, so that recently used byte values
	// need fewer bits.

	steps = decode_variable_length(&decoder->bit_stream_reader,
	                               byte_ranges, range);

	if (steps < 0) {
		return -1;
	}

	// Look up the actual byte value in the history linked list.

	return find_in_history_list(&decoder->history_list, steps);
}
// Read the length of a block of bytes.
// Returns a length in the range 1..216, or 0 for failure.
//
// This is a form of static huffman coding, where smaller lengths are
// encoded using shorter bit sequences.
static int read_byte_block_count(BitStreamReader *reader)
{
	int code;

	// Two bits: 0..2 give lengths 1..3, 3 means "longer".

	code = read_bits(reader, 2);

	if (code < 0) {
		return 0;
	}
	if (code < 3) {
		return code + 1;
	}

	// Three bits: 0..6 give lengths 4..10, 7 means "longer".

	code = read_bits(reader, 3);

	if (code < 0) {
		return 0;
	}
	if (code < 7) {
		return code + 4;
	}

	// Four bits: 0..13 give lengths 11..24, and 14 and 15 select
	// the two longest groups.

	code = read_bits(reader, 4);

	if (code < 0) {
		return 0;
	}
	if (code < 14) {
		return code + 11;
	}

	// Lengths 25..88: six more bits.

	if (code == 14) {
		code = read_bits(reader, 6);
		return code < 0 ? 0 : code + 25;
	}

	// Lengths 89..216: seven more bits.

	code = read_bits(reader, 7);

	return code < 0 ? 0 : code + 89;
}
// Read a block of bytes from the input stream into buf, together with
// the copy command that follows it (if any).
// Returns the total number of bytes placed in buf, or 0 for failure.
static size_t read_byte_block(LHAPM1Decoder *decoder, uint8_t *buf)
{
	size_t literal_len, copy_len;
	int count, value, n;

	// How many bytes are in the block?

	count = read_byte_block_count(&decoder->bit_stream_reader);

	if (count == 0) {
		return 0;
	}

	// Decode each byte value in turn, adding it to the output
	// buffer and to the history.

	for (n = 0; n < count; ++n) {
		value = read_byte(decoder);

		if (value < 0) {
			return 0;
		}

		buf[n] = value;
		outputted_byte(decoder, value);
	}

	literal_len = (size_t) count;

	// A block of bytes normally ends because a copy command comes
	// next, so that can be decoded straight away. The exception is
	// a block of the maximum length, which may have ended only
	// because it could not be any larger.

	if (literal_len == MAX_BYTE_BLOCK_LEN) {
		return literal_len;
	}

	copy_len = read_copy_command(decoder, buf + literal_len);

	if (copy_len == 0) {
		return 0;
	}

	return literal_len + copy_len;
}
// Read callback for the -pm1- decoder: decode the next command from
// the input stream and place its output in buf.
//
// A single call outputs either one copy command, or one block of bytes
// that may be followed by a copy command, so buf must have room for
// OUTPUT_BUFFER_SIZE bytes.
//
// Returns the number of bytes placed in buf, or 0 for failure.
static size_t lha_pm1_read(void *data, uint8_t *buf)
{
	LHAPM1Decoder *decoder = data;
	int command_type;

	// Start of input stream? Read the header.

	if (decoder->byte_decode_tree == NULL
	 && !read_start_header(decoder)) {
		return 0;
	}

	// Read what type of command this is. A failed read must not be
	// mistaken for a byte block command, so it is checked for
	// explicitly, like every other bit stream read in this decoder.

	command_type = read_bit(&decoder->bit_stream_reader);

	if (command_type < 0) {
		return 0;
	}

	if (command_type == 0) {
		return read_copy_command(decoder, buf);
	}

	return read_byte_block(decoder, buf);
}
// Decoder type descriptor for -pm1- compressed data.
// NOTE(review): the field meanings below are inferred from the values
// used here; confirm against the LHADecoderType definition in
// lha_decoder.h.
LHADecoderType lha_pm1_decoder = {
lha_pm1_init, // initialization callback
NULL, // no callback supplied (presumably the "free" hook)
lha_pm1_read, // read callback
sizeof(LHAPM1Decoder), // size of the per-decoder state
OUTPUT_BUFFER_SIZE, // most bytes one read call can produce
2048 // meaning not visible in this file
};

View file

@ -0,0 +1,549 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Decoder for PMarc -pm2- compression format. PMarc is a variant
// of LHA commonly used on the MSX computer architecture.
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "lha_decoder.h"
#include "bit_stream_reader.c"
#include "pma_common.c"
// Include tree decoder.
typedef uint8_t TreeElement;
#include "tree_decode.c"
// Size of the ring buffer (in bytes) used to store past history
// for copies.
#define RING_BUFFER_SIZE 8192
// Maximum number of bytes that might be placed in the output buffer
// from a single call to lha_pm2_decoder_read (largest copy size).
#define OUTPUT_BUFFER_SIZE 256
// Number of tree elements in the code tree.
#define CODE_TREE_ELEMENTS 65
// Number of tree elements in the offset tree.
#define OFFSET_TREE_ELEMENTS 17
// Progress of the decode tree through its rebuild schedule. The
// per-value comments give the point in the stream associated with each
// state; the code that advances the state is not part of this excerpt.
typedef enum {
PM2_REBUILD_UNBUILT, // At start of stream
PM2_REBUILD_BUILD1, // After 1KiB
PM2_REBUILD_BUILD2, // After 2KiB
PM2_REBUILD_BUILD3, // After 4KiB
PM2_REBUILD_CONTINUING, // 8KiB onwards...
} PM2RebuildState;
// Complete state of a -pm2- decoder.
typedef struct {
// Reader used to pull bits from the compressed input.
BitStreamReader bit_stream_reader;
// State of decode tree.
PM2RebuildState tree_state;
// Number of bytes until we initiate a tree rebuild.
size_t tree_rebuild_remaining;
// History ring buffer, for copies:
uint8_t ringbuf[RING_BUFFER_SIZE];
// Current position in ringbuf; starts at zero.
unsigned int ringbuf_pos;
// History linked list, for adaptively encoding byte values.
HistoryLinkedList history_list;
// Array representing the huffman tree used for representing
// code values. A given node of the tree has children
// code_tree[n] and code_tree[n + 1]. code_tree[0] is the
// root node.
TreeElement code_tree[CODE_TREE_ELEMENTS];
// If zero, we don't need an offset tree. This is set when the
// code tree is read, not when the decoder is initialized.
int need_offset_tree;
// Array representing huffman tree used to look up offsets.
// Same format as code_tree[].
TreeElement offset_tree[OFFSET_TREE_ELEMENTS];
} LHAPM2Decoder;
// Decode table for history value. Characters that appeared recently in
// the history are more likely than ones that appeared a long time ago,
// so the history value is huffman coded so that small values require
// fewer bits. The history value is then used to search within the
// history linked list to get the actual character.
// Each comment gives the first value beyond the entry's range; the
// eight entries together cover 0..255.
static const VariableLengthTable history_decode[] = {
{ 0, 3 }, // 0 + (1 << 3) = 8
{ 8, 3 }, // 8 + (1 << 3) = 16
{ 16, 4 }, // 16 + (1 << 4) = 32
{ 32, 5 }, // 32 + (1 << 5) = 64
{ 64, 5 }, // 64 + (1 << 5) = 96
{ 96, 5 }, // 96 + (1 << 5) = 128
{ 128, 6 }, // 128 + (1 << 6) = 192
{ 192, 6 }, // 192 + (1 << 6) = 256
};
// Decode table for copies. As with history_decode[], small copies
// are more common, and require fewer bits.
// Each comment gives the first value beyond the entry's range.
static const VariableLengthTable copy_decode[] = {
{ 17, 3 }, // 17 + (1 << 3) = 25
{ 25, 3 }, // 25 + (1 << 3) = 33
{ 33, 5 }, // 33 + (1 << 5) = 65
{ 65, 6 }, // 65 + (1 << 6) = 129
{ 129, 7 }, // 129 + (1 << 7) = 257
{ 256, 0 }, // 256 (unique value)
};
// Prepare a PM2 decoder structure for use.
//
//   data           Pointer to the LHAPM2Decoder to set up.
//   callback       Callback handed to the bit stream reader for
//                  fetching compressed data.
//   callback_data  Extra pointer passed through to 'callback'.
//
// Always returns 1.
static int lha_pm2_decoder_init(void *data, LHADecoderCallback callback,
                                void *callback_data)
{
    LHAPM2Decoder *pm2 = data;

    // All input is pulled through the bit stream reader.
    bit_stream_reader_init(&pm2->bit_stream_reader,
                           callback, callback_data);

    // No decode trees exist yet; they are built by the first call
    // to read().
    pm2->tree_rebuild_remaining = 0;
    pm2->tree_state = PM2_REBUILD_UNBUILT;

    // The copy window starts out empty, filled with spaces.
    pm2->ringbuf_pos = 0;
    memset(pm2->ringbuf, ' ', RING_BUFFER_SIZE);

    // Byte history chain in its starting order.
    init_history_list(&pm2->history_list);

    // Put both lookup trees into a known state.
    init_tree(pm2->offset_tree, OFFSET_TREE_ELEMENTS);
    init_tree(pm2->code_tree, CODE_TREE_ELEMENTS);

    return 1;
}
// Read the description of the code tree from the input and build
// decoder->code_tree from it. As a side effect,
// decoder->need_offset_tree is set to say whether an offset tree
// should be read as well.
//
// Returns 1 on success, or 0 if reading from the bit stream failed.
static int read_code_tree(LHAPM2Decoder *decoder)
{
    BitStreamReader *reader = &decoder->bit_stream_reader;
    uint8_t lengths[31];
    unsigned int idx, count;
    int num_codes, min_length, entry_bits, entry;

    // Header: a 5-bit count of codes, then a 3-bit minimum code
    // length that the table entries below are relative to.
    num_codes = read_bits(reader, 5);
    min_length = read_bits(reader, 3);

    if (num_codes < 0 || min_length < 0) {
        return 0;
    }

    // No offset tree is wanted when there are fewer than ten codes,
    // or for the special case of 29 codes with a zero minimum length.
    decoder->need_offset_tree
        = !(num_codes < 10 || (num_codes == 29 && min_length == 0));

    // A minimum length of zero stands for a tree holding one code.
    if (min_length == 0) {
        set_tree_single(decoder->code_tree, num_codes - 1);
        return 1;
    }

    // Width, in bits, of each entry in the length table.
    entry_bits = read_bits(reader, 3);

    if (entry_bits < 0) {
        return 0;
    }

    // Length table: zero marks an unused code, anything else is an
    // offset (starting from one) above the minimum length.
    count = (unsigned int) num_codes;
    idx = 0;

    while (idx < count) {
        entry = read_bits(reader, (unsigned int) entry_bits);

        if (entry < 0) {
            return 0;
        }

        lengths[idx] = (entry == 0)
                     ? 0
                     : (uint8_t) (min_length + entry - 1);
        ++idx;
    }

    build_tree(decoder->code_tree, sizeof(decoder->code_tree),
               lengths, count);

    return 1;
}
// Read the code lengths describing the offset tree and build
// decoder->offset_tree from them. Does nothing (and succeeds) when
// decoder->need_offset_tree is zero.
//
//   num_offsets  Number of 3-bit length values to read.
//
// Returns 1 on success, or 0 if reading from the bit stream failed.
static int read_offset_tree(LHAPM2Decoder *decoder,
                            unsigned int num_offsets)
{
    uint8_t lengths[8];
    unsigned int idx;
    unsigned int used = 0;
    unsigned int last_used = 0;
    int bits;

    if (!decoder->need_offset_tree) {
        return 1;
    }

    // lengths[n] becomes the code length for offset value 'n', with
    // zero meaning that 'n' does not appear in the tree. Keep count
    // of the values that do appear, and remember the latest one.
    for (idx = 0; idx < num_offsets; ++idx) {
        bits = read_bits(&decoder->bit_stream_reader, 3);

        if (bits < 0) {
            return 0;
        }

        lengths[idx] = (uint8_t) bits;

        if (bits == 0) {
            continue;
        }

        last_used = idx;
        ++used;
    }

    // Exactly one code means a tree consisting of a single node;
    // anything else goes through the normal tree builder.
    if (used != 1) {
        build_tree(decoder->offset_tree, sizeof(decoder->offset_tree),
                   lengths, num_offsets);
    } else {
        set_tree_single(decoder->offset_tree, last_used);
    }

    return 1;
}
// Re-read the decode trees from the input stream. Invoked for the
// initial build and then each time decoder->tree_rebuild_remaining
// counts down to zero. What is read depends on decoder->tree_state;
// afterwards the state is advanced and the countdown restarted.
// Failures reported by the tree readers are not acted upon here.
static void rebuild_tree(LHAPM2Decoder *decoder)
{
    BitStreamReader *reader = &decoder->bit_stream_reader;
    PM2RebuildState next_state = decoder->tree_state;
    size_t interval = 0;

    switch (decoder->tree_state) {

    // Start of stream: both trees are read unconditionally.
    case PM2_REBUILD_UNBUILT:
        read_code_tree(decoder);
        read_offset_tree(decoder, 5);
        next_state = PM2_REBUILD_BUILD1;
        interval = 1024;
        break;

    // 1KiB into the stream: new offset tree only.
    case PM2_REBUILD_BUILD1:
        read_offset_tree(decoder, 6);
        next_state = PM2_REBUILD_BUILD2;
        interval = 1024;
        break;

    // 2KiB into the stream: new offset tree only.
    case PM2_REBUILD_BUILD2:
        read_offset_tree(decoder, 7);
        next_state = PM2_REBUILD_BUILD3;
        interval = 2048;
        break;

    // 4KiB into the stream: a flag bit says whether a new code
    // tree follows; the offset tree is always read.
    case PM2_REBUILD_BUILD3:
        if (read_bit(reader) == 1) {
            read_code_tree(decoder);
        }
        read_offset_tree(decoder, 8);
        next_state = PM2_REBUILD_CONTINUING;
        interval = 4096;
        break;

    // 8KiB into the stream and every 4KiB from then on: a flag
    // bit says whether both trees are replaced.
    case PM2_REBUILD_CONTINUING:
        if (read_bit(reader) == 1) {
            read_code_tree(decoder);
            read_offset_tree(decoder, 8);
        }
        next_state = PM2_REBUILD_CONTINUING;
        interval = 4096;
        break;

    // Any other value leaves the decoder untouched.
    default:
        return;
    }

    decoder->tree_state = next_state;
    decoder->tree_rebuild_remaining = interval;
}
// Emit one decoded byte.
//
// The byte is stored in the copy window, appended to buf[] at index
// *buf_len (which is then incremented) and moved to the front of the
// history chain. The rebuild countdown is decremented, and the decode
// trees are rebuilt when it reaches zero.
static void output_byte(LHAPM2Decoder *decoder, uint8_t *buf,
                        size_t *buf_len, uint8_t b)
{
    unsigned int slot = decoder->ringbuf_pos;

    // Copy window, wrapping around at the end of the buffer.
    decoder->ringbuf[slot] = b;
    decoder->ringbuf_pos = (slot + 1) % RING_BUFFER_SIZE;

    // Caller's output buffer.
    buf[(*buf_len)++] = b;

    // Most recently used byte goes to the head of the chain.
    update_history_list(&decoder->history_list, b);

    // Time for new lookup trees?
    decoder->tree_rebuild_remaining -= 1;

    if (decoder->tree_rebuild_remaining != 0) {
        return;
    }

    rebuild_tree(decoder);
}
// Decode one literal byte and append it to the output buffer.
//
// 'code' (already read from the code tree) selects a row of
// history_decode[]; the decoded value is a position in the history
// chain, and the byte found there is what gets output. Nothing is
// output if the value cannot be decoded.
static void read_single_byte(LHAPM2Decoder *decoder, unsigned int code,
                             uint8_t *buf, size_t *buf_len)
{
    int position = decode_variable_length(&decoder->bit_stream_reader,
                                          history_decode, code);

    if (position >= 0) {
        output_byte(decoder, buf, buf_len,
                    find_in_history_list(&decoder->history_list,
                                         (uint8_t) position));
    }
}
// Calculate how many bytes from history to copy.
//
//   code  Copy code, ie. the value read from the code tree minus 8.
//
// Codes below 15 give the count directly (code + 2). Larger codes
// select a row of copy_decode[] and are followed in the stream by a
// variable length value.
//
// Returns the number of bytes to copy, or -1 for error: either the
// bit stream could not be read, or the code does not correspond to
// any row of copy_decode[].
static int history_get_count(LHAPM2Decoder *decoder, unsigned int code)
{
    const unsigned int num_rows
        = (unsigned int) (sizeof(copy_decode) / sizeof(copy_decode[0]));
    unsigned int row;

    // Short copy: the code is the literal count.
    if (code < 15) {
        return (int) code + 2;
    }

    // The code tree is described by a 5-bit count, so a damaged or
    // malicious stream can produce codes beyond the last table row.
    // Reject them instead of reading past the end of copy_decode[].
    row = code - 15;

    if (row >= num_rows) {
        return -1;
    }

    return decode_variable_length(&decoder->bit_stream_reader,
                                  copy_decode, row);
}
// Calculate the offset within history at which to start copying.
//
//   code  Copy code, ie. the value read from the code tree minus 8.
//
// Returns the offset, or -1 if reading from the bit stream failed.
static int history_get_offset(LHAPM2Decoder *decoder, unsigned int code)
{
    unsigned int num_bits = 6;
    int base = 0;
    int tree_val, low_bits;

    // Large copy values always start from offset zero and read
    // nothing from the stream.
    if (code >= 20) {
        return 0;
    }

    // A code of zero means a plain 6-bit offset follows. For the
    // other codes, the offset tree supplies a value giving the
    // width of the field: zero again means a plain 6-bit offset,
    // while a value v > 0 means a field of (v + 5) bits which is
    // added to a base of 1 << (v + 5).
    if (code != 0) {
        tree_val = read_from_tree(&decoder->bit_stream_reader,
                                  decoder->offset_tree);

        if (tree_val < 0) {
            return -1;
        }

        if (tree_val != 0) {
            num_bits = (unsigned int) tree_val + 5;
            base = 1 << num_bits;
        }
    }

    // Read the variable width field worked out above.
    low_bits = read_bits(&decoder->bit_stream_reader, num_bits);

    if (low_bits < 0) {
        return -1;
    }

    return base + low_bits;
}
// Decode a copy command and append the copied bytes to the output
// buffer.
//
//   code  Copy code, ie. the value read from the code tree minus 8.
//
// The count is decoded first, then the offset. Nothing is output if
// either fails to decode, or if the count is larger than the output
// buffer.
static void copy_from_history(LHAPM2Decoder *decoder, unsigned int code,
                              uint8_t *buf, size_t *buf_len)
{
    unsigned int base, n;
    int count, back;

    count = history_get_count(decoder, code);
    back = history_get_offset(decoder, code);

    // Decode errors, and the sanity check that guards against
    // overflowing the output buffer.
    if (count < 0 || back < 0 || count > OUTPUT_BUFFER_SIZE) {
        return;
    }

    // The source position is fixed before copying starts: each
    // byte output below advances decoder->ringbuf_pos.
    base = decoder->ringbuf_pos + RING_BUFFER_SIZE - 1
         - (unsigned int) back;

    for (n = 0; n < (unsigned int) count; ++n) {
        output_byte(decoder, buf, buf_len,
                    decoder->ringbuf[(base + n) % RING_BUFFER_SIZE]);
    }
}
// Decode one command from the input stream, storing the bytes it
// produces in buf[].
//
//   data  Pointer to the LHAPM2Decoder.
//   buf   Buffer to receive the decoded bytes.
//
// Returns the number of bytes stored in buf[]; zero if no code could
// be read from the code tree.
static size_t lha_pm2_decoder_read(void *data, uint8_t *buf)
{
    LHAPM2Decoder *pm2 = data;
    size_t produced = 0;
    int symbol;

    // First call: skip the leading bit of the stream (it is
    // discarded) and build the initial lookup trees.
    if (pm2->tree_state == PM2_REBUILD_UNBUILT) {
        read_bit(&pm2->bit_stream_reader);
        rebuild_tree(pm2);
    }

    symbol = read_from_tree(&pm2->bit_stream_reader, pm2->code_tree);

    if (symbol < 0) {
        return 0;
    }

    // Codes 0-7 are single bytes; everything from 8 upwards is a
    // copy from history.
    if (symbol >= 8) {
        copy_from_history(pm2, (unsigned int) symbol - 8,
                          buf, &produced);
    } else {
        read_single_byte(pm2, (unsigned int) symbol, buf, &produced);
    }

    return produced;
}
// Decoder type descriptor for the PM2 decoder.
// NOTE(review): the definition of LHADecoderType is not visible from
// here, so the per-field notes below are inferred from the values
// supplied - confirm against the structure definition.
LHADecoderType lha_pm2_decoder = {
lha_pm2_decoder_init, // initialization function
NULL, // presumably an optional hook that this decoder does not need
lha_pm2_decoder_read, // read (decode) function
sizeof(LHAPM2Decoder), // size of the decoder state structure
OUTPUT_BUFFER_SIZE, // presumably the most a single read can output
RING_BUFFER_SIZE // presumably the block size - TODO confirm
};

View file

@ -0,0 +1,162 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//
// Common functions used by PMarc decoders.
//
// One row of a variable length decode table. A value decoded with a
// row is 'offset' plus a field of 'bits' bits read from the input
// stream (see decode_variable_length()).
typedef struct {
// Lowest value that this row can decode to.
unsigned int offset;
// Number of extra bits to read from the stream.
unsigned int bits;
} VariableLengthTable;
// Decode a variable length value whose header has already been read.
//
//   reader  Bit stream to read the remaining bits from.
//   table   Decode table to use.
//   header  Index of the table row to use. This is not range checked:
//           the caller must make sure that it is a valid index.
//
// Returns the decoded value, or -1 for error.
static int decode_variable_length(BitStreamReader *reader,
                                  const VariableLengthTable *table,
                                  unsigned int header)
{
    const VariableLengthTable *row = &table[header];
    int extra = read_bits(reader, row->bits);

    return (extra < 0) ? -1 : (int) row->offset + extra;
}
// Links for one byte value within the history linked list. Both fields
// are indices into the history[] array of HistoryLinkedList.
typedef struct {
// The byte value that was output less recently than this one;
// followed when counting back in time from the head.
uint8_t prev;
// The neighbour in the opposite direction. For the head of the
// list this wraps around to the other end of the chain.
uint8_t next;
} HistoryNode;
// History linked list. In the decode stream, codes representing
// characters are not the character itself, but the number of
// nodes to count back in time in the linked list. Every time
// a character is output, it is moved to the front of the linked
// list. The entry point index into the list is the last output
// character, given by history_head.
typedef struct {
// One node per possible byte value, indexed by that value.
HistoryNode history[256];
// Byte value currently at the front of the list.
uint8_t history_head;
} HistoryLinkedList;
// Put the history list into its starting order.
static void init_history_list(HistoryLinkedList *list)
{
    // Joins between groups of byte values. Each row links the last
    // value of one group ('tail') to the first value of the group
    // that follows it in the chain ('head'). The result is that
    // printable ASCII comes first, then ASCII control characters,
    // then the remaining groups:
    //
    //   0x20..0x7f -> 0x00..0x1f -> 0xa0..0xdf -> 0x80..0x9f
    //              -> 0xe0..0xff -> back to 0x20
    static const struct {
        uint8_t tail;
        uint8_t head;
    } joins[] = {
        { 0x7f, 0x00 },
        { 0x1f, 0xa0 },
        { 0xdf, 0x80 },
        { 0x9f, 0xe0 },
        { 0xff, 0x20 },
    };
    unsigned int n;

    // Begin with one linear chain running through all 256 values.
    for (n = 0; n < 256; ++n) {
        list->history[n].next = (uint8_t) (n - 1);
        list->history[n].prev = (uint8_t) (n + 1);
    }

    // The chain is entered at the space character.
    list->history_head = 0x20;

    // Cut the chain into groups and rejoin them in the new order.
    for (n = 0; n < sizeof(joins) / sizeof(joins[0]); ++n) {
        list->history[joins[n].tail].prev = joins[n].head;
        list->history[joins[n].head].next = joins[n].tail;
    }
}
// Find the byte value that lies 'count' steps back in time from the
// head of the history list. The list is not modified.
static uint8_t find_in_history_list(HistoryLinkedList *list, uint8_t count)
{
    const HistoryNode *nodes = list->history;
    uint8_t cur = list->history_head;
    unsigned int steps;

    // The chain is circular, so anything more than half way around
    // is reached sooner by walking in the opposite direction.
    if (count >= 128) {
        for (steps = 256U - count; steps > 0; --steps) {
            cur = nodes[cur].next;
        }

        return cur;
    }

    for (steps = count; steps > 0; --steps) {
        cur = nodes[cur].prev;
    }

    return cur;
}
// Record that byte 'b' has just been output, by making it the head
// of the history list.
static void update_history_list(HistoryLinkedList *list, uint8_t b)
{
    HistoryNode *nodes = list->history;
    uint8_t head = list->history_head;
    uint8_t before, after, wrap;

    // Already at the head: nothing to move.
    if (head == b) {
        return;
    }

    // Take 'b' out of the chain by joining its two neighbours.
    before = nodes[b].prev;
    after = nodes[b].next;
    nodes[after].prev = before;
    nodes[before].next = after;

    // Insert 'b' between the old head and the old head's 'next'
    // neighbour. That neighbour has to be looked up after the
    // unhooking above, which may just have changed it.
    wrap = nodes[head].next;
    nodes[b].prev = head;
    nodes[b].next = wrap;
    nodes[wrap].prev = b;
    nodes[head].next = b;

    // 'b' is the new head.
    list->history_head = b;
}

View file

@ -0,0 +1,9 @@
headerfilesdir=$(includedir)/liblhasa-1.0
headerfiles_HEADERS= \
lhasa.h \
lha_decoder.h \
lha_file_header.h \
lha_input_stream.h \
lha_reader.h

View file

@ -0,0 +1,183 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_PUBLIC_LHA_DECODER_H
#define LHASA_PUBLIC_LHA_DECODER_H
#include <stdlib.h>
#include <inttypes.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* @file lha_decoder.h
*
* @brief Raw LHA data decoder.
*
* This file defines the interface to the decompression code, which can
* be used to decompress the raw compressed data from an LZH file.
*
* Implementations of the various compression algorithms used in LZH
* archives are provided - these are represented by the
* @ref LHADecoderType structure, and can be retrieved using the
* @ref lha_decoder_for_name function. One of these can then be passed to
* the @ref lha_decoder_new function to create a @ref LHADecoder structure
* and decompress the data.
*/
/**
* Opaque type representing a type of decoder.
*
* This is an implementation of the decompression code for one of the
* algorithms used in LZH archive files. Pointers to these structures are
* retrieved by using the @ref lha_decoder_for_name function.
*/
typedef struct _LHADecoderType LHADecoderType;
/**
* Opaque type representing an instance of a decoder.
*
* This is a decoder structure being used to decompress a stream of
* compressed data. Instantiated using the @ref lha_decoder_new
* function and freed using the @ref lha_decoder_free function.
*/
typedef struct _LHADecoder LHADecoder;
/**
* Callback function invoked when a decoder wants to read more compressed
* data.
*
* @param buf Pointer to the buffer in which to store the data.
* @param buf_len Size of the buffer, in bytes.
* @param user_data Extra pointer to pass to the decoder.
* @return Number of bytes read.
*/
typedef size_t (*LHADecoderCallback)(void *buf, size_t buf_len,
void *user_data);
/**
* Callback function used for monitoring decode progress.
* The callback is invoked for every block processed (block size depends on
* decode algorithm).
*
* @param num_blocks Number of blocks processed so far.
* @param total_blocks Total number of blocks to process.
* @param callback_data Extra user-specified data passed to the callback.
*/
typedef void (*LHADecoderProgressCallback)(unsigned int num_blocks,
unsigned int total_blocks,
void *callback_data);
/**
* Get the decoder type for the specified name.
*
* @param name String identifying the decoder type, for
* example, "-lh1-".
* @return Pointer to the decoder type, or NULL if there
* is no decoder type for the specified name.
*/
LHADecoderType *lha_decoder_for_name(char *name);
/**
* Allocate a new decoder for the specified type.
*
* @param dtype The decoder type.
* @param callback Callback function for the decoder to call to read
* more compressed data.
* @param callback_data Extra data to pass to the callback function.
* @param stream_length Length of the uncompressed data, in bytes. When
* this point is reached, decompression will stop.
* @return Pointer to the new decoder, or NULL for failure.
*/
LHADecoder *lha_decoder_new(LHADecoderType *dtype,
LHADecoderCallback callback,
void *callback_data,
size_t stream_length);
/**
* Free a decoder.
*
* @param decoder The decoder to free.
*/
void lha_decoder_free(LHADecoder *decoder);
/**
* Set a callback function to monitor decode progress.
*
* @param decoder The decoder.
* @param callback Callback function to monitor decode progress.
* @param callback_data Extra data to pass to the callback function.
*/
void lha_decoder_monitor(LHADecoder *decoder,
LHADecoderProgressCallback callback,
void *callback_data);
/**
* Decode (decompress) more data.
*
* @param decoder The decoder.
* @param buf Pointer to buffer to store decompressed data.
* @param buf_len Size of the buffer, in bytes.
* @return Number of bytes decompressed.
*/
size_t lha_decoder_read(LHADecoder *decoder, uint8_t *buf, size_t buf_len);
/**
* Get the current 16-bit CRC of the decompressed data.
*
* This should be called at the end of decompression to check that the
* data was extracted correctly, and the value compared against the CRC
* from the file header.
*
* @param decoder The decoder.
* @return 16-bit CRC of the data decoded so far.
*/
uint16_t lha_decoder_get_crc(LHADecoder *decoder);
/**
* Get the count of the number of bytes decoded.
*
* This should be called at the end of decompression, and the value
* compared against the file length from the file header.
*
* @param decoder The decoder.
* @return The number of decoded bytes.
*/
size_t lha_decoder_get_length(LHADecoder *decoder);
#ifdef __cplusplus
}
#endif
#endif /* #ifndef LHASA_PUBLIC_LHA_DECODER_H */

View file

@ -0,0 +1,248 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_PUBLIC_LHA_FILE_HEADER_H
#define LHASA_PUBLIC_LHA_FILE_HEADER_H
#include <inttypes.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* @file lha_file_header.h
*
* @brief LHA file header structure.
*
* This file contains the definition of the @ref LHAFileHeader structure,
* representing a decoded file header from an LZH file.
*/
/** OS type value for an unknown OS. */
#define LHA_OS_TYPE_UNKNOWN 0x00
/** OS type value for Microsoft MS/DOS. */
#define LHA_OS_TYPE_MSDOS 'M'
/** OS type value for Microsoft Windows 95. */
#define LHA_OS_TYPE_WIN95 'w'
/** OS type value for Microsoft Windows NT. */
#define LHA_OS_TYPE_WINNT 'W'
/** OS type value for Unix. */
#define LHA_OS_TYPE_UNIX 'U'
/** OS type value for IBM OS/2. */
#define LHA_OS_TYPE_OS2 '2'
/** OS type for Apple Mac OS (Classic). */
#define LHA_OS_TYPE_MACOS 'm'
/** OS type for Amiga OS. */
#define LHA_OS_TYPE_AMIGA 'A'
/** OS type for Atari TOS. */
#define LHA_OS_TYPE_ATARI 'a'
// Obscure:
/** OS type for Sun (Oracle) Java. */
#define LHA_OS_TYPE_JAVA 'J'
/** OS type for Digital Research CP/M. */
#define LHA_OS_TYPE_CPM 'C'
/** OS type for Digital Research FlexOS. */
#define LHA_OS_TYPE_FLEX 'F'
/** OS type for Runser (?). */
#define LHA_OS_TYPE_RUNSER 'R'
/** OS type for Fujitsu FM Towns OS. */
#define LHA_OS_TYPE_TOWNSOS 'T'
/** OS type for Microware OS-9. */
#define LHA_OS_TYPE_OS9 '9'
/** OS type for Microware OS-9/68k. */
#define LHA_OS_TYPE_OS9_68K 'K'
/** OS type for OS/386 (?). */
#define LHA_OS_TYPE_OS386 '3'
/** OS type for Sharp X68000 Human68K OS. */
#define LHA_OS_TYPE_HUMAN68K 'H'
/**
* Compression type for a stored directory. The same value is also
* used for Unix symbolic links.
*/
#define LHA_COMPRESS_TYPE_DIR "-lhd-"
/**
* Bit field value set in extra_flags to indicate that the
* Unix file permission header (0x50) was parsed.
*/
#define LHA_FILE_UNIX_PERMS 0x01
/**
* Bit field value set in extra_flags to indicate that the
* Unix UID/GID header (0x51) was parsed.
*/
#define LHA_FILE_UNIX_UID_GID 0x02
/**
* Bit field value set in extra_flags to indicate that the 'common
* header' extended header (0x00) was parsed, and the common_crc
* field has been set.
*/
#define LHA_FILE_COMMON_CRC 0x04
/**
* Bit field value set in extra_flags to indicate that the
* Windows time stamp header (0x41) was parsed, and the Windows
* FILETIME timestamp fields have been set.
*/
#define LHA_FILE_WINDOWS_TIMESTAMPS 0x08
/**
* Bit field value set in extra_flags to indicate that the OS-9
* permissions field is set.
*/
#define LHA_FILE_OS9_PERMS 0x10
typedef struct _LHAFileHeader LHAFileHeader;
/**
* Test whether extra data is flagged as present in a file header.
*
* @param header Pointer to the @ref LHAFileHeader to examine.
* @param flag Bit field value to test for in extra_flags, for
* example @ref LHA_FILE_UNIX_PERMS.
* @return Non-zero if any bit of the flag is set in the
* extra_flags field of the header, zero otherwise.
*/
#define LHA_FILE_HAVE_EXTRA(header, flag) \
(((header)->extra_flags & (flag)) != 0)
/**
* Structure containing a decoded LZH file header.
*
* A file header precedes the compressed data of each file stored
* within an LZH archive. It contains the name of the file, and
* various additional metadata, some of which is optional, and
* can depend on the header format, the tool used to create the
* archive, and the operating system on which it was created.
*/
struct _LHAFileHeader {
// Internal fields, do not touch!
unsigned int _refcount;
LHAFileHeader *_next;
/**
* Stored path, with Unix-style ('/') path separators.
*
* This may be NULL, although if this is a directory
* (@ref LHA_COMPRESS_TYPE_DIR), it is never NULL.
*/
char *path;
/**
* File name.
*
* This is never NULL, except if this is a directory
* (@ref LHA_COMPRESS_TYPE_DIR), where it is always NULL.
*/
char *filename;
/**
* Target for symbolic link.
*
* This is NULL unless this header represents a symbolic link
* (@ref LHA_COMPRESS_TYPE_DIR).
*/
char *symlink_target;
/**
* Compression method.
*
* If the header represents a directory or a symbolic link, the
* compression method is equal to @ref LHA_COMPRESS_TYPE_DIR.
*/
char compress_method[6];
/** Length of the compressed data. */
size_t compressed_length;
/** Length of the uncompressed data. */
size_t length;
/** LZH header format used to store this header. */
uint8_t header_level;
/**
* OS type indicator, identifying the OS on which
* the archive was created.
*/
uint8_t os_type;
/**
* 16-bit CRC of the uncompressed data. This is the value that
* the CRC of the decompressed data is compared against.
*/
uint16_t crc;
/** Unix timestamp of the modification time of the file. */
unsigned int timestamp;
/** Pointer to a buffer containing the raw header data. */
uint8_t *raw_data;
/** Length of the raw header data. */
size_t raw_data_len;
/**
* Flags bitfield identifying extra data decoded from extended
* headers.
*/
unsigned int extra_flags;
/** Unix permissions, set if @ref LHA_FILE_UNIX_PERMS is set. */
unsigned int unix_perms;
/** Unix user ID, set if @ref LHA_FILE_UNIX_UID_GID is set. */
unsigned int unix_uid;
/** Unix group ID, set if @ref LHA_FILE_UNIX_UID_GID is set. */
unsigned int unix_gid;
/** OS-9 permissions, set if @ref LHA_FILE_OS9_PERMS is set. */
unsigned int os9_perms;
/** Unix username. */
char *unix_username;
/** Unix group name. */
char *unix_group;
/**
* 16-bit CRC of header contents, set if
* @ref LHA_FILE_COMMON_CRC is set.
*/
uint16_t common_crc;
/**
* Windows FILETIME file creation time, set if
* @ref LHA_FILE_WINDOWS_TIMESTAMPS is set.
*/
uint64_t win_creation_time;
/**
* Windows FILETIME file modification time, set if
* @ref LHA_FILE_WINDOWS_TIMESTAMPS is set.
*/
uint64_t win_modification_time;
/**
* Windows FILETIME file access time, set if
* @ref LHA_FILE_WINDOWS_TIMESTAMPS is set.
*/
uint64_t win_access_time;
};
#ifdef __cplusplus
}
#endif
#endif /* #ifndef LHASA_PUBLIC_LHA_FILE_HEADER_H */

View file

@ -0,0 +1,134 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_PUBLIC_LHA_INPUT_STREAM_H
#define LHASA_PUBLIC_LHA_INPUT_STREAM_H
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* @file lha_input_stream.h
*
* @brief LHA input stream structure.
*
* This file defines the functions relating to the @ref LHAInputStream
* structure, used to read data from an LZH file.
*/
/**
* Opaque structure, representing an input stream used to read data from
* an LZH file.
*/
typedef struct _LHAInputStream LHAInputStream;
/**
* Structure containing pointers to callback functions to read data from
* the input stream.
*/
typedef struct {
/**
* Read a block of data into the specified buffer.
*
* @param handle Handle pointer.
* @param buf Pointer to buffer in which to store read data.
* @param buf_len Size of buffer, in bytes.
* @return Number of bytes read, or -1 for error.
*/
int (*read)(void *handle, void *buf, size_t buf_len);
/**
* Skip the specified number of bytes from the input stream.
* This is an optional function.
*
* @param handle Handle pointer.
* @param bytes Number of bytes to skip.
* @return Non-zero for success, or zero for failure.
*/
int (*skip)(void *handle, size_t bytes);
/**
* Close the input stream.
*
* @param handle Handle pointer.
*/
void (*close)(void *handle);
} LHAInputStreamType;
/**
* Create new @ref LHAInputStream structure, using a set of generic functions
* to provide LHA data.
*
* @param type Pointer to a @ref LHAInputStreamType structure
* containing callback functions to read data.
* @param handle Handle pointer to be passed to callback functions.
* @return Pointer to a new @ref LHAInputStream or NULL for error.
*/
LHAInputStream *lha_input_stream_new(const LHAInputStreamType *type,
void *handle);
/**
* Create new @ref LHAInputStream, reading from the specified filename.
* The file is automatically closed when the input stream is freed.
*
* @param filename Name of the file to read from.
* @return Pointer to a new @ref LHAInputStream or NULL for error.
*/
LHAInputStream *lha_input_stream_from(char *filename);
/**
* Create new @ref LHAInputStream, to read from an already-open FILE pointer.
* The FILE is not closed when the input stream is freed; the calling code
* must close it.
*
* @param stream The open FILE structure from which to read data.
* @return Pointer to a new @ref LHAInputStream or NULL for error.
*/
LHAInputStream *lha_input_stream_from_FILE(FILE *stream);
/**
* Free an @ref LHAInputStream structure.
*
* @param stream The input stream.
*/
void lha_input_stream_free(LHAInputStream *stream);
#ifdef __cplusplus
}
#endif
#endif /* #ifndef LHASA_PUBLIC_LHA_INPUT_STREAM_H */

View file

@ -0,0 +1,217 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_PUBLIC_LHA_READER_H
#define LHASA_PUBLIC_LHA_READER_H
#include "lha_decoder.h"
#include "lha_input_stream.h"
#include "lha_file_header.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* @file lha_reader.h
*
* @brief LHA file reader.
*
* This file contains the interface functions for the @ref LHAReader
* structure, used to decode data from a compressed LZH file and
* extract compressed files.
*/
/**
* Opaque structure used to decode the contents of an LZH file.
*/
typedef struct _LHAReader LHAReader;
/**
* Policy for extracting directories.
*
* When extracting a directory, some of the metadata associated with
* it needs to be set after the contents of the directory have been
* extracted. This includes the modification time (which would
* otherwise be reset to the current time) and the permissions (which
* can affect the ability to extract files into the directory).
* To work around this problem there are several ways of handling
* directory extraction.
*/
typedef enum {
/**
* "Plain" policy. In this mode, the metadata is set at the
* same time that the directory is created. This is the
* simplest to comprehend, and the files returned from
* @ref lha_reader_next_file will match the files in the
* archive, but it is not recommended.
*/
LHA_READER_DIR_PLAIN,
/**
* "End of directory" policy. In this mode, if a directory
* is extracted, the directory name will be saved. Once the
* contents of the directory appear to have been extracted
* (ie. a file is found that is not within the directory),
* the directory will be returned again by
* @ref lha_reader_next_file. This time, when the directory
* is "extracted" (via @ref lha_reader_extract), the metadata
* will be set.
*
* This method uses less memory than
* @ref LHA_READER_DIR_END_OF_FILE, but there is the risk
* that a file will appear within the archive after the
* metadata has been set for the directory. However, this is
* not normally the case, as files and directories typically
* appear within an archive in order. GNU tar uses the same
* method to address this problem with tar files.
*
* This is the default policy.
*/
LHA_READER_DIR_END_OF_DIR,
/**
* "End of file" policy. In this mode, each directory that
* is extracted is recorded in a list. When the end of the
* archive is reached, these directories are returned again by
* @ref lha_reader_next_file. When the directories are
* "extracted" again (via @ref lha_reader_extract), the
* metadata is set.
*
* This avoids the problems that can potentially occur with
* @ref LHA_READER_DIR_END_OF_DIR, but uses more memory.
*/
LHA_READER_DIR_END_OF_FILE
} LHAReaderDirPolicy;
/**
* Create a new @ref LHAReader to read data from an @ref LHAInputStream.
*
* @param stream The input stream to read data from.
* @return Pointer to a new @ref LHAReader structure,
* or NULL for error.
*/
LHAReader *lha_reader_new(LHAInputStream *stream);
/**
* Free a @ref LHAReader structure.
*
* @param reader The @ref LHAReader structure.
*/
void lha_reader_free(LHAReader *reader);
/**
* Set the @ref LHAReaderDirPolicy used to extract directories.
*
* @param reader The @ref LHAReader structure.
* @param policy The policy to use for directories.
*/
void lha_reader_set_dir_policy(LHAReader *reader,
LHAReaderDirPolicy policy);
/**
* Read the header of the next archived file from the input stream.
*
* @param reader The @ref LHAReader structure.
* @return Pointer to an @ref LHAFileHeader structure, or NULL if
* an error occurred. This pointer is only valid until
* the next time that lha_reader_next_file is called.
*/
LHAFileHeader *lha_reader_next_file(LHAReader *reader);
/**
* Read some of the (decompressed) data for the current archived file,
* decompressing as appropriate.
*
* @param reader The @ref LHAReader structure.
* @param buf Pointer to a buffer in which to store the data.
* @param buf_len Size of the buffer, in bytes.
* @return Number of bytes stored in the buffer, or zero if
* there is no more data to decompress.
*/
size_t lha_reader_read(LHAReader *reader, void *buf, size_t buf_len);
/**
* Decompress the contents of the current archived file, and check
* that the checksum matches correctly.
*
* @param reader The @ref LHAReader structure.
* @param callback Callback function to invoke to monitor progress (or
* NULL if progress does not need to be monitored).
* @param callback_data Extra data to pass to the callback function.
* @return Non-zero if the checksum matches.
*/
int lha_reader_check(LHAReader *reader,
LHADecoderProgressCallback callback,
void *callback_data);
/**
* Extract the contents of the current archived file.
*
* @param reader The @ref LHAReader structure.
* @param filename Filename to extract the archived file to, or NULL
* to use the path and filename from the header.
* @param callback Callback function to invoke to monitor progress (or
* NULL if progress does not need to be monitored).
* @param callback_data Extra data to pass to the callback function.
* @return Non-zero for success, or zero for failure (including
* CRC error).
*/
int lha_reader_extract(LHAReader *reader,
char *filename,
LHADecoderProgressCallback callback,
void *callback_data);
/**
* Check if the current file (last returned by @ref lha_reader_next_file)
* was generated internally by the extract process. This occurs when a
* directory or symbolic link must be created as a two-stage process, with
* some of the extraction process deferred to later in the stream.
*
* These "fake" duplicates should usually be hidden in the user interface
* when a summary of extraction is presented.
*
* @param reader The @ref LHAReader structure.
* @return Non-zero if the current file is a "fake", or zero
* for a normal file.
*/
int lha_reader_current_is_fake(LHAReader *reader);
#ifdef __cplusplus
}
#endif
#endif /* #ifndef LHASA_PUBLIC_LHA_READER_H */

View file

@ -0,0 +1,30 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LHASA_PUBLIC_LHASA_H
#define LHASA_PUBLIC_LHASA_H
#include "lha_decoder.h"
#include "lha_file_header.h"
#include "lha_input_stream.h"
#include "lha_reader.h"
#endif /* #ifndef LHASA_PUBLIC_LHASA_H */

View file

@ -0,0 +1,252 @@
/*
Copyright (c) 2011, 2012, Simon Howard
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Common tree decoding code.
//
// A recurring feature used by the different LHA algorithms is to
// encode a set of codes, which have varying bit lengths. This is
// implemented using a binary tree, stored inside an array of
// elements.
//
// This file is implemented as a "template" file to be #include-d by
// other files. The typedef for TreeElement must be defined before
// include.
// Upper bit is set in a node value to indicate a leaf.
// The shift is performed on an unsigned constant so that it remains
// well defined when TreeElement is as wide as int, and the whole
// expansion is parenthesized so that it always acts as one operand.
#define TREE_NODE_LEAF ((TreeElement) (1U << (sizeof(TreeElement) * 8 - 1)))
// Structure used to hold data needed to build the tree.
// It pairs the destination array with the bookkeeping for the queue
// of slots that have been allocated but not yet filled in.
typedef struct {
// The tree data and its size in elements (must not be exceeded).
TreeElement *tree;
unsigned int tree_len;
// Counter used to allocate entries from the tree.
// Every time a new node is allocated, this increases by 2
// (one slot for each of the two children of the node).
unsigned int tree_allocated;
// The next tree entry.
// As entries are allocated sequentially, the range from
// next_entry..tree_allocated-1 constitutes the indices into
// the tree that are available to be filled in. By the
// end of the tree build, next_entry should = tree_allocated.
unsigned int next_entry;
} TreeBuildData;
// Initialize all elements of the given tree to a good initial state.
//
// Every slot is set to the bare leaf marker, i.e. a leaf whose code
// bits are all zero, so a lookup that lands on a slot which was never
// filled in stops at a leaf instead of following a stray index.
//
// tree      Array of elements to initialize.
// tree_len  Number of elements in the array.
static void init_tree(TreeElement *tree, size_t tree_len)
{
	// The index has the same type as the length; a narrower index
	// would wrap around before reaching a length above UINT_MAX.
	size_t i;

	for (i = 0; i < tree_len; ++i) {
		tree[i] = TREE_NODE_LEAF;
	}
}
// Collapse the tree to a lone leaf at the root, so that decoding
// yields the given code every time without consuming any input bits.
static void set_tree_single(TreeElement *tree, TreeElement code)
{
	*tree = (TreeElement) (code | TREE_NODE_LEAF);
}
// Push every slot currently waiting in the queue one level deeper.
//
// Each waiting slot is turned into an internal node: it receives the
// index of a freshly allocated pair of child slots, and those children
// take its place in the queue. The net effect is that the tree grows
// by one level and all queued slots sit one bit deeper than before.
//
// If the tree array is too small to hold all of the new children,
// nothing is changed.
static void expand_queue(TreeBuildData *build)
{
	unsigned int waiting = build->tree_allocated - build->next_entry;
	unsigned int stop = build->tree_allocated;
	unsigned int slot;

	// Two children are needed per waiting slot; refuse to expand
	// when they would not all fit.
	if (build->tree_allocated + waiting * 2 > build->tree_len) {
		return;
	}

	// Only the slots that were queued on entry are converted; the
	// children allocated here are left for the next level.
	for (slot = build->next_entry; slot < stop; ++slot) {
		build->tree[slot] = build->tree_allocated;
		build->tree_allocated += 2;
	}

	build->next_entry = slot;
}
// Take the next free slot index off the front of the queue.
//
// Returns the index of the slot and advances the queue. If the queue
// is empty, 0 is returned and the queue is left untouched.
static unsigned int read_next_entry(TreeBuildData *build)
{
	if (build->next_entry < build->tree_allocated) {
		return build->next_entry++;
	}

	// Nothing left to hand out.
	return 0;
}
// Place every code whose bit length equals code_len into the tree.
//
// Codes are visited in index order, and each matching code takes the
// next free slot from the queue as a leaf holding its index.
//
// Returns non-zero when code_lengths[] still contains at least one
// entry longer than code_len, meaning another pass is required.
static int add_codes_with_length(TreeBuildData *build,
                                 uint8_t *code_lengths,
                                 unsigned int num_code_lengths,
                                 unsigned int code_len)
{
	int more_pending = 0;
	unsigned int code;

	for (code = 0; code < num_code_lengths; ++code) {
		unsigned int len = code_lengths[code];

		// Longer codes belong to a later pass.
		if (len > code_len) {
			more_pending = 1;
			continue;
		}

		// This code lives at the current depth.
		if (len == code_len) {
			unsigned int slot = read_next_entry(build);

			build->tree[slot] = (TreeElement) code | TREE_NODE_LEAF;
		}
	}

	return more_pending;
}
// Construct a decoding tree from an array of code lengths, where
// code_lengths[i] gives the depth in the tree at which code i must
// be placed. A length of 0 marks a code as unused.
static void build_tree(TreeElement *tree, size_t tree_len,
                       uint8_t *code_lengths, unsigned int num_code_lengths)
{
	TreeBuildData state;
	unsigned int depth;

	state.tree = tree;
	state.tree_len = tree_len;

	// The root always exists, and it is the only slot queued at
	// the start.
	state.tree_allocated = 1;
	state.next_entry = 0;

	// Work through the code lengths in increasing order, starting
	// at 1 (length 0 means "not used" and is never placed).
	for (depth = 1; ; ++depth) {
		// Grow the tree by one level so that the queued slots
		// are at the depth about to be filled.
		expand_queue(&state);

		// Place the codes of this length; stop once no longer
		// codes remain.
		if (!add_codes_with_length(&state, code_lengths,
		                           num_code_lengths, depth)) {
			break;
		}
	}
}
/*
static void display_tree(TreeElement *tree, unsigned int node, int offset)
{
unsigned int i;
if (node & TREE_NODE_LEAF) {
for (i = 0; i < offset; ++i) putchar(' ');
printf("leaf %i\n", node & ~TREE_NODE_LEAF);
} else {
for (i = 0; i < offset; ++i) putchar(' ');
printf("0 ->\n");
display_tree(tree, tree[node], offset + 4);
for (i = 0; i < offset; ++i) putchar(' ');
printf("1 ->\n");
display_tree(tree, tree[node + 1], offset + 4);
}
}
*/
// Decode one code from the input stream by walking the tree.
//
// Starting at the root, one bit is read per level and used to pick
// between the two children of the current node, until a leaf is hit.
//
// Returns the code stored in the leaf, or -1 if a bit could not be
// read from the stream.
static int read_from_tree(BitStreamReader *reader, TreeElement *tree)
{
	TreeElement node;
	int branch;

	// An internal node holds the index of its first child; the
	// bit just read selects that child or the one after it.
	for (node = tree[0];
	     (node & TREE_NODE_LEAF) == 0;
	     node = tree[node + (unsigned int) branch]) {
		branch = read_bit(reader);

		if (branch < 0) {
			return -1;
		}
	}

	// Strip the leaf marker to recover the plain code.
	return (int) (node & ~TREE_NODE_LEAF);
}