rbutil: Update libmspack to 0.10.1alpha.

Update to the most recent release. Fix name / include clashes, as has
been done before.

Change-Id: Ia712bb2b5f4b9018b65a46b8bdd04ba42363be8b
This commit is contained in:
Dominik Riebeling 2020-06-08 21:44:02 +02:00
parent b0f22620a2
commit 729b6e4f33
19 changed files with 2066 additions and 1527 deletions

View file

@ -1,6 +1,6 @@
This folder contains the mspack project for MS files compression/decompression. This folder contains the mspack project for MS files compression/decompression.
These files are distributed under the LGPL. These files are distributed under the LGPL.
The source files have been last synced with libmspack-0.3alpha
http://sourceforge.net/projects/libmspack/ on January 28, 2013
The source files have been last synced with libmspack-0.10.1alpha
https://www.cabextract.org.uk/libmspack/ on June 8, 2020

View file

@ -1,5 +1,5 @@
/* This file is part of libmspack. /* This file is part of libmspack.
* (C) 2003-2004 Stuart Caie. * (C) 2003-2018 Stuart Caie.
* *
* libmspack is free software; you can redistribute it and/or modify it under * libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1 * the terms of the GNU Lesser General Public License (LGPL) version 2.1
@ -10,10 +10,6 @@
#ifndef MSPACK_CAB_H #ifndef MSPACK_CAB_H
#define MSPACK_CAB_H 1 #define MSPACK_CAB_H 1
#include "mszip.h"
#include "qtm.h"
#include "lzx.h"
/* generic CAB definitions */ /* generic CAB definitions */
/* structure offsets */ /* structure offsets */
@ -70,6 +66,22 @@
#define CAB_BLOCKMAX (32768) #define CAB_BLOCKMAX (32768)
#define CAB_INPUTMAX (CAB_BLOCKMAX+6144) #define CAB_INPUTMAX (CAB_BLOCKMAX+6144)
/* input buffer needs to be CAB_INPUTMAX + 1 byte to allow for max-sized block
* plus 1 trailer byte added by cabd_sys_read_block() for Quantum alignment.
*
* When MSCABD_PARAM_SALVAGE is set, block size is not checked so can be
* up to 65535 bytes, so max input buffer size needed is 65535 + 1
*/
#define CAB_INPUTMAX_SALVAGE (65535)
#define CAB_INPUTBUF (CAB_INPUTMAX_SALVAGE + 1)
/* There are no more than 65535 data blocks per folder, so a folder cannot
* be more than 32768*65535 bytes in length. As files cannot span more than
* one folder, this is also their max offset, length and offset+length limit.
*/
#define CAB_FOLDERMAX (65535)
#define CAB_LENGTHMAX (CAB_BLOCKMAX * CAB_FOLDERMAX)
/* CAB compression definitions */ /* CAB compression definitions */
struct mscab_compressor_p { struct mscab_compressor_p {
@ -85,6 +97,7 @@ struct mscabd_decompress_state {
struct mscabd_folder_data *data; /* current folder split we're in */ struct mscabd_folder_data *data; /* current folder split we're in */
unsigned int offset; /* uncompressed offset within folder */ unsigned int offset; /* uncompressed offset within folder */
unsigned int block; /* which block are we decompressing? */ unsigned int block; /* which block are we decompressing? */
off_t outlen; /* cumulative sum of block output sizes */
struct mspack_system sys; /* special I/O code for decompressor */ struct mspack_system sys; /* special I/O code for decompressor */
int comp_type; /* type of compression used by folder */ int comp_type; /* type of compression used by folder */
int (*decompress)(void *, off_t); /* decompressor code */ int (*decompress)(void *, off_t); /* decompressor code */
@ -93,14 +106,14 @@ struct mscabd_decompress_state {
struct mspack_file *infh; /* input file handle */ struct mspack_file *infh; /* input file handle */
struct mspack_file *outfh; /* output file handle */ struct mspack_file *outfh; /* output file handle */
unsigned char *i_ptr, *i_end; /* input data consumed, end */ unsigned char *i_ptr, *i_end; /* input data consumed, end */
unsigned char input[CAB_INPUTMAX]; /* one input block of data */ unsigned char input[CAB_INPUTBUF]; /* one input block of data */
}; };
struct mscab_decompressor_p { struct mscab_decompressor_p {
struct mscab_decompressor base; struct mscab_decompressor base;
struct mscabd_decompress_state *d; struct mscabd_decompress_state *d;
struct mspack_system *system; struct mspack_system *system;
int param[3]; /* !!! MATCH THIS TO NUM OF PARAMS IN MSPACK.H !!! */ int buf_size, searchbuf_size, fix_mszip, salvage; /* params */
int error, read_error; int error, read_error;
}; };

View file

@ -1,5 +1,5 @@
/* This file is part of libmspack. /* This file is part of libmspack.
* (C) 2003-2011 Stuart Caie. * (C) 2003-2018 Stuart Caie.
* *
* libmspack is free software; you can redistribute it and/or modify it under * libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1 * the terms of the GNU Lesser General Public License (LGPL) version 2.1
@ -23,7 +23,9 @@
#include "system-mspack.h" #include "system-mspack.h"
#include "cab.h" #include "cab.h"
#include <assert.h> #include "mszip.h"
#include "lzx.h"
#include "qtm.h"
/* Notes on compliance with cabinet specification: /* Notes on compliance with cabinet specification:
* *
@ -72,10 +74,9 @@ static void cabd_close(
struct mscab_decompressor *base, struct mscabd_cabinet *origcab); struct mscab_decompressor *base, struct mscabd_cabinet *origcab);
static int cabd_read_headers( static int cabd_read_headers(
struct mspack_system *sys, struct mspack_file *fh, struct mspack_system *sys, struct mspack_file *fh,
struct mscabd_cabinet_p *cab, off_t offset, int quiet); struct mscabd_cabinet_p *cab, off_t offset, int salvage, int quiet);
static char *cabd_read_string( static char *cabd_read_string(
struct mspack_system *sys, struct mspack_file *fh, struct mspack_system *sys, struct mspack_file *fh, int *error);
struct mscabd_cabinet_p *cab, int *error);
static struct mscabd_cabinet *cabd_search( static struct mscabd_cabinet *cabd_search(
struct mscab_decompressor *base, const char *filename); struct mscab_decompressor *base, const char *filename);
@ -110,7 +111,7 @@ static int cabd_sys_write(
struct mspack_file *file, void *buffer, int bytes); struct mspack_file *file, void *buffer, int bytes);
static int cabd_sys_read_block( static int cabd_sys_read_block(
struct mspack_system *sys, struct mscabd_decompress_state *d, int *out, struct mspack_system *sys, struct mscabd_decompress_state *d, int *out,
int ignore_cksum); int ignore_cksum, int ignore_blocksize);
static unsigned int cabd_checksum( static unsigned int cabd_checksum(
unsigned char *data, unsigned int bytes, unsigned int cksum); unsigned char *data, unsigned int bytes, unsigned int cksum);
static struct noned_state *noned_init( static struct noned_state *noned_init(
@ -155,9 +156,10 @@ struct mscab_decompressor *
self->d = NULL; self->d = NULL;
self->error = MSPACK_ERR_OK; self->error = MSPACK_ERR_OK;
self->param[MSCABD_PARAM_SEARCHBUF] = 32768; self->searchbuf_size = 32768;
self->param[MSCABD_PARAM_FIXMSZIP] = 0; self->fix_mszip = 0;
self->param[MSCABD_PARAM_DECOMPBUF] = 4096; self->buf_size = 4096;
self->salvage = 0;
} }
return (struct mscab_decompressor *) self; return (struct mscab_decompressor *) self;
} }
@ -171,9 +173,9 @@ void mspack_destroy_cab_decompressor(struct mscab_decompressor *base) {
struct mscab_decompressor_p *self = (struct mscab_decompressor_p *) base; struct mscab_decompressor_p *self = (struct mscab_decompressor_p *) base;
if (self) { if (self) {
struct mspack_system *sys = self->system; struct mspack_system *sys = self->system;
cabd_free_decomp(self);
if (self->d) { if (self->d) {
if (self->d->infh) sys->close(self->d->infh); if (self->d->infh) sys->close(self->d->infh);
cabd_free_decomp(self);
sys->free(self->d); sys->free(self->d);
} }
sys->free(self); sys->free(self);
@ -201,7 +203,7 @@ static struct mscabd_cabinet *cabd_open(struct mscab_decompressor *base,
if ((fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ))) { if ((fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ))) {
if ((cab = (struct mscabd_cabinet_p *) sys->alloc(sys, sizeof(struct mscabd_cabinet_p)))) { if ((cab = (struct mscabd_cabinet_p *) sys->alloc(sys, sizeof(struct mscabd_cabinet_p)))) {
cab->base.filename = filename; cab->base.filename = filename;
error = cabd_read_headers(sys, fh, cab, (off_t) 0, 0); error = cabd_read_headers(sys, fh, cab, (off_t) 0, self->salvage, 0);
if (error) { if (error) {
cabd_close(base, (struct mscabd_cabinet *) cab); cabd_close(base, (struct mscabd_cabinet *) cab);
cab = NULL; cab = NULL;
@ -306,9 +308,9 @@ static void cabd_close(struct mscab_decompressor *base,
static int cabd_read_headers(struct mspack_system *sys, static int cabd_read_headers(struct mspack_system *sys,
struct mspack_file *fh, struct mspack_file *fh,
struct mscabd_cabinet_p *cab, struct mscabd_cabinet_p *cab,
off_t offset, int quiet) off_t offset, int salvage, int quiet)
{ {
int num_folders, num_files, folder_resv, i, x; int num_folders, num_files, folder_resv, i, x, err, fidx;
struct mscabd_folder_p *fol, *linkfol = NULL; struct mscabd_folder_p *fol, *linkfol = NULL;
struct mscabd_file *file, *linkfile = NULL; struct mscabd_file *file, *linkfile = NULL;
unsigned char buf[64]; unsigned char buf[64];
@ -364,6 +366,7 @@ static int cabd_read_headers(struct mspack_system *sys,
/* read the reserved-sizes part of header, if present */ /* read the reserved-sizes part of header, if present */
cab->base.flags = EndGetI16(&buf[cfhead_Flags]); cab->base.flags = EndGetI16(&buf[cfhead_Flags]);
if (cab->base.flags & cfheadRESERVE_PRESENT) { if (cab->base.flags & cfheadRESERVE_PRESENT) {
if (sys->read(fh, &buf[0], cfheadext_SIZEOF) != cfheadext_SIZEOF) { if (sys->read(fh, &buf[0], cfheadext_SIZEOF) != cfheadext_SIZEOF) {
return MSPACK_ERR_READ; return MSPACK_ERR_READ;
@ -391,14 +394,18 @@ static int cabd_read_headers(struct mspack_system *sys,
/* read name and info of preceding cabinet in set, if present */ /* read name and info of preceding cabinet in set, if present */
if (cab->base.flags & cfheadPREV_CABINET) { if (cab->base.flags & cfheadPREV_CABINET) {
cab->base.prevname = cabd_read_string(sys, fh, cab, &x); if (x) return x; cab->base.prevname = cabd_read_string(sys, fh, &err);
cab->base.previnfo = cabd_read_string(sys, fh, cab, &x); if (x) return x; if (err) return err;
cab->base.previnfo = cabd_read_string(sys, fh, &err);
if (err) return err;
} }
/* read name and info of next cabinet in set, if present */ /* read name and info of next cabinet in set, if present */
if (cab->base.flags & cfheadNEXT_CABINET) { if (cab->base.flags & cfheadNEXT_CABINET) {
cab->base.nextname = cabd_read_string(sys, fh, cab, &x); if (x) return x; cab->base.nextname = cabd_read_string(sys, fh, &err);
cab->base.nextinfo = cabd_read_string(sys, fh, cab, &x); if (x) return x; if (err) return err;
cab->base.nextinfo = cabd_read_string(sys, fh, &err);
if (err) return err;
} }
/* read folders */ /* read folders */
@ -447,25 +454,24 @@ static int cabd_read_headers(struct mspack_system *sys,
file->offset = EndGetI32(&buf[cffile_FolderOffset]); file->offset = EndGetI32(&buf[cffile_FolderOffset]);
/* set folder pointer */ /* set folder pointer */
x = EndGetI16(&buf[cffile_FolderIndex]); fidx = EndGetI16(&buf[cffile_FolderIndex]);
if (x < cffileCONTINUED_FROM_PREV) { if (fidx < cffileCONTINUED_FROM_PREV) {
/* normal folder index; count up to the correct folder. the folder /* normal folder index; count up to the correct folder */
* pointer will be NULL if folder index is invalid */ if (fidx < num_folders) {
struct mscabd_folder *ifol = cab->base.folders; struct mscabd_folder *ifol = cab->base.folders;
while (x--) if (ifol) ifol = ifol->next; while (fidx--) if (ifol) ifol = ifol->next;
file->folder = ifol; file->folder = ifol;
}
if (!ifol) { else {
sys->free(file);
D(("invalid folder index")) D(("invalid folder index"))
return MSPACK_ERR_DATAFORMAT; file->folder = NULL;
} }
} }
else { else {
/* either CONTINUED_TO_NEXT, CONTINUED_FROM_PREV or /* either CONTINUED_TO_NEXT, CONTINUED_FROM_PREV or
* CONTINUED_PREV_AND_NEXT */ * CONTINUED_PREV_AND_NEXT */
if ((x == cffileCONTINUED_TO_NEXT) || if ((fidx == cffileCONTINUED_TO_NEXT) ||
(x == cffileCONTINUED_PREV_AND_NEXT)) (fidx == cffileCONTINUED_PREV_AND_NEXT))
{ {
/* get last folder */ /* get last folder */
struct mscabd_folder *ifol = cab->base.folders; struct mscabd_folder *ifol = cab->base.folders;
@ -477,8 +483,8 @@ static int cabd_read_headers(struct mspack_system *sys,
if (!fol->merge_next) fol->merge_next = file; if (!fol->merge_next) fol->merge_next = file;
} }
if ((x == cffileCONTINUED_FROM_PREV) || if ((fidx == cffileCONTINUED_FROM_PREV) ||
(x == cffileCONTINUED_PREV_AND_NEXT)) (fidx == cffileCONTINUED_PREV_AND_NEXT))
{ {
/* get first folder */ /* get first folder */
file->folder = cab->base.folders; file->folder = cab->base.folders;
@ -502,10 +508,14 @@ static int cabd_read_headers(struct mspack_system *sys,
file->date_y = (x >> 9) + 1980; file->date_y = (x >> 9) + 1980;
/* get filename */ /* get filename */
file->filename = cabd_read_string(sys, fh, cab, &x); file->filename = cabd_read_string(sys, fh, &err);
if (x) {
/* if folder index or filename are bad, either skip it or fail */
if (err || !file->folder) {
sys->free(file->filename);
sys->free(file); sys->free(file);
return x; if (salvage) continue;
return err ? err : MSPACK_ERR_DATAFORMAT;
} }
/* link file entry into file list */ /* link file entry into file list */
@ -514,23 +524,34 @@ static int cabd_read_headers(struct mspack_system *sys,
linkfile = file; linkfile = file;
} }
if (cab->base.files == NULL) {
/* We never actually added any files to the file list. Something went wrong.
* The file header may have been invalid */
D(("No files found, even though header claimed to have %d files", num_files))
return MSPACK_ERR_DATAFORMAT;
}
return MSPACK_ERR_OK; return MSPACK_ERR_OK;
} }
static char *cabd_read_string(struct mspack_system *sys, static char *cabd_read_string(struct mspack_system *sys,
struct mspack_file *fh, struct mspack_file *fh, int *error)
struct mscabd_cabinet_p *cab, int *error)
{ {
off_t base = sys->tell(fh); off_t base = sys->tell(fh);
char buf[256], *str; char buf[256], *str;
unsigned int len, i, ok; int len, i, ok;
(void)cab;
/* read up to 256 bytes */ /* read up to 256 bytes */
len = sys->read(fh, &buf[0], 256); if ((len = sys->read(fh, &buf[0], 256)) <= 0) {
*error = MSPACK_ERR_READ;
return NULL;
}
/* search for a null terminator in the buffer */ /* search for a null terminator in the buffer */
for (i = 0, ok = 0; i < len; i++) if (!buf[i]) { ok = 1; break; } for (i = 0, ok = 0; i < len; i++) if (!buf[i]) { ok = 1; break; }
/* reject empty strings */
if (i == 0) ok = 0;
if (!ok) { if (!ok) {
*error = MSPACK_ERR_DATAFORMAT; *error = MSPACK_ERR_DATAFORMAT;
return NULL; return NULL;
@ -579,7 +600,7 @@ static struct mscabd_cabinet *cabd_search(struct mscab_decompressor *base,
sys = self->system; sys = self->system;
/* allocate a search buffer */ /* allocate a search buffer */
search_buf = (unsigned char *) sys->alloc(sys, (size_t) self->param[MSCABD_PARAM_SEARCHBUF]); search_buf = (unsigned char *) sys->alloc(sys, (size_t) self->searchbuf_size);
if (!search_buf) { if (!search_buf) {
self->error = MSPACK_ERR_NOMEMORY; self->error = MSPACK_ERR_NOMEMORY;
return NULL; return NULL;
@ -630,7 +651,7 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf,
unsigned int cablen_u32 = 0, foffset_u32 = 0; unsigned int cablen_u32 = 0, foffset_u32 = 0;
int false_cabs = 0; int false_cabs = 0;
#ifndef LARGEFILE_SUPPORT #if !LARGEFILE_SUPPORT
/* detect 32-bit off_t overflow */ /* detect 32-bit off_t overflow */
if (flen < 0) { if (flen < 0) {
sys->message(fh, largefile_msg); sys->message(fh, largefile_msg);
@ -643,8 +664,8 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf,
/* search length is either the full length of the search buffer, or the /* search length is either the full length of the search buffer, or the
* amount of data remaining to the end of the file, whichever is less. */ * amount of data remaining to the end of the file, whichever is less. */
length = flen - offset; length = flen - offset;
if (length > self->param[MSCABD_PARAM_SEARCHBUF]) { if (length > self->searchbuf_size) {
length = self->param[MSCABD_PARAM_SEARCHBUF]; length = self->searchbuf_size;
} }
/* fill the search buffer with data from disk */ /* fill the search buffer with data from disk */
@ -654,9 +675,8 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf,
/* FAQ avoidance strategy */ /* FAQ avoidance strategy */
if ((offset == 0) && (EndGetI32(&buf[0]) == 0x28635349)) { if ((offset == 0) && (EndGetI32(&buf[0]) == 0x28635349)) {
sys->message(fh, "WARNING; found InstallShield header. " sys->message(fh, "WARNING; found InstallShield header. Use unshield "
"This is probably an InstallShield file. " "(https://github.com/twogood/unshield) to unpack this file");
"Use UNSHIELD from www.synce.org to unpack it.");
} }
/* read through the entire buffer. */ /* read through the entire buffer. */
@ -705,17 +725,18 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf,
/* check that the files offset is less than the alleged length of /* check that the files offset is less than the alleged length of
* the cabinet, and that the offset + the alleged length are * the cabinet, and that the offset + the alleged length are
* 'roughly' within the end of overall file length */ * 'roughly' within the end of overall file length. In salvage
* mode, don't check the alleged length, allow it to be garbage */
if ((foffset_u32 < cablen_u32) && if ((foffset_u32 < cablen_u32) &&
((caboff + (off_t) foffset_u32) < (flen + 32)) && ((caboff + (off_t) foffset_u32) < (flen + 32)) &&
((caboff + (off_t) cablen_u32) < (flen + 32)) ) (((caboff + (off_t) cablen_u32) < (flen + 32)) || self->salvage))
{ {
/* likely cabinet found -- try reading it */ /* likely cabinet found -- try reading it */
if (!(cab = (struct mscabd_cabinet_p *) sys->alloc(sys, sizeof(struct mscabd_cabinet_p)))) { if (!(cab = (struct mscabd_cabinet_p *) sys->alloc(sys, sizeof(struct mscabd_cabinet_p)))) {
return MSPACK_ERR_NOMEMORY; return MSPACK_ERR_NOMEMORY;
} }
cab->base.filename = filename; cab->base.filename = filename;
if (cabd_read_headers(sys, fh, cab, caboff, 1)) { if (cabd_read_headers(sys, fh, cab, caboff, self->salvage, 1)) {
/* destroy the failed cabinet */ /* destroy the failed cabinet */
cabd_close((struct mscab_decompressor *) self, cabd_close((struct mscab_decompressor *) self,
(struct mscabd_cabinet *) cab); (struct mscabd_cabinet *) cab);
@ -732,7 +753,7 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf,
/* cause the search to restart after this cab's data. */ /* cause the search to restart after this cab's data. */
offset = caboff + (off_t) cablen_u32; offset = caboff + (off_t) cablen_u32;
#ifndef LARGEFILE_SUPPORT #if !LARGEFILE_SUPPORT
/* detect 32-bit off_t overflow */ /* detect 32-bit off_t overflow */
if (offset < caboff) { if (offset < caboff) {
sys->message(fh, largefile_msg); sys->message(fh, largefile_msg);
@ -940,6 +961,12 @@ static int cabd_can_merge_folders(struct mspack_system *sys,
return 0; return 0;
} }
/* check there are not too many data blocks after merging */
if ((lfol->base.num_blocks + rfol->base.num_blocks) > CAB_FOLDERMAX) {
D(("folder merge: too many data blocks in merged folders"))
return 0;
}
if (!(lfi = lfol->merge_next) || !(rfi = rfol->merge_prev)) { if (!(lfi = lfol->merge_next) || !(rfi = rfol->merge_prev)) {
D(("folder merge: one cabinet has no files to merge")) D(("folder merge: one cabinet has no files to merge"))
return 0; return 0;
@ -985,6 +1012,7 @@ static int cabd_extract(struct mscab_decompressor *base,
struct mscabd_folder_p *fol; struct mscabd_folder_p *fol;
struct mspack_system *sys; struct mspack_system *sys;
struct mspack_file *fh; struct mspack_file *fh;
off_t filelen;
if (!self) return MSPACK_ERR_ARGS; if (!self) return MSPACK_ERR_ARGS;
if (!file) return self->error = MSPACK_ERR_ARGS; if (!file) return self->error = MSPACK_ERR_ARGS;
@ -992,15 +1020,43 @@ static int cabd_extract(struct mscab_decompressor *base,
sys = self->system; sys = self->system;
fol = (struct mscabd_folder_p *) file->folder; fol = (struct mscabd_folder_p *) file->folder;
/* check if file can be extracted */ /* if offset is beyond 2GB, nothing can be extracted */
if ((!fol) || (fol->merge_prev) || if (file->offset > CAB_LENGTHMAX) {
(((file->offset + file->length) / CAB_BLOCKMAX) > fol->base.num_blocks))
{
sys->message(NULL, "ERROR; file \"%s\" cannot be extracted, "
"cabinet set is incomplete.", file->filename);
return self->error = MSPACK_ERR_DATAFORMAT; return self->error = MSPACK_ERR_DATAFORMAT;
} }
/* if file claims to go beyond 2GB either error out,
* or in salvage mode reduce file length so it fits 2GB limit
*/
filelen = file->length;
if (filelen > CAB_LENGTHMAX || (file->offset + filelen) > CAB_LENGTHMAX) {
if (self->salvage) {
filelen = CAB_LENGTHMAX - file->offset;
}
else {
return self->error = MSPACK_ERR_DATAFORMAT;
}
}
/* extraction impossible if no folder, or folder needs predecessor */
if (!fol || fol->merge_prev) {
sys->message(NULL, "ERROR; file \"%s\" cannot be extracted, "
"cabinet set is incomplete", file->filename);
return self->error = MSPACK_ERR_DECRUNCH;
}
/* if file goes beyond what can be decoded, give an error.
* In salvage mode, don't assume block sizes, just try decoding
*/
if (!self->salvage) {
off_t maxlen = fol->base.num_blocks * CAB_BLOCKMAX;
if ((file->offset + filelen) > maxlen) {
sys->message(NULL, "ERROR; file \"%s\" cannot be extracted, "
"cabinet set is incomplete", file->filename);
return self->error = MSPACK_ERR_DECRUNCH;
}
}
/* allocate generic decompression state */ /* allocate generic decompression state */
if (!self->d) { if (!self->d) {
self->d = (struct mscabd_decompress_state *) sys->alloc(sys, sizeof(struct mscabd_decompress_state)); self->d = (struct mscabd_decompress_state *) sys->alloc(sys, sizeof(struct mscabd_decompress_state));
@ -1016,7 +1072,12 @@ static int cabd_extract(struct mscab_decompressor *base,
} }
/* do we need to change folder or reset the current folder? */ /* do we need to change folder or reset the current folder? */
if ((self->d->folder != fol) || (self->d->offset > file->offset)) { if ((self->d->folder != fol) || (self->d->offset > file->offset) ||
!self->d->state)
{
/* free any existing decompressor */
cabd_free_decomp(self);
/* do we need to open a new cab file? */ /* do we need to open a new cab file? */
if (!self->d->infh || (fol->data.cab != self->d->incab)) { if (!self->d->infh || (fol->data.cab != self->d->incab)) {
/* close previous file handle if from a different cab */ /* close previous file handle if from a different cab */
@ -1041,6 +1102,7 @@ static int cabd_extract(struct mscab_decompressor *base,
self->d->data = &fol->data; self->d->data = &fol->data;
self->d->offset = 0; self->d->offset = 0;
self->d->block = 0; self->d->block = 0;
self->d->outlen = 0;
self->d->i_ptr = self->d->i_end = &self->d->input[0]; self->d->i_ptr = self->d->i_end = &self->d->input[0];
/* read_error lasts for the lifetime of a decompressor */ /* read_error lasts for the lifetime of a decompressor */
@ -1055,7 +1117,7 @@ static int cabd_extract(struct mscab_decompressor *base,
self->error = MSPACK_ERR_OK; self->error = MSPACK_ERR_OK;
/* if file has more than 0 bytes */ /* if file has more than 0 bytes */
if (file->length) { if (filelen) {
off_t bytes; off_t bytes;
int error; int error;
/* get to correct offset. /* get to correct offset.
@ -1072,7 +1134,7 @@ static int cabd_extract(struct mscab_decompressor *base,
/* if getting to the correct offset was error free, unpack file */ /* if getting to the correct offset was error free, unpack file */
if (!self->error) { if (!self->error) {
self->d->outfh = fh; self->d->outfh = fh;
error = self->d->decompress(self->d->state, (off_t) file->length); error = self->d->decompress(self->d->state, filelen);
self->error = (error == MSPACK_ERR_READ) ? self->read_error : error; self->error = (error == MSPACK_ERR_READ) ? self->read_error : error;
} }
} }
@ -1098,34 +1160,27 @@ static int cabd_init_decomp(struct mscab_decompressor_p *self, unsigned int ct)
{ {
struct mspack_file *fh = (struct mspack_file *) self; struct mspack_file *fh = (struct mspack_file *) self;
assert(self && self->d);
/* free any existing decompressor */
cabd_free_decomp(self);
self->d->comp_type = ct; self->d->comp_type = ct;
switch (ct & cffoldCOMPTYPE_MASK) { switch (ct & cffoldCOMPTYPE_MASK) {
case cffoldCOMPTYPE_NONE: case cffoldCOMPTYPE_NONE:
self->d->decompress = (int (*)(void *, off_t)) &noned_decompress; self->d->decompress = (int (*)(void *, off_t)) &noned_decompress;
self->d->state = noned_init(&self->d->sys, fh, fh, self->d->state = noned_init(&self->d->sys, fh, fh, self->buf_size);
self->param[MSCABD_PARAM_DECOMPBUF]);
break; break;
case cffoldCOMPTYPE_MSZIP: case cffoldCOMPTYPE_MSZIP:
self->d->decompress = (int (*)(void *, off_t)) &mszipd_decompress; self->d->decompress = (int (*)(void *, off_t)) &mszipd_decompress;
self->d->state = mszipd_init(&self->d->sys, fh, fh, self->d->state = mszipd_init(&self->d->sys, fh, fh, self->buf_size,
self->param[MSCABD_PARAM_DECOMPBUF], self->fix_mszip);
self->param[MSCABD_PARAM_FIXMSZIP]);
break; break;
case cffoldCOMPTYPE_QUANTUM: case cffoldCOMPTYPE_QUANTUM:
self->d->decompress = (int (*)(void *, off_t)) &qtmd_decompress; self->d->decompress = (int (*)(void *, off_t)) &qtmd_decompress;
self->d->state = qtmd_init(&self->d->sys, fh, fh, (int) (ct >> 8) & 0x1f, self->d->state = qtmd_init(&self->d->sys, fh, fh, (int) (ct >> 8) & 0x1f,
self->param[MSCABD_PARAM_DECOMPBUF]); self->buf_size);
break; break;
case cffoldCOMPTYPE_LZX: case cffoldCOMPTYPE_LZX:
self->d->decompress = (int (*)(void *, off_t)) &lzxd_decompress; self->d->decompress = (int (*)(void *, off_t)) &lzxd_decompress;
self->d->state = lzxd_init(&self->d->sys, fh, fh, (int) (ct >> 8) & 0x1f, 0, self->d->state = lzxd_init(&self->d->sys, fh, fh, (int) (ct >> 8) & 0x1f, 0,
self->param[MSCABD_PARAM_DECOMPBUF], (off_t) 0); self->buf_size, (off_t)0,0);
break; break;
default: default:
return self->error = MSPACK_ERR_DATAFORMAT; return self->error = MSPACK_ERR_DATAFORMAT;
@ -1134,7 +1189,7 @@ static int cabd_init_decomp(struct mscab_decompressor_p *self, unsigned int ct)
} }
static void cabd_free_decomp(struct mscab_decompressor_p *self) { static void cabd_free_decomp(struct mscab_decompressor_p *self) {
if (!self || !self->d || !self->d->folder || !self->d->state) return; if (!self || !self->d || !self->d->state) return;
switch (self->d->comp_type & cffoldCOMPTYPE_MASK) { switch (self->d->comp_type & cffoldCOMPTYPE_MASK) {
case cffoldCOMPTYPE_NONE: noned_free((struct noned_state *) self->d->state); break; case cffoldCOMPTYPE_NONE: noned_free((struct noned_state *) self->d->state); break;
@ -1162,10 +1217,12 @@ static int cabd_sys_read(struct mspack_file *file, void *buffer, int bytes) {
struct mscab_decompressor_p *self = (struct mscab_decompressor_p *) file; struct mscab_decompressor_p *self = (struct mscab_decompressor_p *) file;
unsigned char *buf = (unsigned char *) buffer; unsigned char *buf = (unsigned char *) buffer;
struct mspack_system *sys = self->system; struct mspack_system *sys = self->system;
int avail, todo, outlen, ignore_cksum; int avail, todo, outlen, ignore_cksum, ignore_blocksize;
ignore_cksum = self->param[MSCABD_PARAM_FIXMSZIP] && ignore_cksum = self->salvage ||
((self->d->comp_type & cffoldCOMPTYPE_MASK) == cffoldCOMPTYPE_MSZIP); (self->fix_mszip &&
((self->d->comp_type & cffoldCOMPTYPE_MASK) == cffoldCOMPTYPE_MSZIP));
ignore_blocksize = self->salvage;
todo = bytes; todo = bytes;
while (todo > 0) { while (todo > 0) {
@ -1185,13 +1242,20 @@ static int cabd_sys_read(struct mspack_file *file, void *buffer, int bytes) {
/* check if we're out of input blocks, advance block counter */ /* check if we're out of input blocks, advance block counter */
if (self->d->block++ >= self->d->folder->base.num_blocks) { if (self->d->block++ >= self->d->folder->base.num_blocks) {
if (!self->salvage) {
self->read_error = MSPACK_ERR_DATAFORMAT; self->read_error = MSPACK_ERR_DATAFORMAT;
}
else {
D(("Ran out of CAB input blocks prematurely"))
}
break; break;
} }
/* read a block */ /* read a block */
self->read_error = cabd_sys_read_block(sys, self->d, &outlen, ignore_cksum); self->read_error = cabd_sys_read_block(sys, self->d, &outlen,
ignore_cksum, ignore_blocksize);
if (self->read_error) return -1; if (self->read_error) return -1;
self->d->outlen += outlen;
/* special Quantum hack -- trailer byte to allow the decompressor /* special Quantum hack -- trailer byte to allow the decompressor
* to realign itself. CAB Quantum blocks, unlike LZX blocks, can have * to realign itself. CAB Quantum blocks, unlike LZX blocks, can have
@ -1202,19 +1266,10 @@ static int cabd_sys_read(struct mspack_file *file, void *buffer, int bytes) {
/* is this the last block? */ /* is this the last block? */
if (self->d->block >= self->d->folder->base.num_blocks) { if (self->d->block >= self->d->folder->base.num_blocks) {
/* last block */
if ((self->d->comp_type & cffoldCOMPTYPE_MASK) == cffoldCOMPTYPE_LZX) { if ((self->d->comp_type & cffoldCOMPTYPE_MASK) == cffoldCOMPTYPE_LZX) {
/* special LZX hack -- on the last block, inform LZX of the /* special LZX hack -- on the last block, inform LZX of the
* size of the output data stream. */ * size of the output data stream. */
lzxd_set_output_length((struct lzxd_stream *) self->d->state, (off_t) lzxd_set_output_length((struct lzxd_stream *) self->d->state, self->d->outlen);
((self->d->block-1) * CAB_BLOCKMAX + outlen));
}
}
else {
/* not the last block */
if (outlen != CAB_BLOCKMAX) {
self->system->message(self->d->infh,
"WARNING; non-maximal data block");
} }
} }
} /* if (avail) */ } /* if (avail) */
@ -1239,11 +1294,12 @@ static int cabd_sys_write(struct mspack_file *file, void *buffer, int bytes) {
*/ */
static int cabd_sys_read_block(struct mspack_system *sys, static int cabd_sys_read_block(struct mspack_system *sys,
struct mscabd_decompress_state *d, struct mscabd_decompress_state *d,
int *out, int ignore_cksum) int *out, int ignore_cksum,
int ignore_blocksize)
{ {
unsigned char hdr[cfdata_SIZEOF]; unsigned char hdr[cfdata_SIZEOF];
unsigned int cksum; unsigned int cksum;
int len; int len, full_len;
/* reset the input block pointer and end of block pointer */ /* reset the input block pointer and end of block pointer */
d->i_ptr = d->i_end = &d->input[0]; d->i_ptr = d->i_end = &d->input[0];
@ -1264,15 +1320,19 @@ static int cabd_sys_read_block(struct mspack_system *sys,
/* blocks must not be over CAB_INPUTMAX in size */ /* blocks must not be over CAB_INPUTMAX in size */
len = EndGetI16(&hdr[cfdata_CompressedSize]); len = EndGetI16(&hdr[cfdata_CompressedSize]);
if (((d->i_end - d->i_ptr) + len) > CAB_INPUTMAX) { full_len = (d->i_end - d->i_ptr) + len; /* include cab-spanning blocks */
D(("block size > CAB_INPUTMAX (%ld + %d)", d->i_end - d->i_ptr, len)) if (full_len > CAB_INPUTMAX) {
D(("block size %d > CAB_INPUTMAX", full_len));
/* in salvage mode, blocks can be 65535 bytes but no more than that */
if (!ignore_blocksize || full_len > CAB_INPUTMAX_SALVAGE) {
return MSPACK_ERR_DATAFORMAT; return MSPACK_ERR_DATAFORMAT;
} }
}
/* blocks must not expand to more than CAB_BLOCKMAX */ /* blocks must not expand to more than CAB_BLOCKMAX */
if (EndGetI16(&hdr[cfdata_UncompressedSize]) > CAB_BLOCKMAX) { if (EndGetI16(&hdr[cfdata_UncompressedSize]) > CAB_BLOCKMAX) {
D(("block size > CAB_BLOCKMAX")) D(("block size > CAB_BLOCKMAX"))
return MSPACK_ERR_DATAFORMAT; if (!ignore_blocksize) return MSPACK_ERR_DATAFORMAT;
} }
/* read the block data */ /* read the block data */
@ -1310,7 +1370,7 @@ static int cabd_sys_read_block(struct mspack_system *sys,
/* advance to next member in the cabinet set */ /* advance to next member in the cabinet set */
if (!(d->data = d->data->next)) { if (!(d->data = d->data->next)) {
D(("ran out of splits in cabinet set")) sys->message(d->infh, "WARNING; ran out of cabinets in set. Are any missing?");
return MSPACK_ERR_DATAFORMAT; return MSPACK_ERR_DATAFORMAT;
} }
@ -1342,8 +1402,8 @@ static unsigned int cabd_checksum(unsigned char *data, unsigned int bytes,
} }
switch (bytes & 3) { switch (bytes & 3) {
case 3: ul |= *data++ << 16; case 3: ul |= *data++ << 16; /*@fallthrough@*/
case 2: ul |= *data++ << 8; case 2: ul |= *data++ << 8; /*@fallthrough@*/
case 1: ul |= *data; case 1: ul |= *data;
} }
cksum ^= ul; cksum ^= ul;
@ -1419,14 +1479,17 @@ static int cabd_param(struct mscab_decompressor *base, int param, int value) {
switch (param) { switch (param) {
case MSCABD_PARAM_SEARCHBUF: case MSCABD_PARAM_SEARCHBUF:
if (value < 4) return MSPACK_ERR_ARGS; if (value < 4) return MSPACK_ERR_ARGS;
self->param[MSCABD_PARAM_SEARCHBUF] = value; self->searchbuf_size = value;
break; break;
case MSCABD_PARAM_FIXMSZIP: case MSCABD_PARAM_FIXMSZIP:
self->param[MSCABD_PARAM_FIXMSZIP] = value; self->fix_mszip = value;
break; break;
case MSCABD_PARAM_DECOMPBUF: case MSCABD_PARAM_DECOMPBUF:
if (value < 4) return MSPACK_ERR_ARGS; if (value < 4) return MSPACK_ERR_ARGS;
self->param[MSCABD_PARAM_DECOMPBUF] = value; self->buf_size = value;
break;
case MSCABD_PARAM_SALVAGE:
self->salvage = value;
break; break;
default: default:
return MSPACK_ERR_ARGS; return MSPACK_ERR_ARGS;

View file

@ -1,5 +1,5 @@
/* This file is part of libmspack. /* This file is part of libmspack.
* (C) 2003-2011 Stuart Caie. * (C) 2003-2018 Stuart Caie.
* *
* libmspack is free software; you can redistribute it and/or modify it under * libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1 * the terms of the GNU Lesser General Public License (LGPL) version 2.1
@ -44,7 +44,7 @@ static int chmd_init_decomp(
struct mschm_decompressor_p *self, struct mschmd_file *file); struct mschm_decompressor_p *self, struct mschmd_file *file);
static int read_reset_table( static int read_reset_table(
struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec, struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec,
int entry, off_t *length_ptr, off_t *offset_ptr); unsigned int entry, off_t *length_ptr, off_t *offset_ptr);
static int read_spaninfo( static int read_spaninfo(
struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec, struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec,
off_t *length_ptr); off_t *length_ptr);
@ -254,7 +254,7 @@ static const unsigned char guids[32] = {
#define READ_ENCINT(var) do { \ #define READ_ENCINT(var) do { \
(var) = 0; \ (var) = 0; \
do { \ do { \
if (p > end) goto chunk_end; \ if (p >= end) goto chunk_end; \
(var) = ((var) << 7) | (*p & 0x7F); \ (var) = ((var) << 7) | (*p & 0x7F); \
} while (*p++ & 0x80); \ } while (*p++ & 0x80); \
} while (0) } while (0)
@ -292,7 +292,7 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
} }
/* check both header GUIDs */ /* check both header GUIDs */
if (mspack_memcmp(&buf[chmhead_GUID1], &guids[0], 32L) != 0) { if (memcmp(&buf[chmhead_GUID1], &guids[0], 32L) != 0) {
D(("incorrect GUIDs")) D(("incorrect GUIDs"))
return MSPACK_ERR_SIGNATURE; return MSPACK_ERR_SIGNATURE;
} }
@ -356,8 +356,53 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
chm->sec0.offset = chm->dir_offset + (chm->chunk_size * chm->num_chunks); chm->sec0.offset = chm->dir_offset + (chm->chunk_size * chm->num_chunks);
} }
/* ensure chunk size is large enough for signature and num_entries */ /* check if content offset or file size is wrong */
if (chm->sec0.offset > chm->length) {
D(("content section begins after file has ended"))
return MSPACK_ERR_DATAFORMAT;
}
/* ensure there are chunks and that chunk size is
* large enough for signature and num_entries */
if (chm->chunk_size < (pmgl_Entries + 2)) { if (chm->chunk_size < (pmgl_Entries + 2)) {
D(("chunk size not large enough"))
return MSPACK_ERR_DATAFORMAT;
}
if (chm->num_chunks == 0) {
D(("no chunks"))
return MSPACK_ERR_DATAFORMAT;
}
/* The chunk_cache data structure is not great; large values for num_chunks
* or num_chunks*chunk_size can exhaust all memory. Until a better chunk
* cache is implemented, put arbitrary limits on num_chunks and chunk size.
*/
if (chm->num_chunks > 100000) {
D(("more than 100,000 chunks"))
return MSPACK_ERR_DATAFORMAT;
}
if (chm->chunk_size > 8192) {
D(("chunk size over 8192 (get in touch if this is valid)"))
return MSPACK_ERR_DATAFORMAT;
}
if ((off_t)chm->chunk_size * (off_t)chm->num_chunks > chm->length) {
D(("chunks larger than entire file"))
return MSPACK_ERR_DATAFORMAT;
}
/* common sense checks on header section 1 fields */
if (chm->chunk_size != 4096) {
sys->message(fh, "WARNING; chunk size is not 4096");
}
if (chm->first_pmgl != 0) {
sys->message(fh, "WARNING; first PMGL chunk is not zero");
}
if (chm->first_pmgl > chm->last_pmgl) {
D(("first pmgl chunk is after last pmgl chunk"))
return MSPACK_ERR_DATAFORMAT;
}
if (chm->index_root != 0xFFFFFFFF && chm->index_root >= chm->num_chunks) {
D(("index_root outside valid range"))
return MSPACK_ERR_DATAFORMAT; return MSPACK_ERR_DATAFORMAT;
} }
@ -394,7 +439,7 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
sys->message(fh, "WARNING; PMGL quickref area is too small"); sys->message(fh, "WARNING; PMGL quickref area is too small");
} }
if (EndGetI32(&chunk[pmgl_QuickRefSize]) > if (EndGetI32(&chunk[pmgl_QuickRefSize]) >
((int)chm->chunk_size - pmgl_Entries)) (chm->chunk_size - pmgl_Entries))
{ {
sys->message(fh, "WARNING; PMGL quickref area is too large"); sys->message(fh, "WARNING; PMGL quickref area is too large");
} }
@ -404,11 +449,16 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
num_entries = EndGetI16(end); num_entries = EndGetI16(end);
while (num_entries--) { while (num_entries--) {
READ_ENCINT(name_len); name = p; p += name_len; READ_ENCINT(name_len);
if (name_len > (unsigned int) (end - p)) goto chunk_end;
name = p; p += name_len;
READ_ENCINT(section); READ_ENCINT(section);
READ_ENCINT(offset); READ_ENCINT(offset);
READ_ENCINT(length); READ_ENCINT(length);
/* ignore blank or one-char (e.g. "/") filenames we'd return as blank */
if (name_len < 2 || !name[0] || !name[1]) continue;
/* empty files and directory names are stored as a file entry at /* empty files and directory names are stored as a file entry at
* offset 0 with length 0. We want to keep empty files, but not * offset 0 with length 0. We want to keep empty files, but not
* directory names, which end with a "/" */ * directory names, which end with a "/" */
@ -437,20 +487,18 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
if (name[0] == ':' && name[1] == ':') { if (name[0] == ':' && name[1] == ':') {
/* system file */ /* system file */
if (mspack_memcmp(&name[2], &content_name[2], 31L) == 0) { if (name_len == 40 && memcmp(name, content_name, 40) == 0) {
if (mspack_memcmp(&name[33], &content_name[33], 8L) == 0) {
chm->sec1.content = fi; chm->sec1.content = fi;
} }
else if (mspack_memcmp(&name[33], &control_name[33], 11L) == 0) { else if (name_len == 44 && memcmp(name, control_name, 44) == 0) {
chm->sec1.control = fi; chm->sec1.control = fi;
} }
else if (mspack_memcmp(&name[33], &spaninfo_name[33], 8L) == 0) { else if (name_len == 41 && memcmp(name, spaninfo_name, 41) == 0) {
chm->sec1.spaninfo = fi; chm->sec1.spaninfo = fi;
} }
else if (mspack_memcmp(&name[33], &rtable_name[33], 72L) == 0) { else if (name_len == 105 && memcmp(name, rtable_name, 105) == 0) {
chm->sec1.rtable = fi; chm->sec1.rtable = fi;
} }
}
fi->next = chm->sysfiles; fi->next = chm->sysfiles;
chm->sysfiles = fi; chm->sysfiles = fi;
} }
@ -481,7 +529,7 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
* directly from the on-disk index. * directly from the on-disk index.
* *
* TODO: protect against infinite loops in chunks (where pmgl_NextChunk * TODO: protect against infinite loops in chunks (where pmgl_NextChunk
* or a PGMI index entry point to an already visited chunk) * or a PMGI index entry point to an already visited chunk)
*/ */
static int chmd_fast_find(struct mschm_decompressor *base, static int chmd_fast_find(struct mschm_decompressor *base,
struct mschmd_header *chm, const char *filename, struct mschmd_header *chm, const char *filename,
@ -490,7 +538,10 @@ static int chmd_fast_find(struct mschm_decompressor *base,
struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
struct mspack_system *sys; struct mspack_system *sys;
struct mspack_file *fh; struct mspack_file *fh;
const unsigned char *chunk, *p, *end; /* p and end are initialised to prevent MSVC warning about "potentially"
* uninitialised usage. This is provably untrue, but MS won't fix:
* https://developercommunity.visualstudio.com/content/problem/363489/c4701-false-positive-warning.html */
const unsigned char *chunk, *p = NULL, *end = NULL;
int err = MSPACK_ERR_OK, result = -1; int err = MSPACK_ERR_OK, result = -1;
unsigned int n, sec; unsigned int n, sec;
@ -538,6 +589,11 @@ static int chmd_fast_find(struct mschm_decompressor *base,
if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0) { if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0) {
break; break;
} }
/* stop simple infinite loops: can't visit the same chunk twice */
if (n == EndGetI32(&chunk[pmgl_NextChunk])) {
break;
}
} }
} }
@ -574,7 +630,7 @@ static unsigned char *read_chunk(struct mschm_decompressor_p *self,
unsigned char *buf; unsigned char *buf;
/* check arguments - most are already checked by chmd_fast_find */ /* check arguments - most are already checked by chmd_fast_find */
if (chunk_num > chm->num_chunks) return NULL; if (chunk_num >= chm->num_chunks) return NULL;
/* ensure chunk cache is available */ /* ensure chunk cache is available */
if (!chm->chunk_cache) { if (!chm->chunk_cache) {
@ -640,7 +696,7 @@ static int search_chunk(struct mschmd_header *chm,
{ {
const unsigned char *start, *end, *p; const unsigned char *start, *end, *p;
unsigned int qr_size, num_entries, qr_entries, qr_density, name_len; unsigned int qr_size, num_entries, qr_entries, qr_density, name_len;
unsigned int L, R, M, sec, fname_len, entries_off, is_pmgl; unsigned int L, R, M, fname_len, entries_off, is_pmgl;
int cmp; int cmp;
fname_len = strlen(filename); fname_len = strlen(filename);
@ -700,7 +756,7 @@ static int search_chunk(struct mschmd_header *chm,
/* compare filename with entry QR points to */ /* compare filename with entry QR points to */
p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)]; p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)];
READ_ENCINT(name_len); READ_ENCINT(name_len);
if (p + name_len > end) goto chunk_end; if (name_len > (unsigned int) (end - p)) goto chunk_end;
cmp = compare(filename, (char *)p, fname_len, name_len); cmp = compare(filename, (char *)p, fname_len, name_len);
if (cmp == 0) break; if (cmp == 0) break;
@ -737,7 +793,7 @@ static int search_chunk(struct mschmd_header *chm,
*result = NULL; *result = NULL;
while (num_entries-- > 0) { while (num_entries-- > 0) {
READ_ENCINT(name_len); READ_ENCINT(name_len);
if (p + name_len > end) goto chunk_end; if (name_len > (unsigned int) (end - p)) goto chunk_end;
cmp = compare(filename, (char *)p, fname_len, name_len); cmp = compare(filename, (char *)p, fname_len, name_len);
p += name_len; p += name_len;
@ -773,66 +829,34 @@ static int search_chunk(struct mschmd_header *chm,
} }
#if HAVE_TOWLOWER #if HAVE_TOWLOWER
# if HAVE_WCTYPE_H
# include <wctype.h> # include <wctype.h>
# endif
# define TOLOWER(x) towlower(x) # define TOLOWER(x) towlower(x)
#elif HAVE_TOLOWER
# if HAVE_CTYPE_H
# include <ctype.h>
# endif
# define TOLOWER(x) tolower(x)
#else #else
# define TOLOWER(x) (((x)<0||(x)>256)?(x):mspack_tolower_map[(x)]) # include <ctype.h>
/* Map of char -> lowercase char for the first 256 chars. Generated with: # define TOLOWER(x) tolower(x)
* LC_CTYPE=en_GB.utf-8 perl -Mlocale -le 'print map{ord(lc chr).","} 0..255'
*/
static const unsigned char mspack_tolower_map[256] = {
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,
53,54,55,56,57,58,59,60,61,62,63,64,97,98,99,100,101,102,103,104,105,106,
107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,
95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,
115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,
134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,
153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,
172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
191,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,
242,243,244,245,246,215,248,249,250,251,252,253,254,223,224,225,226,227,228,
229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255
};
#endif #endif
/* decodes a UTF-8 character from s[] into c. Will not read past e. */ /* decodes a UTF-8 character from s[] into c. Will not read past e.
* doesn't test that extension bytes are %10xxxxxx.
* allows some overlong encodings.
*/
#define GET_UTF8_CHAR(s, e, c) do { \ #define GET_UTF8_CHAR(s, e, c) do { \
unsigned char x = *s++; \ unsigned char x = *s++; \
if (x < 0x80) c = x; \ if (x < 0x80) c = x; \
else if (x < 0xC0) c = -1; \ else if (x >= 0xC2 && x < 0xE0 && s < e) { \
else if (x < 0xE0) { \ c = (x & 0x1F) << 6 | (*s++ & 0x3F); \
c = (s >= e) ? -1 : ((x & 0x1F) << 6) | (*s++ & 0x3F); \
} \ } \
else if (x < 0xF0) { \ else if (x >= 0xE0 && x < 0xF0 && s+1 < e) { \
c = (s+2 > e) ? -1 : ((x & 0x0F) << 12) | ((s[0] & 0x3F) << 6) \ c = (x & 0x0F) << 12 | (s[0] & 0x3F) << 6 | (s[1] & 0x3F); \
| (s[1] & 0x3F); \
s += 2; \ s += 2; \
} \ } \
else if (x < 0xF8) { \ else if (x >= 0xF0 && x <= 0xF5 && s+2 < e) { \
c = (s+3 > e) ? -1 : ((x & 0x07) << 18) | ((s[0] & 0x3F) << 12) \ c = (x & 0x07) << 18 | (s[0] & 0x3F) << 12 | \
| ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); \ (s[1] & 0x3F) << 6 | (s[2] & 0x3F); \
if (c > 0x10FFFF) c = 0xFFFD; \
s += 3; \ s += 3; \
} \ } \
else if (x < 0xFC) { \ else c = 0xFFFD; \
c = (s+4 > e) ? -1 : ((x & 0x03) << 24) | ((s[0] & 0x3F) << 18) \
| ((s[1] & 0x3F) << 12)|((s[2] & 0x3F) << 6)|(s[3] & 0x3F); \
s += 4; \
} \
else if (x < 0xFE) { \
c = (s+5>e)?-1:((x&1)<<30)|((s[0]&0x3F)<<24)|((s[1]&0x3F)<<18)| \
((s[2] & 0x3F) << 12) | ((s[3] & 0x3F) << 6)|(s[4] & 0x3F); \
s += 5; \
} \
else c = -1; \
} while (0) } while (0)
/* case-insensitively compares two UTF8 encoded strings. String length for /* case-insensitively compares two UTF8 encoded strings. String length for
@ -1077,7 +1101,7 @@ static int chmd_init_decomp(struct mschm_decompressor_p *self,
} }
/* validate reset_interval */ /* validate reset_interval */
if (reset_interval % LZX_FRAME_SIZE) { if (reset_interval == 0 || reset_interval % LZX_FRAME_SIZE) {
D(("bad controldata reset interval")) D(("bad controldata reset interval"))
return self->error = MSPACK_ERR_DATAFORMAT; return self->error = MSPACK_ERR_DATAFORMAT;
} }
@ -1118,7 +1142,7 @@ static int chmd_init_decomp(struct mschm_decompressor_p *self,
self->d->state = lzxd_init(&self->d->sys, self->d->infh, self->d->state = lzxd_init(&self->d->sys, self->d->infh,
(struct mspack_file *) self, window_bits, (struct mspack_file *) self, window_bits,
reset_interval / LZX_FRAME_SIZE, reset_interval / LZX_FRAME_SIZE,
4096, length); 4096, length, 0);
if (!self->d->state) self->error = MSPACK_ERR_NOMEMORY; if (!self->d->state) self->error = MSPACK_ERR_NOMEMORY;
return self->error; return self->error;
} }
@ -1132,11 +1156,12 @@ static int chmd_init_decomp(struct mschm_decompressor_p *self,
*/ */
static int read_reset_table(struct mschm_decompressor_p *self, static int read_reset_table(struct mschm_decompressor_p *self,
struct mschmd_sec_mscompressed *sec, struct mschmd_sec_mscompressed *sec,
int entry, off_t *length_ptr, off_t *offset_ptr) unsigned int entry,
off_t *length_ptr, off_t *offset_ptr)
{ {
struct mspack_system *sys = self->system; struct mspack_system *sys = self->system;
unsigned char *data; unsigned char *data;
int pos, entrysize; unsigned int pos, entrysize;
/* do we have a ResetTable file? */ /* do we have a ResetTable file? */
int err = find_sys_file(self, sec, &sec->rtable, rtable_name); int err = find_sys_file(self, sec, &sec->rtable, rtable_name);
@ -1160,7 +1185,7 @@ static int read_reset_table(struct mschm_decompressor_p *self,
} }
/* get the uncompressed length of the LZX stream */ /* get the uncompressed length of the LZX stream */
if (read_off64(length_ptr, data, sys, self->d->infh)) { if (read_off64(length_ptr, &data[lzxrt_UncompLen], sys, self->d->infh)) {
sys->free(data); sys->free(data);
return 0; return 0;
} }
@ -1170,7 +1195,7 @@ static int read_reset_table(struct mschm_decompressor_p *self,
/* ensure reset table entry for this offset exists */ /* ensure reset table entry for this offset exists */
if (entry < EndGetI32(&data[lzxrt_NumEntries]) && if (entry < EndGetI32(&data[lzxrt_NumEntries]) &&
((pos + entrysize) <= sec->rtable->length)) pos <= (sec->rtable->length - entrysize))
{ {
switch (entrysize) { switch (entrysize) {
case 4: case 4:
@ -1229,9 +1254,15 @@ static int read_spaninfo(struct mschm_decompressor_p *self,
/* get the uncompressed length of the LZX stream */ /* get the uncompressed length of the LZX stream */
err = read_off64(length_ptr, data, sys, self->d->infh); err = read_off64(length_ptr, data, sys, self->d->infh);
sys->free(data); sys->free(data);
return (err) ? MSPACK_ERR_DATAFORMAT : MSPACK_ERR_OK; if (err) return MSPACK_ERR_DATAFORMAT;
if (*length_ptr <= 0) {
D(("output length is invalid"))
return MSPACK_ERR_DATAFORMAT;
}
return MSPACK_ERR_OK;
} }
/*************************************** /***************************************
@ -1333,7 +1364,7 @@ static int chmd_error(struct mschm_decompressor *base) {
static int read_off64(off_t *var, unsigned char *mem, static int read_off64(off_t *var, unsigned char *mem,
struct mspack_system *sys, struct mspack_file *fh) struct mspack_system *sys, struct mspack_file *fh)
{ {
#ifdef LARGEFILE_SUPPORT #if LARGEFILE_SUPPORT
*var = EndGetI64(mem); *var = EndGetI64(mem);
#else #else
*var = EndGetI32(mem); *var = EndGetI32(mem);

View file

@ -1,5 +1,5 @@
/* This file is part of libmspack. /* This file is part of libmspack.
* (C) 2003-2010 Stuart Caie. * (C) 2003-2011 Stuart Caie.
* *
* KWAJ is a format very similar to SZDD. KWAJ method 3 (LZH) was * KWAJ is a format very similar to SZDD. KWAJ method 3 (LZH) was
* written by Jeff Johnson. * written by Jeff Johnson.
@ -14,6 +14,7 @@
#include "system-mspack.h" #include "system-mspack.h"
#include "kwaj.h" #include "kwaj.h"
#include "mszip.h"
/* prototypes */ /* prototypes */
static struct mskwajd_header *kwajd_open( static struct mskwajd_header *kwajd_open(
@ -40,7 +41,7 @@ static void lzh_free(
static int lzh_read_lens( static int lzh_read_lens(
struct kwajd_stream *kwaj, struct kwajd_stream *kwaj,
unsigned int type, unsigned int numsyms, unsigned int type, unsigned int numsyms,
unsigned char *lens, unsigned short *table); unsigned char *lens);
static int lzh_read_input( static int lzh_read_input(
struct kwajd_stream *kwaj); struct kwajd_stream *kwaj);
@ -113,7 +114,7 @@ static struct mskwajd_header *kwajd_open(struct mskwaj_decompressor *base,
if (self->error) { if (self->error) {
if (fh) sys->close(fh); if (fh) sys->close(fh);
if (hdr) sys->free(hdr); sys->free(hdr);
hdr = NULL; hdr = NULL;
} }
@ -197,30 +198,36 @@ static int kwajd_read_headers(struct mspack_system *sys,
/* filename and extension */ /* filename and extension */
if (hdr->headers & (MSKWAJ_HDR_HASFILENAME | MSKWAJ_HDR_HASFILEEXT)) { if (hdr->headers & (MSKWAJ_HDR_HASFILENAME | MSKWAJ_HDR_HASFILEEXT)) {
off_t pos = sys->tell(fh); int len;
char *fn = (char *) sys->alloc(sys, (size_t) 13);
/* allocate memory for maximum length filename */ /* allocate memory for maximum length filename */
if (! fn) return MSPACK_ERR_NOMEMORY; char *fn = (char *) sys->alloc(sys, (size_t) 13);
hdr->filename = fn; if (!(hdr->filename = fn)) return MSPACK_ERR_NOMEMORY;
/* copy filename if present */ /* copy filename if present */
if (hdr->headers & MSKWAJ_HDR_HASFILENAME) { if (hdr->headers & MSKWAJ_HDR_HASFILENAME) {
if (sys->read(fh, &buf[0], 9) != 9) return MSPACK_ERR_READ; /* read and copy up to 9 bytes of a null terminated string */
for (i = 0; i < 9; i++, fn++) if (!(*fn = buf[i])) break; if ((len = sys->read(fh, &buf[0], 9)) < 2) return MSPACK_ERR_READ;
pos += (i < 9) ? i+1 : 9; for (i = 0; i < len; i++) if (!(*fn++ = buf[i])) break;
if (sys->seek(fh, pos, MSPACK_SYS_SEEK_START)) /* if string was 9 bytes with no null terminator, reject it */
if (i == 9 && buf[8] != '\0') return MSPACK_ERR_DATAFORMAT;
/* seek to byte after string ended in file */
if (sys->seek(fh, (off_t)(i + 1 - len), MSPACK_SYS_SEEK_CUR))
return MSPACK_ERR_SEEK; return MSPACK_ERR_SEEK;
fn--; /* remove the null terminator */
} }
/* copy extension if present */ /* copy extension if present */
if (hdr->headers & MSKWAJ_HDR_HASFILEEXT) { if (hdr->headers & MSKWAJ_HDR_HASFILEEXT) {
*fn++ = '.'; *fn++ = '.';
if (sys->read(fh, &buf[0], 4) != 4) return MSPACK_ERR_READ; /* read and copy up to 4 bytes of a null terminated string */
for (i = 0; i < 4; i++, fn++) if (!(*fn = buf[i])) break; if ((len = sys->read(fh, &buf[0], 4)) < 2) return MSPACK_ERR_READ;
pos += (i < 4) ? i+1 : 4; for (i = 0; i < len; i++) if (!(*fn++ = buf[i])) break;
if (sys->seek(fh, pos, MSPACK_SYS_SEEK_START)) /* if string was 4 bytes with no null terminator, reject it */
if (i == 4 && buf[3] != '\0') return MSPACK_ERR_DATAFORMAT;
/* seek to byte after string ended in file */
if (sys->seek(fh, (off_t)(i + 1 - len), MSPACK_SYS_SEEK_CUR))
return MSPACK_ERR_SEEK; return MSPACK_ERR_SEEK;
fn--; /* remove the null terminator */
} }
*fn = '\0'; *fn = '\0';
} }
@ -301,6 +308,11 @@ static int kwajd_extract(struct mskwaj_decompressor *base,
self->error = (lzh) ? lzh_decompress(lzh) : MSPACK_ERR_NOMEMORY; self->error = (lzh) ? lzh_decompress(lzh) : MSPACK_ERR_NOMEMORY;
lzh_free(lzh); lzh_free(lzh);
} }
else if (hdr->comp_type == MSKWAJ_COMP_MSZIP) {
struct mszipd_stream *zip = mszipd_init(sys,fh,outfh,KWAJ_INPUT_SIZE,0);
self->error = (zip) ? mszipd_decompress_kwaj(zip) : MSPACK_ERR_NOMEMORY;
mszipd_free(zip);
}
else { else {
self->error = MSPACK_ERR_DATAFORMAT; self->error = MSPACK_ERR_DATAFORMAT;
} }
@ -361,7 +373,7 @@ static int kwajd_error(struct mskwaj_decompressor *base)
} \ } \
INJECT_BITS(*i_ptr++, 8); \ INJECT_BITS(*i_ptr++, 8); \
} while (0) } while (0)
#include "readbits.h" #include <readbits.h>
/* import huffman-reading macros and code */ /* import huffman-reading macros and code */
#define TABLEBITS(tbl) KWAJ_TABLEBITS #define TABLEBITS(tbl) KWAJ_TABLEBITS
@ -369,7 +381,7 @@ static int kwajd_error(struct mskwaj_decompressor *base)
#define HUFF_TABLE(tbl,idx) lzh->tbl##_table[idx] #define HUFF_TABLE(tbl,idx) lzh->tbl##_table[idx]
#define HUFF_LEN(tbl,idx) lzh->tbl##_len[idx] #define HUFF_LEN(tbl,idx) lzh->tbl##_len[idx]
#define HUFF_ERROR return MSPACK_ERR_DATAFORMAT #define HUFF_ERROR return MSPACK_ERR_DATAFORMAT
#include "readhuff.h" #include <readhuff.h>
/* In the KWAJ LZH format, there is no special 'eof' marker, it just /* In the KWAJ LZH format, there is no special 'eof' marker, it just
* ends. Depending on how many bits are left in the final byte when * ends. Depending on how many bits are left in the final byte when
@ -395,8 +407,7 @@ static int kwajd_error(struct mskwaj_decompressor *base)
#define BUILD_TREE(tbl, type) \ #define BUILD_TREE(tbl, type) \
STORE_BITS; \ STORE_BITS; \
err = lzh_read_lens(lzh, type, MAXSYMBOLS(tbl), \ err = lzh_read_lens(lzh, type, MAXSYMBOLS(tbl), &HUFF_LEN(tbl,0)); \
&HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0)); \
if (err) return err; \ if (err) return err; \
RESTORE_BITS; \ RESTORE_BITS; \
if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \ if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \
@ -488,7 +499,7 @@ static void lzh_free(struct kwajd_stream *lzh)
static int lzh_read_lens(struct kwajd_stream *lzh, static int lzh_read_lens(struct kwajd_stream *lzh,
unsigned int type, unsigned int numsyms, unsigned int type, unsigned int numsyms,
unsigned char *lens, unsigned short *table) unsigned char *lens)
{ {
register unsigned int bit_buffer; register unsigned int bit_buffer;
register int bits_left; register int bits_left;

View file

@ -1,5 +1,5 @@
/* This file is part of libmspack. /* This file is part of libmspack.
* (C) 2003-2004 Stuart Caie. * (C) 2003-2013 Stuart Caie.
* *
* The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
* by Microsoft Corporation. * by Microsoft Corporation.
@ -35,7 +35,7 @@ extern "C" {
/* LZX huffman defines: tweak tablebits as desired */ /* LZX huffman defines: tweak tablebits as desired */
#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS) #define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS)
#define LZX_PRETREE_TABLEBITS (6) #define LZX_PRETREE_TABLEBITS (6)
#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) #define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 290*8)
#define LZX_MAINTREE_TABLEBITS (12) #define LZX_MAINTREE_TABLEBITS (12)
#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1) #define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1)
#define LZX_LENGTH_TABLEBITS (12) #define LZX_LENGTH_TABLEBITS (12)
@ -55,6 +55,8 @@ struct lzxd_stream {
unsigned char *window; /* decoding window */ unsigned char *window; /* decoding window */
unsigned int window_size; /* window size */ unsigned int window_size; /* window size */
unsigned int ref_data_size; /* LZX DELTA reference data size */
unsigned int num_offsets; /* number of match_offset entries in table */
unsigned int window_posn; /* decompression offset within window */ unsigned int window_posn; /* decompression offset within window */
unsigned int frame_posn; /* current frame offset within in window */ unsigned int frame_posn; /* current frame offset within in window */
unsigned int frame; /* the number of 32kb frames processed */ unsigned int frame; /* the number of 32kb frames processed */
@ -70,8 +72,8 @@ struct lzxd_stream {
unsigned char intel_started; /* has intel E8 decoding started? */ unsigned char intel_started; /* has intel E8 decoding started? */
unsigned char block_type; /* type of the current block */ unsigned char block_type; /* type of the current block */
unsigned char header_read; /* have we started decoding at all yet? */ unsigned char header_read; /* have we started decoding at all yet? */
unsigned char posn_slots; /* how many posn slots in stream? */
unsigned char input_end; /* have we reached the end of input? */ unsigned char input_end; /* have we reached the end of input? */
unsigned char is_delta; /* does stream follow LZX DELTA spec? */
int error; int error;
@ -114,12 +116,14 @@ struct lzxd_stream {
* @param input an input stream with the LZX data. * @param input an input stream with the LZX data.
* @param output an output stream to write the decoded data to. * @param output an output stream to write the decoded data to.
* @param window_bits the size of the decoding window, which must be * @param window_bits the size of the decoding window, which must be
* between 15 and 21 inclusive. * between 15 and 21 inclusive for regular LZX
* data, or between 17 and 25 inclusive for
* LZX DELTA data.
* @param reset_interval the interval at which the LZX bitstream is * @param reset_interval the interval at which the LZX bitstream is
* reset, in multiples of LZX frames (32768 * reset, in multiples of LZX frames (32768
* bytes), e.g. a value of 2 indicates the input * bytes), e.g. a value of 2 indicates the input
* stream resets after every 65536 output bytes. * stream resets after every 65536 output bytes.
* A value of 0 indicates that the bistream never * A value of 0 indicates that the bitstream never
* resets, such as in CAB LZX streams. * resets, such as in CAB LZX streams.
* @param input_buffer_size the number of bytes to use as an input * @param input_buffer_size the number of bytes to use as an input
* bitstream buffer. * bitstream buffer.
@ -135,6 +139,8 @@ struct lzxd_stream {
* lzxd_set_output_length() once it is * lzxd_set_output_length() once it is
* known. If never set, 4 of the final 6 bytes * known. If never set, 4 of the final 6 bytes
* of the output stream may be incorrect. * of the output stream may be incorrect.
* @param is_delta should be zero for all regular LZX data,
* non-zero for LZX DELTA encoded data.
* @return a pointer to an initialised lzxd_stream structure, or NULL if * @return a pointer to an initialised lzxd_stream structure, or NULL if
* there was not enough memory or parameters to the function were wrong. * there was not enough memory or parameters to the function were wrong.
*/ */
@ -144,17 +150,38 @@ extern struct lzxd_stream *lzxd_init(struct mspack_system *system,
int window_bits, int window_bits,
int reset_interval, int reset_interval,
int input_buffer_size, int input_buffer_size,
off_t output_length); off_t output_length,
char is_delta);
/* see description of output_length in lzxd_init() */ /* see description of output_length in lzxd_init() */
extern void lzxd_set_output_length(struct lzxd_stream *lzx, extern void lzxd_set_output_length(struct lzxd_stream *lzx,
off_t output_length); off_t output_length);
/**
* Reads LZX DELTA reference data into the window and allows
* lzxd_decompress() to reference it.
*
* Call this before the first call to lzxd_decompress().
* @param lzx the LZX stream to apply this reference data to
* @param system an mspack_system implementation to use with the
* input param. Only read() will be called.
* @param input an input file handle to read reference data using
* system->read().
* @param length the length of the reference data. Cannot be longer
* than the LZX window size.
* @return an error code, or MSPACK_ERR_OK if successful
*/
extern int lzxd_set_reference_data(struct lzxd_stream *lzx,
struct mspack_system *system,
struct mspack_file *input,
unsigned int length);
/** /**
* Decompresses entire or partial LZX streams. * Decompresses entire or partial LZX streams.
* *
* The number of bytes of data that should be decompressed is given as the * The number of bytes of data that should be decompressed is given as the
* out_bytes parameter. If more bytes are decoded than are needed, they * out_bytes parameter. If more bytes are decoded than are needed, they
* will be kept over for a later invocation. * will be kept over for a later invocation.
* *
* The output bytes will be passed to the system->write() function given in * The output bytes will be passed to the system->write() function given in

View file

@ -1,5 +1,5 @@
/* This file is part of libmspack. /* This file is part of libmspack.
* (C) 2003-2004 Stuart Caie. * (C) 2003-2013 Stuart Caie.
* *
* The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
* by Microsoft Corporation. * by Microsoft Corporation.
@ -70,6 +70,10 @@
* The maximum window size has increased from 2MB to 32MB. This also * The maximum window size has increased from 2MB to 32MB. This also
* increases the maximum number of position slots, etc. * increases the maximum number of position slots, etc.
* *
* If the match length is 257 (the maximum possible), this signals
* a further length decoding step, that allows for matches up to
* 33024 bytes long.
*
* The format now allows for "reference data", supplied by the caller. * The format now allows for "reference data", supplied by the caller.
* If match offsets go further back than the number of bytes * If match offsets go further back than the number of bytes
* decompressed so far, that is them accessing the reference data. * decompressed so far, that is them accessing the reference data.
@ -189,27 +193,70 @@ static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens,
* a small 'position slot' number and a small offset from that slot are * a small 'position slot' number and a small offset from that slot are
* encoded instead of one large offset. * encoded instead of one large offset.
* *
* The number of slots is decided by how many are needed to encode the
* largest offset for a given window size. This is easy when the gap between
* slots is less than 128Kb, it's a linear relationship. But when extra_bits
* reaches its limit of 17 (because LZX can only ensure reading 17 bits of
* data at a time), we can only jump 128Kb at a time and have to start
* using more and more position slots as each window size doubles.
*
* position_base[] is an index to the position slot bases * position_base[] is an index to the position slot bases
* *
* extra_bits[] states how many bits of offset-from-base data is needed. * extra_bits[] states how many bits of offset-from-base data is needed.
* *
* They are generated like so: * They are calculated as follows:
* for (i = 0; i < 4; i++) extra_bits[i] = 0; * extra_bits[i] = 0 where i < 4
* for (i = 4, j = 0; i < 36; i+=2) extra_bits[i] = extra_bits[i+1] = j++; * extra_bits[i] = floor(i/2)-1 where i >= 4 && i < 36
* for (i = 36; i < 51; i++) extra_bits[i] = 17; * extra_bits[i] = 17 where i >= 36
* for (i = 0, j = 0; i < 51; j += 1 << extra_bits[i++]) position_base[i] = j; * position_base[0] = 0
* position_base[i] = position_base[i-1] + (1 << extra_bits[i-1])
*/ */
static const unsigned int position_base[51] = { static const unsigned int position_slots[11] = {
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290
384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288,
16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608, 262144,
393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720,
1441792, 1572864, 1703936, 1835008, 1966080, 2097152
}; };
static const unsigned char extra_bits[51] = { static const unsigned char extra_bits[36] = {
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 };
static const unsigned int position_base[290] = {
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512,
768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768,
49152, 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360,
786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
1835008, 1966080, 2097152, 2228224, 2359296, 2490368, 2621440, 2752512,
2883584, 3014656, 3145728, 3276800, 3407872, 3538944, 3670016, 3801088,
3932160, 4063232, 4194304, 4325376, 4456448, 4587520, 4718592, 4849664,
4980736, 5111808, 5242880, 5373952, 5505024, 5636096, 5767168, 5898240,
6029312, 6160384, 6291456, 6422528, 6553600, 6684672, 6815744, 6946816,
7077888, 7208960, 7340032, 7471104, 7602176, 7733248, 7864320, 7995392,
8126464, 8257536, 8388608, 8519680, 8650752, 8781824, 8912896, 9043968,
9175040, 9306112, 9437184, 9568256, 9699328, 9830400, 9961472, 10092544,
10223616, 10354688, 10485760, 10616832, 10747904, 10878976, 11010048,
11141120, 11272192, 11403264, 11534336, 11665408, 11796480, 11927552,
12058624, 12189696, 12320768, 12451840, 12582912, 12713984, 12845056,
12976128, 13107200, 13238272, 13369344, 13500416, 13631488, 13762560,
13893632, 14024704, 14155776, 14286848, 14417920, 14548992, 14680064,
14811136, 14942208, 15073280, 15204352, 15335424, 15466496, 15597568,
15728640, 15859712, 15990784, 16121856, 16252928, 16384000, 16515072,
16646144, 16777216, 16908288, 17039360, 17170432, 17301504, 17432576,
17563648, 17694720, 17825792, 17956864, 18087936, 18219008, 18350080,
18481152, 18612224, 18743296, 18874368, 19005440, 19136512, 19267584,
19398656, 19529728, 19660800, 19791872, 19922944, 20054016, 20185088,
20316160, 20447232, 20578304, 20709376, 20840448, 20971520, 21102592,
21233664, 21364736, 21495808, 21626880, 21757952, 21889024, 22020096,
22151168, 22282240, 22413312, 22544384, 22675456, 22806528, 22937600,
23068672, 23199744, 23330816, 23461888, 23592960, 23724032, 23855104,
23986176, 24117248, 24248320, 24379392, 24510464, 24641536, 24772608,
24903680, 25034752, 25165824, 25296896, 25427968, 25559040, 25690112,
25821184, 25952256, 26083328, 26214400, 26345472, 26476544, 26607616,
26738688, 26869760, 27000832, 27131904, 27262976, 27394048, 27525120,
27656192, 27787264, 27918336, 28049408, 28180480, 28311552, 28442624,
28573696, 28704768, 28835840, 28966912, 29097984, 29229056, 29360128,
29491200, 29622272, 29753344, 29884416, 30015488, 30146560, 30277632,
30408704, 30539776, 30670848, 30801920, 30932992, 31064064, 31195136,
31326208, 31457280, 31588352, 31719424, 31850496, 31981568, 32112640,
32243712, 32374784, 32505856, 32636928, 32768000, 32899072, 33030144,
33161216, 33292288, 33423360
}; };
static void lzxd_reset_state(struct lzxd_stream *lzx) { static void lzxd_reset_state(struct lzxd_stream *lzx) {
@ -235,18 +282,32 @@ struct lzxd_stream *lzxd_init(struct mspack_system *system,
int window_bits, int window_bits,
int reset_interval, int reset_interval,
int input_buffer_size, int input_buffer_size,
off_t output_length) off_t output_length,
char is_delta)
{ {
unsigned int window_size = 1 << window_bits; unsigned int window_size = 1 << window_bits;
struct lzxd_stream *lzx; struct lzxd_stream *lzx;
if (!system) return NULL; if (!system) return NULL;
/* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */ /* LZX DELTA window sizes are between 2^17 (128KiB) and 2^25 (32MiB),
* regular LZX windows are between 2^15 (32KiB) and 2^21 (2MiB)
*/
if (is_delta) {
if (window_bits < 17 || window_bits > 25) return NULL;
}
else {
if (window_bits < 15 || window_bits > 21) return NULL; if (window_bits < 15 || window_bits > 21) return NULL;
}
if (reset_interval < 0 || output_length < 0) {
D(("reset interval or output length < 0"))
return NULL;
}
/* round up input buffer size to multiple of two */
input_buffer_size = (input_buffer_size + 1) & -2; input_buffer_size = (input_buffer_size + 1) & -2;
if (!input_buffer_size) return NULL; if (input_buffer_size < 2) return NULL;
/* allocate decompression state */ /* allocate decompression state */
if (!(lzx = (struct lzxd_stream *) system->alloc(system, sizeof(struct lzxd_stream)))) { if (!(lzx = (struct lzxd_stream *) system->alloc(system, sizeof(struct lzxd_stream)))) {
@ -272,6 +333,7 @@ struct lzxd_stream *lzxd_init(struct mspack_system *system,
lzx->inbuf_size = input_buffer_size; lzx->inbuf_size = input_buffer_size;
lzx->window_size = 1 << window_bits; lzx->window_size = 1 << window_bits;
lzx->ref_data_size = 0;
lzx->window_posn = 0; lzx->window_posn = 0;
lzx->frame_posn = 0; lzx->frame_posn = 0;
lzx->frame = 0; lzx->frame = 0;
@ -280,11 +342,8 @@ struct lzxd_stream *lzxd_init(struct mspack_system *system,
lzx->intel_curpos = 0; lzx->intel_curpos = 0;
lzx->intel_started = 0; lzx->intel_started = 0;
lzx->error = MSPACK_ERR_OK; lzx->error = MSPACK_ERR_OK;
lzx->num_offsets = position_slots[window_bits - 15] << 3;
/* window bits: 15 16 17 18 19 20 21 lzx->is_delta = is_delta;
* position slots: 30 32 34 36 38 42 50 */
lzx->posn_slots = ((window_bits == 21) ? 50 :
((window_bits == 20) ? 42 : (window_bits << 1)));
lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0]; lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0];
lzxd_reset_state(lzx); lzxd_reset_state(lzx);
@ -292,8 +351,44 @@ struct lzxd_stream *lzxd_init(struct mspack_system *system,
return lzx; return lzx;
} }
/*
 * Loads LZX DELTA reference data into the tail of the decoding window so
 * that later matches may reach back into it.
 *
 * lzx     the LZX DELTA stream to attach the reference data to
 * system  provides read(); only needed when length > 0
 * input   file handle the reference data is read from; only needed when
 *         length > 0
 * length  number of reference bytes; must not exceed the window size
 *
 * Returns MSPACK_ERR_OK on success, MSPACK_ERR_ARGS for a NULL or non-DELTA
 * stream, a stream that has already started decoding, an over-long length,
 * or a missing system/input when length > 0, and MSPACK_ERR_READ when fewer
 * than length bytes could be read.
 *
 * The recorded reference size is only committed once the data has been
 * read completely. After a failed read it is 0, so the decoder never
 * accepts match offsets into a partially filled reference area.
 */
int lzxd_set_reference_data(struct lzxd_stream *lzx,
                            struct mspack_system *system,
                            struct mspack_file *input,
                            unsigned int length)
{
    if (!lzx) return MSPACK_ERR_ARGS;

    if (!lzx->is_delta) {
        D(("only LZX DELTA streams support reference data"))
        return MSPACK_ERR_ARGS;
    }
    if (lzx->offset) {
        D(("too late to set reference data after decoding starts"))
        return MSPACK_ERR_ARGS;
    }
    if (length > lzx->window_size) {
        D(("reference length (%u) is longer than the window", length))
        return MSPACK_ERR_ARGS;
    }
    if (length > 0 && (!system || !input)) {
        D(("length > 0 but no system or input"))
        return MSPACK_ERR_ARGS;
    }

    /* nothing in the window is valid reference data until the read below
     * has delivered every requested byte */
    lzx->ref_data_size = 0;

    if (length > 0) {
        /* reference data occupies the last 'length' bytes of the window */
        unsigned char *pos = &lzx->window[lzx->window_size - length];
        int bytes = system->read(input, pos, length);
        /* length was checked against the window size above, and the window
         * is at most 2^25 bytes, so the cast to int cannot overflow */
        if (bytes < (int)length) return MSPACK_ERR_READ;
    }

    lzx->ref_data_size = length;
    return MSPACK_ERR_OK;
}
void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) { void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) {
if (lzx) lzx->length = out_bytes; if (lzx && out_bytes > 0) lzx->length = out_bytes;
} }
int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
@ -304,7 +399,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
register unsigned short sym; register unsigned short sym;
int match_length, length_footer, extra, verbatim_bits, bytes_todo; int match_length, length_footer, extra, verbatim_bits, bytes_todo;
int this_run, main_element, aligned_bits, j; int this_run, main_element, aligned_bits, j, warned = 0;
unsigned char *window, *runsrc, *rundest, buf[12]; unsigned char *window, *runsrc, *rundest, buf[12];
unsigned int frame_size=0, end_frame, match_offset, window_posn; unsigned int frame_size=0, end_frame, match_offset, window_posn;
unsigned int R0, R1, R2; unsigned int R0, R1, R2;
@ -340,8 +435,12 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
/* have we reached the reset interval? (if there is one?) */ /* have we reached the reset interval? (if there is one?) */
if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) { if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
if (lzx->block_remaining) { if (lzx->block_remaining) {
/* this is a file format error, we can make a best effort to extract what we can */
D(("%d bytes remaining at reset interval", lzx->block_remaining)) D(("%d bytes remaining at reset interval", lzx->block_remaining))
return lzx->error = MSPACK_ERR_DECRUNCH; if (!warned) {
lzx->sys->message(NULL, "WARNING; invalid reset interval detected during LZX decompression");
warned++;
}
} }
/* re-read the intel header and reset the huffman lengths */ /* re-read the intel header and reset the huffman lengths */
@ -351,6 +450,12 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
R2 = lzx->R2; R2 = lzx->R2;
} }
/* LZX DELTA format has chunk_size, not present in LZX format */
if (lzx->is_delta) {
ENSURE_BITS(16);
REMOVE_BITS(16);
}
/* read header if necessary */ /* read header if necessary */
if (!lzx->header_read) { if (!lzx->header_read) {
/* read 1 bit. if bit=0, intel filesize = 0. /* read 1 bit. if bit=0, intel filesize = 0.
@ -393,11 +498,11 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
/* read lengths of and build aligned huffman decoding tree */ /* read lengths of and build aligned huffman decoding tree */
for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; } for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; }
BUILD_TABLE(ALIGNED); BUILD_TABLE(ALIGNED);
/* no break -- rest of aligned header is same as verbatim */ /* rest of aligned header is same as verbatim */ /*@fallthrough@*/
case LZX_BLOCKTYPE_VERBATIM: case LZX_BLOCKTYPE_VERBATIM:
/* read lengths of and build main huffman decoding tree */ /* read lengths of and build main huffman decoding tree */
READ_LENGTHS(MAINTREE, 0, 256); READ_LENGTHS(MAINTREE, 0, 256);
READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3)); READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + lzx->num_offsets);
BUILD_TABLE(MAINTREE); BUILD_TABLE(MAINTREE);
/* if the literal 0xE8 is anywhere in the block... */ /* if the literal 0xE8 is anywhere in the block... */
if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1; if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
@ -411,8 +516,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
lzx->intel_started = 1; lzx->intel_started = 1;
/* read 1-16 (not 0-15) bits to align to bytes */ /* read 1-16 (not 0-15) bits to align to bytes */
ENSURE_BITS(16); if (bits_left == 0) ENSURE_BITS(16);
if (bits_left > 16) i_ptr -= 2;
bits_left = 0; bit_buffer = 0; bits_left = 0; bit_buffer = 0;
/* read 12 bytes of stored R0 / R1 / R2 values */ /* read 12 bytes of stored R0 / R1 / R2 values */
@ -473,12 +577,37 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
case 2: match_offset = R2; R2=R0; R0 = match_offset; break; case 2: match_offset = R2; R2=R0; R0 = match_offset; break;
case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break; case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break;
default: default:
extra = extra_bits[match_offset]; extra = (match_offset >= 36) ? 17 : extra_bits[match_offset];
READ_BITS(verbatim_bits, extra); READ_BITS(verbatim_bits, extra);
match_offset = position_base[match_offset] - 2 + verbatim_bits; match_offset = position_base[match_offset] - 2 + verbatim_bits;
R2 = R1; R1 = R0; R0 = match_offset; R2 = R1; R1 = R0; R0 = match_offset;
} }
/* LZX DELTA uses max match length to signal even longer match */
if (match_length == LZX_MAX_MATCH && lzx->is_delta) {
int extra_len = 0;
ENSURE_BITS(3); /* 4 entry huffman tree */
if (PEEK_BITS(1) == 0) {
REMOVE_BITS(1); /* '0' -> 8 extra length bits */
READ_BITS(extra_len, 8);
}
else if (PEEK_BITS(2) == 2) {
REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */
READ_BITS(extra_len, 10);
extra_len += 0x100;
}
else if (PEEK_BITS(3) == 6) {
REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */
READ_BITS(extra_len, 12);
extra_len += 0x500;
}
else {
REMOVE_BITS(3); /* '111' -> 15 extra length bits */
READ_BITS(extra_len, 15);
}
match_length += extra_len;
}
if ((window_posn + match_length) > lzx->window_size) { if ((window_posn + match_length) > lzx->window_size) {
D(("match ran over window wrap")) D(("match ran over window wrap"))
return lzx->error = MSPACK_ERR_DECRUNCH; return lzx->error = MSPACK_ERR_DECRUNCH;
@ -489,6 +618,12 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
i = match_length; i = match_length;
/* does match offset wrap the window? */ /* does match offset wrap the window? */
if (match_offset > window_posn) { if (match_offset > window_posn) {
if (match_offset > lzx->offset &&
(match_offset - window_posn) > lzx->ref_data_size)
{
D(("match offset beyond LZX stream"))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
/* j = length from match offset to end of window */ /* j = length from match offset to end of window */
j = match_offset - window_posn; j = match_offset - window_posn;
if (j > (int) lzx->window_size) { if (j > (int) lzx->window_size) {
@ -544,7 +679,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
case 1: match_offset = R1; R1 = R0; R0 = match_offset; break; case 1: match_offset = R1; R1 = R0; R0 = match_offset; break;
case 2: match_offset = R2; R2 = R0; R0 = match_offset; break; case 2: match_offset = R2; R2 = R0; R0 = match_offset; break;
default: default:
extra = extra_bits[match_offset]; extra = (match_offset >= 36) ? 17 : extra_bits[match_offset];
match_offset = position_base[match_offset] - 2; match_offset = position_base[match_offset] - 2;
if (extra > 3) { if (extra > 3) {
/* verbatim and aligned bits */ /* verbatim and aligned bits */
@ -572,6 +707,31 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
R2 = R1; R1 = R0; R0 = match_offset; R2 = R1; R1 = R0; R0 = match_offset;
} }
/* LZX DELTA uses max match length to signal even longer match */
if (match_length == LZX_MAX_MATCH && lzx->is_delta) {
int extra_len = 0;
ENSURE_BITS(3); /* 4 entry huffman tree */
if (PEEK_BITS(1) == 0) {
REMOVE_BITS(1); /* '0' -> 8 extra length bits */
READ_BITS(extra_len, 8);
}
else if (PEEK_BITS(2) == 2) {
REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */
READ_BITS(extra_len, 10);
extra_len += 0x100;
}
else if (PEEK_BITS(3) == 6) {
REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */
READ_BITS(extra_len, 12);
extra_len += 0x500;
}
else {
REMOVE_BITS(3); /* '111' -> 15 extra length bits */
READ_BITS(extra_len, 15);
}
match_length += extra_len;
}
if ((window_posn + match_length) > lzx->window_size) { if ((window_posn + match_length) > lzx->window_size) {
D(("match ran over window wrap")) D(("match ran over window wrap"))
return lzx->error = MSPACK_ERR_DECRUNCH; return lzx->error = MSPACK_ERR_DECRUNCH;
@ -582,6 +742,12 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
i = match_length; i = match_length;
/* does match offset wrap the window? */ /* does match offset wrap the window? */
if (match_offset > window_posn) { if (match_offset > window_posn) {
if (match_offset > lzx->offset &&
(match_offset - window_posn) > lzx->ref_data_size)
{
D(("match offset beyond LZX stream"))
return lzx->error = MSPACK_ERR_DECRUNCH;
}
/* j = length from match offset to end of window */ /* j = length from match offset to end of window */
j = match_offset - window_posn; j = match_offset - window_posn;
if (j > (int) lzx->window_size) { if (j > (int) lzx->window_size) {
@ -654,7 +820,8 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
/* check that we've used all of the previous frame first */ /* check that we've used all of the previous frame first */
if (lzx->o_ptr != lzx->o_end) { if (lzx->o_ptr != lzx->o_end) {
D(("%ld avail bytes, new %d frame", lzx->o_end-lzx->o_ptr, frame_size)) D(("%ld avail bytes, new %d frame",
(long)(lzx->o_end - lzx->o_ptr), frame_size))
return lzx->error = MSPACK_ERR_DECRUNCH; return lzx->error = MSPACK_ERR_DECRUNCH;
} }

View file

@ -1,5 +1,5 @@
/* libmspack -- a library for working with Microsoft compression formats. /* libmspack -- a library for working with Microsoft compression formats.
* (C) 2003-2011 Stuart Caie <kyzer@4u.net> * (C) 2003-2019 Stuart Caie <kyzer@cabextract.org.uk>
* *
* libmspack is free software; you can redistribute it and/or modify it under * libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1 * the terms of the GNU Lesser General Public License (LGPL) version 2.1
@ -30,6 +30,7 @@
* - .CAB (MS Cabinet) files, which use deflate, LZX or Quantum compression * - .CAB (MS Cabinet) files, which use deflate, LZX or Quantum compression
* - .CHM (HTML Help) files, which use LZX compression * - .CHM (HTML Help) files, which use LZX compression
* - .LIT (MS EBook) files, which use LZX compression and DES encryption * - .LIT (MS EBook) files, which use LZX compression and DES encryption
* - .LZX (Exchange Offline Addressbook) files, which use LZX compression
* *
* To determine the capabilities of the library, and the binary * To determine the capabilities of the library, and the binary
* compatibility version of any particular compressor or decompressor, use * compatibility version of any particular compressor or decompressor, use
@ -60,6 +61,7 @@
* - mspack_create_hlp_compressor() creates a mshlp_compressor * - mspack_create_hlp_compressor() creates a mshlp_compressor
* - mspack_create_szdd_compressor() creates a msszdd_compressor * - mspack_create_szdd_compressor() creates a msszdd_compressor
* - mspack_create_kwaj_compressor() creates a mskwaj_compressor * - mspack_create_kwaj_compressor() creates a mskwaj_compressor
* - mspack_create_oab_compressor() creates a msoab_compressor
* *
* For decompression: * For decompression:
* - mspack_create_cab_decompressor() creates a mscab_decompressor * - mspack_create_cab_decompressor() creates a mscab_decompressor
@ -68,6 +70,7 @@
* - mspack_create_hlp_decompressor() creates a mshlp_decompressor * - mspack_create_hlp_decompressor() creates a mshlp_decompressor
* - mspack_create_szdd_decompressor() creates a msszdd_decompressor * - mspack_create_szdd_decompressor() creates a msszdd_decompressor
* - mspack_create_kwaj_decompressor() creates a mskwaj_decompressor * - mspack_create_kwaj_decompressor() creates a mskwaj_decompressor
* - mspack_create_oab_decompressor() creates a msoab_decompressor
* *
* Once finished working with a format, each kind of * Once finished working with a format, each kind of
* compressor/decompressor has its own specific destructor: * compressor/decompressor has its own specific destructor:
@ -83,6 +86,8 @@
* - mspack_destroy_szdd_decompressor() * - mspack_destroy_szdd_decompressor()
* - mspack_destroy_kwaj_compressor() * - mspack_destroy_kwaj_compressor()
* - mspack_destroy_kwaj_decompressor() * - mspack_destroy_kwaj_decompressor()
* - mspack_destroy_oab_compressor()
* - mspack_destroy_oab_decompressor()
* *
* Destroying a compressor or decompressor does not destroy any objects, * Destroying a compressor or decompressor does not destroy any objects,
* structures or handles that have been created using that compressor or * structures or handles that have been created using that compressor or
@ -208,6 +213,8 @@ extern int mspack_sys_selftest_internal(int);
* - #MSPACK_VER_MSSZDDC: the msszdd_compressor interface * - #MSPACK_VER_MSSZDDC: the msszdd_compressor interface
* - #MSPACK_VER_MSKWAJD: the mskwaj_decompressor interface * - #MSPACK_VER_MSKWAJD: the mskwaj_decompressor interface
* - #MSPACK_VER_MSKWAJC: the mskwaj_compressor interface * - #MSPACK_VER_MSKWAJC: the mskwaj_compressor interface
* - #MSPACK_VER_MSOABD: the msoab_decompressor interface
* - #MSPACK_VER_MSOABC: the msoab_compressor interface
* *
* The result of the function should be interpreted as follows: * The result of the function should be interpreted as follows:
* - -1: this interface is completely unknown to the library * - -1: this interface is completely unknown to the library
@ -249,6 +256,10 @@ extern int mspack_version(int entity);
#define MSPACK_VER_MSKWAJD (12) #define MSPACK_VER_MSKWAJD (12)
/** Pass to mspack_version() to get the mskwaj_compressor version */ /** Pass to mspack_version() to get the mskwaj_compressor version */
#define MSPACK_VER_MSKWAJC (13) #define MSPACK_VER_MSKWAJC (13)
/** Pass to mspack_version() to get the msoab_decompressor version */
#define MSPACK_VER_MSOABD (14)
/** Pass to mspack_version() to get the msoab_compressor version */
#define MSPACK_VER_MSOABC (15)
/* --- file I/O abstraction ------------------------------------------------ */ /* --- file I/O abstraction ------------------------------------------------ */
@ -317,7 +328,9 @@ struct mspack_system {
* @param bytes the number of bytes to read from the file. * @param bytes the number of bytes to read from the file.
* @return the number of bytes successfully read (this can be less than * @return the number of bytes successfully read (this can be less than
* the number requested), zero to mark the end of file, or less * the number requested), zero to mark the end of file, or less
* than zero to indicate an error. * than zero to indicate an error. The library does not "retry"
* reads and assumes short reads are due to EOF, so you should
* avoid returning short reads because of transient errors.
* @see open(), write() * @see open(), write()
*/ */
int (*read)(struct mspack_file *file, int (*read)(struct mspack_file *file,
@ -411,7 +424,7 @@ struct mspack_system {
/** /**
* Frees memory. * Frees memory.
* *
* @param ptr the memory to be freed. * @param ptr the memory to be freed. NULL is accepted and ignored.
* @see alloc() * @see alloc()
*/ */
void (*free)(void *ptr); void (*free)(void *ptr);
@ -645,6 +658,31 @@ extern void mspack_destroy_kwaj_compressor(struct mskwaj_compressor *self);
extern void mspack_destroy_kwaj_decompressor(struct mskwaj_decompressor *self); extern void mspack_destroy_kwaj_decompressor(struct mskwaj_decompressor *self);
/** Creates a new OAB compressor.
* @param sys a custom mspack_system structure, or NULL to use the default
* @return a #msoab_compressor or NULL
* @see mspack_destroy_oab_compressor()
*/
extern struct msoab_compressor *
mspack_create_oab_compressor(struct mspack_system *sys);
/** Creates a new OAB decompressor.
* @param sys a custom mspack_system structure, or NULL to use the default
* @return a #msoab_decompressor or NULL
* @see mspack_destroy_oab_decompressor()
*/
extern struct msoab_decompressor *
mspack_create_oab_decompressor(struct mspack_system *sys);
/** Destroys an existing OAB compressor.
* @param self the #msoab_compressor to destroy
* @see mspack_create_oab_compressor()
*/
extern void mspack_destroy_oab_compressor(struct msoab_compressor *self);
/** Destroys an existing OAB decompressor.
* @param self the #msoab_decompressor to destroy
* @see mspack_create_oab_decompressor()
*/
extern void mspack_destroy_oab_decompressor(struct msoab_decompressor *self);
/* --- support for .CAB (MS Cabinet) file format --------------------------- */ /* --- support for .CAB (MS Cabinet) file format --------------------------- */
/** /**
@ -896,6 +934,13 @@ struct mscabd_file {
#define MSCABD_PARAM_FIXMSZIP (1) #define MSCABD_PARAM_FIXMSZIP (1)
/** mscab_decompressor::set_param() parameter: size of decompression buffer */ /** mscab_decompressor::set_param() parameter: size of decompression buffer */
#define MSCABD_PARAM_DECOMPBUF (2) #define MSCABD_PARAM_DECOMPBUF (2)
/** mscab_decompressor::set_param() parameter: salvage data from bad cabinets?
* If enabled, open() will skip files with bad folder indices or filenames
* rather than reject the whole cabinet, and extract() will limit rather than
* reject files with invalid offsets and lengths, and bad data block checksums
* will be ignored. Available only in CAB decoder version 2 and above.
*/
#define MSCABD_PARAM_SALVAGE (3)
/** TODO */ /** TODO */
struct mscab_compressor { struct mscab_compressor {
@ -1509,7 +1554,7 @@ struct mschm_compressor {
*/ */
int (*set_param)(struct mschm_compressor *self, int (*set_param)(struct mschm_compressor *self,
int param, int param,
unsigned int value); int value);
/** /**
* Returns the error code set by the most recently called method. * Returns the error code set by the most recently called method.
@ -1808,7 +1853,7 @@ struct msszdd_compressor {
*/ */
int (*set_param)(struct msszdd_compressor *self, int (*set_param)(struct msszdd_compressor *self,
int param, int param,
unsigned int value); int value);
/** /**
* Returns the error code set by the most recently called method. * Returns the error code set by the most recently called method.
@ -1937,6 +1982,8 @@ struct msszdd_decompressor {
#define MSKWAJ_COMP_SZDD (2) #define MSKWAJ_COMP_SZDD (2)
/** KWAJ compression type: LZ+Huffman compression */ /** KWAJ compression type: LZ+Huffman compression */
#define MSKWAJ_COMP_LZH (3) #define MSKWAJ_COMP_LZH (3)
/** KWAJ compression type: MSZIP */
#define MSKWAJ_COMP_MSZIP (4)
/** KWAJ optional header flag: decompressed file length is included */ /** KWAJ optional header flag: decompressed file length is included */
#define MSKWAJ_HDR_HASLENGTH (0x01) #define MSKWAJ_HDR_HASLENGTH (0x01)
@ -2044,7 +2091,7 @@ struct mskwaj_compressor {
*/ */
int (*set_param)(struct mskwaj_compressor *self, int (*set_param)(struct mskwaj_compressor *self,
int param, int param,
unsigned int value); int value);
/** /**
@ -2196,6 +2243,141 @@ struct mskwaj_decompressor {
int (*last_error)(struct mskwaj_decompressor *self); int (*last_error)(struct mskwaj_decompressor *self);
}; };
/* --- support for .LZX (Offline Address Book) file format ----------------- */
/**
* A compressor for the Offline Address Book (OAB) format.
*
* Offers two operations: compress() for a full OAB file and
* compress_incremental() for a patch between two versions of a file.
*
* All fields are READ ONLY.
*
* @see mspack_create_oab_compressor(), mspack_destroy_oab_compressor()
*/
struct msoab_compressor {
/**
* Compresses a full OAB file.
*
* The input file will be read and the compressed contents written to the
* output file.
*
* @param self a self-referential pointer to the msoab_compressor
* instance being called
* @param input the filename of the input file. This is passed
* directly to mspack_system::open().
* @param output the filename of the output file. This is passed
* directly to mspack_system::open().
* @return an error code, or MSPACK_ERR_OK if successful
*/
int (*compress) (struct msoab_compressor *self,
const char *input,
const char *output);
/**
* Generates a compressed incremental OAB patch file.
*
* The two uncompressed files "input" and "base" will be read, and an
* incremental patch to generate "input" from "base" will be written to
* the output file.
*
* @param self a self-referential pointer to the msoab_compressor
* instance being called
* @param input the filename of the input file containing the new
* version of its contents. This is passed directly
* to mspack_system::open().
* @param base the filename of the original base file containing
* the old version of its contents, against which the
* incremental patch shall be generated. This is passed
* directly to mspack_system::open().
* @param output the filename of the output file. This is passed
* directly to mspack_system::open().
* @return an error code, or MSPACK_ERR_OK if successful
*/
int (*compress_incremental) (struct msoab_compressor *self,
const char *input,
const char *base,
const char *output);
};
/**
* A decompressor for .LZX (Offline Address Book) files.
*
* Offers decompress() for full files, decompress_incremental() for
* applying an incremental patch to an uncompressed base file, and
* set_param() for tuning the decompression engine.
*
* All fields are READ ONLY.
*
* @see mspack_create_oab_decompressor(), mspack_destroy_oab_decompressor()
*/
struct msoab_decompressor {
/**
* Decompresses a full Offline Address Book file.
*
* If the input file is a valid compressed Offline Address Book file,
* it will be read and the decompressed contents will be written to
* the output file.
*
* @param self a self-referential pointer to the msoab_decompressor
* instance being called
* @param input the filename of the input file. This is passed
* directly to mspack_system::open().
* @param output the filename of the output file. This is passed
* directly to mspack_system::open().
* @return an error code, or MSPACK_ERR_OK if successful
*/
int (*decompress) (struct msoab_decompressor *self,
const char *input,
const char *output);
/**
* Decompresses an Offline Address Book with an incremental patch file.
*
* This requires both a full UNCOMPRESSED Offline Address Book file to
* act as the "base", and a compressed incremental patch file as input.
* If the input file is valid, it will be decompressed with reference to
* the base file, and the decompressed contents will be written to the
* output file.
*
* There is no way to tell what the right base file is for the given
* incremental patch, but if you get it wrong, this will usually result
* in incorrect data being decompressed, which will then fail a checksum
* test.
*
* @param self a self-referential pointer to the msoab_decompressor
* instance being called
* @param input the filename of the compressed incremental patch
* file. This is passed directly to mspack_system::open().
* @param base the filename of the uncompressed base file to which
* the incremental patch shall be applied. This is passed
* directly to mspack_system::open().
* @param output the filename of the output file. This is passed
* directly to mspack_system::open().
* @return an error code, or MSPACK_ERR_OK if successful
*/
int (*decompress_incremental) (struct msoab_decompressor *self,
const char *input,
const char *base,
const char *output);
/**
* Sets an OAB decompression engine parameter. Available only in OAB
* decompressor version 2 and above.
*
* - #MSOABD_PARAM_DECOMPBUF: How many bytes should be used as an input
* buffer by decompressors? The minimum value is 16. The default value
* is 4096.
*
* @param self a self-referential pointer to the msoab_decompressor
* instance being called
* @param param the parameter to set, e.g. #MSOABD_PARAM_DECOMPBUF
* @param value the value to set the parameter to
* @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS if there
* is a problem with either parameter or value.
*/
int (*set_param)(struct msoab_decompressor *self,
int param,
int value);
};
/**
 * msoab_decompressor::set_param() parameter: size, in bytes, of the input
 * buffer used by decompressors. The minimum value is 16 and the default
 * value is 4096.
 */
#define MSOABD_PARAM_DECOMPBUF (0)
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -108,6 +108,11 @@ extern struct mszipd_stream *mszipd_init(struct mspack_system *system,
*/ */
extern int mszipd_decompress(struct mszipd_stream *zip, off_t out_bytes); extern int mszipd_decompress(struct mszipd_stream *zip, off_t out_bytes);
/* decompresses an entire MS-ZIP stream in a KWAJ file. Acts very much
 * like mszipd_decompress(), but doesn't take an out_bytes parameter:
 * blocks are decompressed and written out one after another until a
 * block length of zero is read.
 *
 * - zip is the MS-ZIP stream state to decompress
 * - returns MSPACK_ERR_OK if successful, or an MSPACK_ERR_* error code
 */
extern int mszipd_decompress_kwaj(struct mszipd_stream *zip);
/* frees all stream associated with an MS-ZIP data stream /* frees all stream associated with an MS-ZIP data stream
* *
* - calls system->free() using the system pointer given in mszipd_init() * - calls system->free() using the system pointer given in mszipd_init()

View file

@ -264,7 +264,7 @@ static int inflate(struct mszipd_stream *zip) {
length += lit_lengths[code]; length += lit_lengths[code];
READ_HUFFSYM(DISTANCE, code); READ_HUFFSYM(DISTANCE, code);
if (code > 30) return INF_ERR_DISTCODE; if (code >= 30) return INF_ERR_DISTCODE;
READ_BITS_T(distance, dist_extrabits[code]); READ_BITS_T(distance, dist_extrabits[code]);
distance += dist_offsets[code]; distance += dist_offsets[code];
@ -349,8 +349,9 @@ struct mszipd_stream *mszipd_init(struct mspack_system *system,
if (!system) return NULL; if (!system) return NULL;
/* round up input buffer size to multiple of two */
input_buffer_size = (input_buffer_size + 1) & -2; input_buffer_size = (input_buffer_size + 1) & -2;
if (!input_buffer_size) return NULL; if (input_buffer_size < 2) return NULL;
/* allocate decompression state */ /* allocate decompression state */
if (!(zip = (struct mszipd_stream *) system->alloc(system, sizeof(struct mszipd_stream)))) { if (!(zip = (struct mszipd_stream *) system->alloc(system, sizeof(struct mszipd_stream)))) {
@ -465,6 +466,45 @@ int mszipd_decompress(struct mszipd_stream *zip, off_t out_bytes) {
return MSPACK_ERR_OK; return MSPACK_ERR_OK;
} }
/* Decompresses the MS-ZIP data of a KWAJ file, one block at a time.
 *
 * Each iteration aligns the bit buffer to a byte boundary, reads a 16-bit
 * block length (low byte first) and stops if it is zero. Otherwise it
 * checks for the two signature bytes 'C' and 'K', resets the window
 * position and output count, runs inflate() and writes
 * zip->bytes_output bytes from the start of zip->window to zip->output.
 *
 * - zip is the MS-ZIP stream state; its sys, output, window, window_posn,
 *   bytes_output and error fields are used here
 * - returns MSPACK_ERR_OK once a zero block length has been read
 * - returns MSPACK_ERR_DATAFORMAT if the "CK" signature is missing; note
 *   that zip->error is NOT updated on this path
 * - on an inflate() failure, stores and returns the inflate error code if
 *   it is positive, or MSPACK_ERR_DECRUNCH otherwise
 * - stores and returns MSPACK_ERR_WRITE if the write does not report
 *   exactly zip->bytes_output bytes
 *
 * NOTE(review): RESTORE_BITS, REMOVE_BITS, READ_BITS and STORE_BITS are
 * macros defined elsewhere; they may contain additional exit paths (e.g.
 * on input failure) that are not visible here -- confirm.
 */
int mszipd_decompress_kwaj(struct mszipd_stream *zip) {
/* for the bit buffer */
/* (i_ptr and i_end are not referenced by name in this function;
 * presumably the bit-buffer macros use them -- confirm) */
register unsigned int bit_buffer;
register int bits_left;
unsigned char *i_ptr, *i_end;
int i, error, block_len;
/* unpack blocks until block_len == 0 */
for (;;) {
RESTORE_BITS;
/* align to bytestream, read block_len */
/* (bits_left & 7) is the number of bits beyond a whole-byte multiple */
i = bits_left & 7; REMOVE_BITS(i);
/* block_len is assembled from two bytes, low byte first; below it is
 * only tested against zero and not otherwise used */
READ_BITS(block_len, 8);
READ_BITS(i, 8); block_len |= i << 8;
if (block_len == 0) break;
/* read "CK" header */
READ_BITS(i, 8); if (i != 'C') return MSPACK_ERR_DATAFORMAT;
READ_BITS(i, 8); if (i != 'K') return MSPACK_ERR_DATAFORMAT;
/* inflate block */
/* each block starts at the beginning of the window with no output yet */
zip->window_posn = 0;
zip->bytes_output = 0;
STORE_BITS;
if ((error = inflate(zip))) {
D(("inflate error %d", error))
/* positive codes are passed through unchanged; anything else is
 * reported as MSPACK_ERR_DECRUNCH */
return zip->error = (error > 0) ? error : MSPACK_ERR_DECRUNCH;
}
/* write inflated block */
if (zip->sys->write(zip->output, &zip->window[0], zip->bytes_output)
!= zip->bytes_output) return zip->error = MSPACK_ERR_WRITE;
}
return MSPACK_ERR_OK;
}
void mszipd_free(struct mszipd_stream *zip) { void mszipd_free(struct mszipd_stream *zip) {
struct mspack_system *sys; struct mspack_system *sys;
if (zip) { if (zip) {

View file

@ -197,6 +197,7 @@ struct qtmd_stream *qtmd_init(struct mspack_system *system,
/* Quantum supports window sizes of 2^10 (1Kb) through 2^21 (2Mb) */ /* Quantum supports window sizes of 2^10 (1Kb) through 2^21 (2Mb) */
if (window_bits < 10 || window_bits > 21) return NULL; if (window_bits < 10 || window_bits > 21) return NULL;
/* round up input buffer size to multiple of two */
input_buffer_size = (input_buffer_size + 1) & -2; input_buffer_size = (input_buffer_size + 1) & -2;
if (input_buffer_size < 2) return NULL; if (input_buffer_size < 2) return NULL;

View file

@ -1,5 +1,5 @@
/* This file is part of libmspack. /* This file is part of libmspack.
* (C) 2003-2010 Stuart Caie. * (C) 2003-2014 Stuart Caie.
* *
* libmspack is free software; you can redistribute it and/or modify it under * libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1 * the terms of the GNU Lesser General Public License (LGPL) version 2.1
@ -10,8 +10,7 @@
#ifndef MSPACK_READHUFF_H #ifndef MSPACK_READHUFF_H
#define MSPACK_READHUFF_H 1 #define MSPACK_READHUFF_H 1
/* This implements a fast Huffman tree decoding system. /* This implements a fast Huffman tree decoding system. */
*/
#if !(defined(BITS_ORDER_MSB) || defined(BITS_ORDER_LSB)) #if !(defined(BITS_ORDER_MSB) || defined(BITS_ORDER_LSB))
# error "readhuff.h is used in conjunction with readbits.h, include that first" # error "readhuff.h is used in conjunction with readbits.h, include that first"
@ -140,6 +139,7 @@ static int make_decode_table(unsigned int nsyms, unsigned int nbits,
for (bit_num = nbits+1; bit_num <= HUFF_MAXBITS; bit_num++) { for (bit_num = nbits+1; bit_num <= HUFF_MAXBITS; bit_num++) {
for (sym = 0; sym < nsyms; sym++) { for (sym = 0; sym < nsyms; sym++) {
if (length[sym] != bit_num) continue; if (length[sym] != bit_num) continue;
if (pos >= table_mask) return 1; /* table overflow */
#ifdef BITS_ORDER_MSB #ifdef BITS_ORDER_MSB
leaf = pos >> 16; leaf = pos >> 16;
@ -161,8 +161,7 @@ static int make_decode_table(unsigned int nsyms, unsigned int nbits,
if ((pos >> (15-fill)) & 1) leaf++; if ((pos >> (15-fill)) & 1) leaf++;
} }
table[leaf] = sym; table[leaf] = sym;
pos += bit_mask;
if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
} }
bit_mask >>= 1; bit_mask >>= 1;
} }

View file

@ -8,7 +8,7 @@
*/ */
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include <config.h> # include "config.h"
#endif #endif
#include "system-mspack.h" #include "system-mspack.h"

View file

@ -16,7 +16,7 @@ extern "C" {
/* ensure config.h is read before mspack.h */ /* ensure config.h is read before mspack.h */
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include <config.h> # include "config.h"
#endif #endif
#include "mspack.h" #include "mspack.h"
@ -61,7 +61,7 @@ extern "C" {
(defined(FILESIZEBITS) && FILESIZEBITS >= 64) || \ (defined(FILESIZEBITS) && FILESIZEBITS >= 64) || \
(defined(SIZEOF_OFF_T) && SIZEOF_OFF_T >= 8) || \ (defined(SIZEOF_OFF_T) && SIZEOF_OFF_T >= 8) || \
defined(_LARGEFILE_SOURCE) || defined(_LARGEFILE64_SOURCE)) defined(_LARGEFILE_SOURCE) || defined(_LARGEFILE64_SOURCE))
# define LARGEFILE_SUPPORT # define LARGEFILE_SUPPORT 1
# define LD "lld" # define LD "lld"
# define LU "llu" # define LU "llu"
#else #else

View file

@ -100,7 +100,7 @@ static struct msszddd_header *szddd_open(struct msszdd_decompressor *base,
if (self->error) { if (self->error) {
if (fh) sys->close(fh); if (fh) sys->close(fh);
if (hdr) sys->free(hdr); sys->free(hdr);
hdr = NULL; hdr = NULL;
} }
@ -150,7 +150,7 @@ static int szddd_read_headers(struct mspack_system *sys,
/* read and check signature */ /* read and check signature */
if (sys->read(fh, buf, 8) != 8) return MSPACK_ERR_READ; if (sys->read(fh, buf, 8) != 8) return MSPACK_ERR_READ;
if ((mspack_memcmp(buf, szdd_signature_expand, 8) == 0)) { if ((memcmp(buf, szdd_signature_expand, 8) == 0)) {
/* common SZDD */ /* common SZDD */
hdr->format = MSSZDD_FMT_NORMAL; hdr->format = MSSZDD_FMT_NORMAL;
@ -160,7 +160,7 @@ static int szddd_read_headers(struct mspack_system *sys,
hdr->missing_char = buf[1]; hdr->missing_char = buf[1];
hdr->length = EndGetI32(&buf[2]); hdr->length = EndGetI32(&buf[2]);
} }
else if ((mspack_memcmp(buf, szdd_signature_qbasic, 8) == 0)) { else if ((memcmp(buf, szdd_signature_qbasic, 8) == 0)) {
/* special QBasic SZDD */ /* special QBasic SZDD */
hdr->format = MSSZDD_FMT_QBASIC; hdr->format = MSSZDD_FMT_QBASIC;
if (sys->read(fh, buf, 4) != 4) return MSPACK_ERR_READ; if (sys->read(fh, buf, 4) != 4) return MSPACK_ERR_READ;