Commit 95278e4 for zlib
commit 95278e4ef9de87294dea0c94184bd8fe4316c3a9
Author: Mark Adler <madler@alumni.caltech.edu>
Date: Wed Apr 9 23:35:16 2025 -0700
Improve the discrimination between trailing garbage and bad gzip.
This proceeds to try to decode whatever follows the last gzip
member, and concludes that it is acceptable trailing garbage only
if it results in a data error without decompressing any data. This
commit also reduces the probability of a false-positive gzip header
detection.
diff --git a/gzguts.h b/gzguts.h
index 69c77eb..687f2ff 100644
--- a/gzguts.h
+++ b/gzguts.h
@@ -183,6 +183,7 @@ typedef struct {
unsigned char *out; /* output buffer (double-sized when reading) */
int direct; /* 0 if processing gzip, 1 if transparent */
/* just for reading */
+ int junk; /* -1 = start, 1 = junk candidate, 0 = in gzip */
int how; /* 0: get header, 1: copy, 2: decompress */
z_off64_t start; /* where the gzip data started, for rewinding */
int eof; /* true if end of input file reached */
diff --git a/gzlib.c b/gzlib.c
index 4c1aa83..79a7e97 100644
--- a/gzlib.c
+++ b/gzlib.c
@@ -72,6 +72,7 @@ local void gz_reset(gz_statep state) {
state->eof = 0; /* not at end of file */
state->past = 0; /* have not read past end yet */
state->how = LOOK; /* look for gzip header */
+ state->junk = -1; /* mark first member */
}
else /* for writing ... */
state->reset = 0; /* no deflateReset pending */
diff --git a/gzread.c b/gzread.c
index 6fefe89..ac8be77 100644
--- a/gzread.c
+++ b/gzread.c
@@ -106,47 +106,42 @@ local int gz_look(gz_statep state) {
}
}
- /* if transparent reading is disabled, simply read as gzip */
- if (state->direct == -1) {
+ /* if transparent reading is disabled, which would only be at the start, or
+ if we're looking for a gzip member after the first one, which is not at
+ the start, then proceed directly to look for a gzip member next */
+ if (state->direct == -1 || state->junk == 0) {
inflateReset(strm);
state->how = GZIP;
+ state->junk = state->junk != -1;
state->direct = 0;
return 0;
}
- /* get at least the magic bytes in the input buffer */
- if (strm->avail_in < 2) {
- if (gz_avail(state) == -1)
- return -1;
- if (strm->avail_in == 0)
- return 0;
- }
+ /* otherwise we're at the start with auto-detect -- we check to see if the
+ first four bytes could be a gzip header in order to decide whether or not
+ this will be a transparent read */
+
+ /* load any header bytes into the input buffer -- if the input is empty,
+ then it's not an error as this is a transparent read of zero bytes */
+ if (gz_avail(state) == -1)
+ return -1;
+ if (strm->avail_in == 0)
+ return 0;
- /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
- a logical dilemma here when considering the case of a partially written
- gzip file, to wit, if a single 31 byte is written, then we cannot tell
- whether this is a single-byte file, or just a partially written gzip
- file -- for here we assume that if a gzip file is being written, then
- the header will be written in a single operation, so that reading a
- single byte is sufficient indication that it is not a gzip file) */
- if (strm->avail_in > 1 &&
- strm->next_in[0] == 31 && strm->next_in[1] == 139) {
+ /* see if this is (likely) gzip input -- if the first four bytes are
+ consistent with a gzip header, then go look for the first gzip member,
+ otherwise proceed to copy the input transparently */
+ if (strm->avail_in > 3 &&
+ strm->next_in[0] == 31 && strm->next_in[1] == 139 &&
+ strm->next_in[2] == 8 && strm->next_in[3] < 32) {
inflateReset(strm);
state->how = GZIP;
+ state->junk = 1;
state->direct = 0;
return 0;
}
- /* no gzip header -- if we were decoding gzip before, then this is trailing
- garbage. Ignore the trailing garbage and finish. */
- if (state->direct == 0) {
- strm->avail_in = 0;
- state->eof = 1;
- state->x.have = 0;
- return 0;
- }
-
- /* doing raw i/o, copy any leftover input to output -- this assumes that
+ /* doing raw i/o: copy any leftover input to output -- this assumes that
the output buffer is larger than the input buffer, which also assures
space for gzungetc() */
state->x.next = state->out;
@@ -154,7 +149,6 @@ local int gz_look(gz_statep state) {
state->x.have = strm->avail_in;
strm->avail_in = 0;
state->how = COPY;
- state->direct = 1;
return 0;
}
@@ -181,6 +175,9 @@ local int gz_decomp(gz_statep state) {
/* decompress and handle errors */
ret = inflate(strm, Z_NO_FLUSH);
+ if (strm->avail_out < had)
+ /* any decompressed data marks this as a real gzip stream */
+ state->junk = 0;
if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
gz_error(state, Z_STREAM_ERROR,
"internal error: inflate stream corrupt");
@@ -191,6 +188,12 @@ local int gz_decomp(gz_statep state) {
return -1;
}
if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
+ if (state->junk == 1) { /* trailing garbage is ok */
+ strm->avail_in = 0;
+ state->eof = 1;
+ state->how = LOOK;
+ break;
+ }
gz_error(state, Z_DATA_ERROR,
strm->msg == NULL ? "compressed data error" : strm->msg);
return -1;
@@ -202,8 +205,10 @@ local int gz_decomp(gz_statep state) {
state->x.next = strm->next_out - state->x.have;
/* if the gzip stream completed successfully, look for another */
- if (ret == Z_STREAM_END)
+ if (ret == Z_STREAM_END) {
+ state->junk = 0;
state->how = LOOK;
+ }
/* good decompression */
return 0;