aboutsummaryrefslogtreecommitdiff
path: root/src/libutil/compression.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/libutil/compression.cc')
-rw-r--r--src/libutil/compression.cc557
1 files changed, 238 insertions, 319 deletions
diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc
index 5de85ede1..53b62f62a 100644
--- a/src/libutil/compression.cc
+++ b/src/libutil/compression.cc
@@ -17,228 +17,258 @@ namespace nix {
static const size_t bufSize = 32 * 1024;
-static void decompressNone(Source & source, Sink & sink)
+// Don't feed brotli too much at once.
+struct ChunkedCompressionSink : CompressionSink
{
- std::vector<unsigned char> buf(bufSize);
- while (true) {
- size_t n;
- try {
- n = source.read(buf.data(), buf.size());
- } catch (EndOfFile &) {
- break;
+ uint8_t outbuf[BUFSIZ];
+
+ void write(const unsigned char * data, size_t len) override
+ {
+ const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
+ while (len) {
+ size_t n = std::min(CHUNK_SIZE, len);
+ writeInternal(data, n);
+ data += n;
+ len -= n;
}
- sink(buf.data(), n);
}
-}
-static void decompressXZ(Source & source, Sink & sink)
+ virtual void writeInternal(const unsigned char * data, size_t len) = 0;
+};
+
+struct NoneSink : CompressionSink
{
- lzma_stream strm(LZMA_STREAM_INIT);
-
- lzma_ret ret = lzma_stream_decoder(
- &strm, UINT64_MAX, LZMA_CONCATENATED);
- if (ret != LZMA_OK)
- throw CompressionError("unable to initialise lzma decoder");
-
- Finally free([&]() { lzma_end(&strm); });
-
- lzma_action action = LZMA_RUN;
- std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
- strm.next_in = nullptr;
- strm.avail_in = 0;
- strm.next_out = outbuf.data();
- strm.avail_out = outbuf.size();
- bool eof = false;
-
- while (true) {
- checkInterrupt();
-
- if (strm.avail_in == 0 && !eof) {
- strm.next_in = inbuf.data();
- try {
- strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
- } catch (EndOfFile &) {
- eof = true;
- }
- }
+ Sink & nextSink;
+ NoneSink(Sink & nextSink) : nextSink(nextSink) { }
+ void finish() override { flush(); }
+ void write(const unsigned char * data, size_t len) override { nextSink(data, len); }
+};
- if (strm.avail_in == 0)
- action = LZMA_FINISH;
+struct XzDecompressionSink : CompressionSink
+{
+ Sink & nextSink;
+ uint8_t outbuf[BUFSIZ];
+ lzma_stream strm = LZMA_STREAM_INIT;
+ bool finished = false;
- lzma_ret ret = lzma_code(&strm, action);
+ XzDecompressionSink(Sink & nextSink) : nextSink(nextSink)
+ {
+ lzma_ret ret = lzma_stream_decoder(
+ &strm, UINT64_MAX, LZMA_CONCATENATED);
+ if (ret != LZMA_OK)
+ throw CompressionError("unable to initialise lzma decoder");
- if (strm.avail_out < outbuf.size()) {
- sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
- strm.next_out = outbuf.data();
- strm.avail_out = outbuf.size();
- }
+ strm.next_out = outbuf;
+ strm.avail_out = sizeof(outbuf);
+ }
- if (ret == LZMA_STREAM_END) return;
+ ~XzDecompressionSink()
+ {
+ lzma_end(&strm);
+ }
- if (ret != LZMA_OK)
- throw CompressionError("error %d while decompressing xz file", ret);
+ void finish() override
+ {
+ CompressionSink::flush();
+ write(nullptr, 0);
}
-}
-static void decompressBzip2(Source & source, Sink & sink)
-{
- bz_stream strm;
- memset(&strm, 0, sizeof(strm));
-
- int ret = BZ2_bzDecompressInit(&strm, 0, 0);
- if (ret != BZ_OK)
- throw CompressionError("unable to initialise bzip2 decoder");
-
- Finally free([&]() { BZ2_bzDecompressEnd(&strm); });
-
- std::vector<char> inbuf(bufSize), outbuf(bufSize);
- strm.next_in = nullptr;
- strm.avail_in = 0;
- strm.next_out = outbuf.data();
- strm.avail_out = outbuf.size();
- bool eof = false;
-
- while (true) {
- checkInterrupt();
-
- if (strm.avail_in == 0 && !eof) {
- strm.next_in = inbuf.data();
- try {
- strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
- } catch (EndOfFile &) {
- eof = true;
- }
- }
+ void write(const unsigned char * data, size_t len) override
+ {
+ strm.next_in = data;
+ strm.avail_in = len;
- int ret = BZ2_bzDecompress(&strm);
+ while (!finished && (!data || strm.avail_in)) {
+ checkInterrupt();
- if (strm.avail_in == 0 && strm.avail_out == outbuf.size() && eof)
- throw CompressionError("bzip2 data ends prematurely");
+ lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH);
+ if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+ throw CompressionError("error %d while decompressing xz file", ret);
+
+ finished = ret == LZMA_STREAM_END;
- if (strm.avail_out < outbuf.size()) {
- sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
- strm.next_out = outbuf.data();
- strm.avail_out = outbuf.size();
+ if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
+ nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
+ strm.next_out = outbuf;
+ strm.avail_out = sizeof(outbuf);
+ }
}
+ }
+};
- if (ret == BZ_STREAM_END) return;
+struct BzipDecompressionSink : ChunkedCompressionSink
+{
+ Sink & nextSink;
+ bz_stream strm;
+ bool finished = false;
+ BzipDecompressionSink(Sink & nextSink) : nextSink(nextSink)
+ {
+ memset(&strm, 0, sizeof(strm));
+ int ret = BZ2_bzDecompressInit(&strm, 0, 0);
if (ret != BZ_OK)
- throw CompressionError("error while decompressing bzip2 file");
+ throw CompressionError("unable to initialise bzip2 decoder");
+
+ strm.next_out = (char *) outbuf;
+ strm.avail_out = sizeof(outbuf);
}
-}
-static void decompressBrotli(Source & source, Sink & sink)
-{
- auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
- if (!s)
- throw CompressionError("unable to initialize brotli decoder");
-
- Finally free([s]() { BrotliDecoderDestroyInstance(s); });
-
- std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
- const uint8_t * next_in = nullptr;
- size_t avail_in = 0;
- bool eof = false;
-
- while (true) {
- checkInterrupt();
-
- if (avail_in == 0 && !eof) {
- next_in = inbuf.data();
- try {
- avail_in = source.read((unsigned char *) next_in, inbuf.size());
- } catch (EndOfFile &) {
- eof = true;
+ ~BzipDecompressionSink()
+ {
+ BZ2_bzDecompressEnd(&strm);
+ }
+
+ void finish() override
+ {
+ flush();
+ write(nullptr, 0);
+ }
+
+ void writeInternal(const unsigned char * data, size_t len)
+ {
+ assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());
+
+ strm.next_in = (char *) data;
+ strm.avail_in = len;
+
+ while (strm.avail_in) {
+ checkInterrupt();
+
+ int ret = BZ2_bzDecompress(&strm);
+ if (ret != BZ_OK && ret != BZ_STREAM_END)
+ throw CompressionError("error while decompressing bzip2 file");
+
+ finished = ret == BZ_STREAM_END;
+
+ if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
+ nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
+ strm.next_out = (char *) outbuf;
+ strm.avail_out = sizeof(outbuf);
}
}
+ }
+};
- uint8_t * next_out = outbuf.data();
- size_t avail_out = outbuf.size();
-
- auto ret = BrotliDecoderDecompressStream(s,
- &avail_in, &next_in,
- &avail_out, &next_out,
- nullptr);
-
- switch (ret) {
- case BROTLI_DECODER_RESULT_ERROR:
- throw CompressionError("error while decompressing brotli file");
- case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
- if (eof)
- throw CompressionError("incomplete or corrupt brotli file");
- break;
- case BROTLI_DECODER_RESULT_SUCCESS:
- if (avail_in != 0)
- throw CompressionError("unexpected input after brotli decompression");
- break;
- case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
- // I'm not sure if this can happen, but abort if this happens with empty buffer
- if (avail_out == outbuf.size())
- throw CompressionError("brotli decompression requires larger buffer");
- break;
- }
+struct BrotliDecompressionSink : ChunkedCompressionSink
+{
+ Sink & nextSink;
+ BrotliDecoderState * state;
+ bool finished = false;
- // Always ensure we have full buffer for next invocation
- if (avail_out < outbuf.size())
- sink((unsigned char *) outbuf.data(), outbuf.size() - avail_out);
+ BrotliDecompressionSink(Sink & nextSink) : nextSink(nextSink)
+ {
+ state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+ if (!state)
+ throw CompressionError("unable to initialize brotli decoder");
+ }
- if (ret == BROTLI_DECODER_RESULT_SUCCESS) return;
+ ~BrotliDecompressionSink()
+ {
+ BrotliDecoderDestroyInstance(state);
}
-}
+
+ void finish() override
+ {
+ flush();
+ writeInternal(nullptr, 0);
+ }
+
+ void writeInternal(const unsigned char * data, size_t len)
+ {
+ const uint8_t * next_in = data;
+ size_t avail_in = len;
+ uint8_t * next_out = outbuf;
+ size_t avail_out = sizeof(outbuf);
+
+ while (!finished && (!data || avail_in)) {
+ checkInterrupt();
+
+ if (!BrotliDecoderDecompressStream(state,
+ &avail_in, &next_in,
+ &avail_out, &next_out,
+ nullptr))
+ throw CompressionError("error while decompressing brotli file");
+
+ if (avail_out < sizeof(outbuf) || avail_in == 0) {
+ nextSink(outbuf, sizeof(outbuf) - avail_out);
+ next_out = outbuf;
+ avail_out = sizeof(outbuf);
+ }
+
+ finished = BrotliDecoderIsFinished(state);
+ }
+ }
+};
ref<std::string> decompress(const std::string & method, const std::string & in)
{
- StringSource source(in);
- StringSink sink;
- decompress(method, source, sink);
- return sink.s;
+ StringSink ssink;
+ auto sink = makeDecompressionSink(method, ssink);
+ (*sink)(in);
+ sink->finish();
+ return ssink.s;
}
-void decompress(const std::string & method, Source & source, Sink & sink)
+ref<CompressionSink> makeDecompressionSink(const std::string & method, Sink & nextSink)
{
- if (method == "none")
- return decompressNone(source, sink);
+ if (method == "none" || method == "")
+ return make_ref<NoneSink>(nextSink);
else if (method == "xz")
- return decompressXZ(source, sink);
+ return make_ref<XzDecompressionSink>(nextSink);
else if (method == "bzip2")
- return decompressBzip2(source, sink);
+ return make_ref<BzipDecompressionSink>(nextSink);
else if (method == "br")
- return decompressBrotli(source, sink);
+ return make_ref<BrotliDecompressionSink>(nextSink);
else
throw UnknownCompressionMethod("unknown compression method '%s'", method);
}
-struct NoneSink : CompressionSink
-{
- Sink & nextSink;
- NoneSink(Sink & nextSink) : nextSink(nextSink) { }
- void finish() override { flush(); }
- void write(const unsigned char * data, size_t len) override { nextSink(data, len); }
-};
-
-struct XzSink : CompressionSink
+struct XzCompressionSink : CompressionSink
{
Sink & nextSink;
uint8_t outbuf[BUFSIZ];
lzma_stream strm = LZMA_STREAM_INIT;
bool finished = false;
- template <typename F>
- XzSink(Sink & nextSink, F&& initEncoder) : nextSink(nextSink) {
- lzma_ret ret = initEncoder();
+ XzCompressionSink(Sink & nextSink, bool parallel) : nextSink(nextSink)
+ {
+ lzma_ret ret;
+ bool done = false;
+
+ if (parallel) {
+#ifdef HAVE_LZMA_MT
+ lzma_mt mt_options = {};
+ mt_options.flags = 0;
+ mt_options.timeout = 300; // Using the same setting as the xz cmd line
+ mt_options.preset = LZMA_PRESET_DEFAULT;
+ mt_options.filters = NULL;
+ mt_options.check = LZMA_CHECK_CRC64;
+ mt_options.threads = lzma_cputhreads();
+ mt_options.block_size = 0;
+ if (mt_options.threads == 0)
+ mt_options.threads = 1;
+ // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
+ // number of threads.
+ ret = lzma_stream_encoder_mt(&strm, &mt_options);
+ done = true;
+#else
+ printMsg(lvlError, "warning: parallel compression requested but not supported for metho d '%1%', falling back to single-threaded compression", method);
+#endif
+ }
+
+ if (!done)
+ ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64);
+
if (ret != LZMA_OK)
throw CompressionError("unable to initialise lzma encoder");
+
// FIXME: apply the x86 BCJ filter?
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
- XzSink(Sink & nextSink) : XzSink(nextSink, [this]() {
- return lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64);
- }) {}
- ~XzSink()
+ ~XzCompressionSink()
{
lzma_end(&strm);
}
@@ -246,43 +276,25 @@ struct XzSink : CompressionSink
void finish() override
{
CompressionSink::flush();
-
- assert(!finished);
- finished = true;
-
- while (true) {
- checkInterrupt();
-
- lzma_ret ret = lzma_code(&strm, LZMA_FINISH);
- if (ret != LZMA_OK && ret != LZMA_STREAM_END)
- throw CompressionError("error while flushing xz file");
-
- if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
- nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
- strm.next_out = outbuf;
- strm.avail_out = sizeof(outbuf);
- }
-
- if (ret == LZMA_STREAM_END) break;
- }
+ write(nullptr, 0);
}
void write(const unsigned char * data, size_t len) override
{
- assert(!finished);
-
strm.next_in = data;
strm.avail_in = len;
- while (strm.avail_in) {
+ while (!finished && (!data || strm.avail_in)) {
checkInterrupt();
- lzma_ret ret = lzma_code(&strm, LZMA_RUN);
- if (ret != LZMA_OK)
- throw CompressionError("error while compressing xz file");
+ lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH);
+ if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+ throw CompressionError("error %d while compressing xz file", ret);
+
+ finished = ret == LZMA_STREAM_END;
- if (strm.avail_out == 0) {
- nextSink(outbuf, sizeof(outbuf));
+ if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
+ nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
strm.next_out = outbuf;
strm.avail_out = sizeof(outbuf);
}
@@ -290,46 +302,24 @@ struct XzSink : CompressionSink
}
};
-#ifdef HAVE_LZMA_MT
-struct ParallelXzSink : public XzSink
-{
- ParallelXzSink(Sink &nextSink) : XzSink(nextSink, [this]() {
- lzma_mt mt_options = {};
- mt_options.flags = 0;
- mt_options.timeout = 300; // Using the same setting as the xz cmd line
- mt_options.preset = LZMA_PRESET_DEFAULT;
- mt_options.filters = NULL;
- mt_options.check = LZMA_CHECK_CRC64;
- mt_options.threads = lzma_cputhreads();
- mt_options.block_size = 0;
- if (mt_options.threads == 0)
- mt_options.threads = 1;
- // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
- // number of threads.
- return lzma_stream_encoder_mt(&strm, &mt_options);
- }) {}
-};
-#endif
-
-struct BzipSink : CompressionSink
+struct BzipCompressionSink : ChunkedCompressionSink
{
Sink & nextSink;
- char outbuf[BUFSIZ];
bz_stream strm;
bool finished = false;
- BzipSink(Sink & nextSink) : nextSink(nextSink)
+ BzipCompressionSink(Sink & nextSink) : nextSink(nextSink)
{
memset(&strm, 0, sizeof(strm));
int ret = BZ2_bzCompressInit(&strm, 9, 0, 30);
if (ret != BZ_OK)
throw CompressionError("unable to initialise bzip2 encoder");
- strm.next_out = outbuf;
+ strm.next_out = (char *) outbuf;
strm.avail_out = sizeof(outbuf);
}
- ~BzipSink()
+ ~BzipCompressionSink()
{
BZ2_bzCompressEnd(&strm);
}
@@ -337,78 +327,49 @@ struct BzipSink : CompressionSink
void finish() override
{
flush();
-
- assert(!finished);
- finished = true;
-
- while (true) {
- checkInterrupt();
-
- int ret = BZ2_bzCompress(&strm, BZ_FINISH);
- if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
- throw CompressionError("error while flushing bzip2 file");
-
- if (strm.avail_out == 0 || ret == BZ_STREAM_END) {
- nextSink((unsigned char *) outbuf, sizeof(outbuf) - strm.avail_out);
- strm.next_out = outbuf;
- strm.avail_out = sizeof(outbuf);
- }
-
- if (ret == BZ_STREAM_END) break;
- }
- }
-
- void write(const unsigned char * data, size_t len) override
- {
- /* Bzip2's 'avail_in' parameter is an unsigned int, so we need
- to split the input into chunks of at most 4 GiB. */
- while (len) {
- auto n = std::min((size_t) std::numeric_limits<decltype(strm.avail_in)>::max(), len);
- writeInternal(data, n);
- data += n;
- len -= n;
- }
+ writeInternal(nullptr, 0);
}
void writeInternal(const unsigned char * data, size_t len)
{
- assert(!finished);
assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());
strm.next_in = (char *) data;
strm.avail_in = len;
- while (strm.avail_in) {
+ while (!finished && (!data || strm.avail_in)) {
checkInterrupt();
- int ret = BZ2_bzCompress(&strm, BZ_RUN);
- if (ret != BZ_OK)
- CompressionError("error while compressing bzip2 file");
+ int ret = BZ2_bzCompress(&strm, data ? BZ_RUN : BZ_FINISH);
+ if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
+ throw CompressionError("error %d while compressing bzip2 file", ret);
- if (strm.avail_out == 0) {
- nextSink((unsigned char *) outbuf, sizeof(outbuf));
- strm.next_out = outbuf;
+ finished = ret == BZ_STREAM_END;
+
+ if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
+ nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
+ strm.next_out = (char *) outbuf;
strm.avail_out = sizeof(outbuf);
}
}
}
};
-struct BrotliSink : CompressionSink
+struct BrotliCompressionSink : ChunkedCompressionSink
{
Sink & nextSink;
uint8_t outbuf[BUFSIZ];
BrotliEncoderState *state;
bool finished = false;
- BrotliSink(Sink & nextSink) : nextSink(nextSink)
+ BrotliCompressionSink(Sink & nextSink) : nextSink(nextSink)
{
state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
if (!state)
throw CompressionError("unable to initialise brotli encoder");
}
- ~BrotliSink()
+ ~BrotliCompressionSink()
{
BrotliEncoderDestroyInstance(state);
}
@@ -416,89 +377,47 @@ struct BrotliSink : CompressionSink
void finish() override
{
flush();
- assert(!finished);
-
- const uint8_t *next_in = nullptr;
- size_t avail_in = 0;
- uint8_t *next_out = outbuf;
- size_t avail_out = sizeof(outbuf);
- while (!finished) {
- checkInterrupt();
-
- if (!BrotliEncoderCompressStream(state,
- BROTLI_OPERATION_FINISH,
- &avail_in, &next_in,
- &avail_out, &next_out,
- nullptr))
- throw CompressionError("error while finishing brotli file");
-
- finished = BrotliEncoderIsFinished(state);
- if (avail_out == 0 || finished) {
- nextSink(outbuf, sizeof(outbuf) - avail_out);
- next_out = outbuf;
- avail_out = sizeof(outbuf);
- }
- }
- }
-
- void write(const unsigned char * data, size_t len) override
- {
- // Don't feed brotli too much at once
- const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
- while (len) {
- size_t n = std::min(CHUNK_SIZE, len);
- writeInternal(data, n);
- data += n;
- len -= n;
- }
+ writeInternal(nullptr, 0);
}
void writeInternal(const unsigned char * data, size_t len)
{
- assert(!finished);
-
- const uint8_t *next_in = data;
+ const uint8_t * next_in = data;
size_t avail_in = len;
- uint8_t *next_out = outbuf;
+ uint8_t * next_out = outbuf;
size_t avail_out = sizeof(outbuf);
- while (avail_in > 0) {
+ while (!finished && (!data || avail_in)) {
checkInterrupt();
if (!BrotliEncoderCompressStream(state,
- BROTLI_OPERATION_PROCESS,
- &avail_in, &next_in,
- &avail_out, &next_out,
- nullptr))
- throw CompressionError("error while compressing brotli file");
+ data ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
+ &avail_in, &next_in,
+ &avail_out, &next_out,
+ nullptr))
+ throw CompressionError("error while compressing brotli compression");
if (avail_out < sizeof(outbuf) || avail_in == 0) {
nextSink(outbuf, sizeof(outbuf) - avail_out);
next_out = outbuf;
avail_out = sizeof(outbuf);
}
+
+ finished = BrotliEncoderIsFinished(state);
}
}
};
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel)
{
- if (parallel) {
-#ifdef HAVE_LZMA_MT
- if (method == "xz")
- return make_ref<ParallelXzSink>(nextSink);
-#endif
- printMsg(lvlError, format("Warning: parallel compression requested but not supported for method '%1%', falling back to single-threaded compression") % method);
- }
-
if (method == "none")
return make_ref<NoneSink>(nextSink);
else if (method == "xz")
- return make_ref<XzSink>(nextSink);
+ return make_ref<XzCompressionSink>(nextSink, parallel);
else if (method == "bzip2")
- return make_ref<BzipSink>(nextSink);
+ return make_ref<BzipCompressionSink>(nextSink);
else if (method == "br")
- return make_ref<BrotliSink>(nextSink);
+ return make_ref<BrotliCompressionSink>(nextSink);
else
throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
}