From 3a48282b0681d68147e18f7464eaddf1d188c3be Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 14 Dec 2011 23:30:06 +0000 Subject: * Buffer writes in FdSink. This significantly reduces the number of system calls / context switches when dumping a NAR and in the worker protocol. --- src/libutil/serialise.cc | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'src/libutil/serialise.cc') diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 9b4222713..66a64a6be 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -9,7 +9,30 @@ namespace nix { void FdSink::operator () (const unsigned char * data, unsigned int len) { - writeFull(fd, data, len); + if (!buffer) buffer = new unsigned char[bufSize]; + + while (len) { + /* Optimisation: bypass the buffer if the data exceeds the + buffer size and there is no unflushed data. */ + if (bufPos == 0 && len >= bufSize) { + writeFull(fd, data, len); + break; + } + /* Otherwise, copy the bytes to the buffer. Flush the buffer + when it's full. */ + size_t n = bufPos + len > bufSize ? bufSize - bufPos : len; + memcpy(buffer + bufPos, data, n); + data += n; bufPos += n; len -= n; + if (bufPos == bufSize) flush(); + } +} + + +void FdSink::flush() +{ + if (fd == -1 || bufPos == 0) return; + writeFull(fd, buffer, bufPos); + bufPos = 0; } -- cgit v1.2.3 From a3e0656cbbfadba28518e0a29c324edaabb9874a Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 15 Dec 2011 12:32:08 +0000 Subject: * Buffer reads in FdSource. Together with write buffering, this significantly cuts down the number of syscalls (e.g., for "nix-store -qR /var/run/current-system" via the daemon, it reduced the number of syscalls in the client from 29134 to 4766 and in the daemon from 44266 to 20666). --- src/libutil/serialise.cc | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'src/libutil/serialise.cc') diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 66a64a6be..e3a53c0d0 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -2,6 +2,7 @@ #include "util.hh" #include +#include namespace nix { @@ -38,7 +39,28 @@ void FdSink::flush() void FdSource::operator () (unsigned char * data, unsigned int len) { - readFull(fd, data, len); + if (!buffer) buffer = new unsigned char[bufSize]; + + while (len) { + if (!bufPosIn) { + /* Read as much data as is available (up to the buffer + size). */ + checkInterrupt(); + ssize_t n = read(fd, (char *) buffer, bufSize); + if (n == -1) { + if (errno == EINTR) continue; + throw SysError("reading from file"); + } + if (n == 0) throw EndOfFile("unexpected end-of-file"); + bufPosIn = n; + } + + /* Copy out the data in the buffer. */ + size_t n = len > bufPosIn - bufPosOut ? bufPosIn - bufPosOut : len; + memcpy(data, buffer + bufPosOut, n); + data += n; bufPosOut += n; len -= n; + if (bufPosIn == bufPosOut) bufPosIn = bufPosOut = 0; + } } -- cgit v1.2.3 From 5a1b9ed0aa3a0c240b667dbe504b61b2b68e4d16 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 15 Dec 2011 16:19:53 +0000 Subject: * Refactoring: move sink/source buffering into separate classes. * Buffer the HashSink. This speeds up hashing a bit because it prevents lots of calls to the hash update functions (e.g. nix-hash went from 9.3s to 8.7s of user time on the closure of my /var/run/current-system). --- src/libutil/serialise.cc | 69 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 23 deletions(-) (limited to 'src/libutil/serialise.cc') diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index e3a53c0d0..a82262704 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -8,7 +8,16 @@ namespace nix { -void FdSink::operator () (const unsigned char * data, unsigned int len) +BufferedSink::~BufferedSink() +{ + /* We can't call flush() here, because C++ for some insane reason + doesn't allow you to call virtual methods from a destructor. */ + assert(!bufPos); + if (buffer) delete[] buffer; +} + + +void BufferedSink::operator () (const unsigned char * data, size_t len) { if (!buffer) buffer = new unsigned char[bufSize]; @@ -16,7 +25,7 @@ void FdSink::operator () (const unsigned char * data, unsigned int len) /* Optimisation: bypass the buffer if the data exceeds the buffer size and there is no unflushed data. */ if (bufPos == 0 && len >= bufSize) { - writeFull(fd, data, len); + write(data, len); break; } /* Otherwise, copy the bytes to the buffer. Flush the buffer @@ -29,31 +38,32 @@ void FdSink::operator () (const unsigned char * data, unsigned int len) } -void FdSink::flush() +void BufferedSink::flush() { - if (fd == -1 || bufPos == 0) return; - writeFull(fd, buffer, bufPos); + if (bufPos == 0) return; + write(buffer, bufPos); bufPos = 0; } -void FdSource::operator () (unsigned char * data, unsigned int len) +void FdSink::write(const unsigned char * data, size_t len) +{ + writeFull(fd, data, len); +} + + +BufferedSource::~BufferedSource() +{ + if (buffer) delete[] buffer; +} + + +void BufferedSource::operator () (unsigned char * data, size_t len) { if (!buffer) buffer = new unsigned char[bufSize]; while (len) { - if (!bufPosIn) { - /* Read as much data as is available (up to the buffer - size). */ - checkInterrupt(); - ssize_t n = read(fd, (char *) buffer, bufSize); - if (n == -1) { - if (errno == EINTR) continue; - throw SysError("reading from file"); - } - if (n == 0) throw EndOfFile("unexpected end-of-file"); - bufPosIn = n; - } + if (!bufPosIn) bufPosIn = read(buffer, bufSize); /* Copy out the data in the buffer. */ size_t n = len > bufPosIn - bufPosOut ? bufPosIn - bufPosOut : len; @@ -64,7 +74,20 @@ void FdSource::operator () (unsigned char * data, unsigned int len) } -void writePadding(unsigned int len, Sink & sink) +size_t FdSource::read(unsigned char * data, size_t len) +{ + ssize_t n; + do { + checkInterrupt(); + n = ::read(fd, (char *) data, bufSize); + } while (n == -1 && errno == EINTR); + if (n == -1) throw SysError("reading from file"); + if (n == 0) throw EndOfFile("unexpected end-of-file"); + return n; +} + + +void writePadding(size_t len, Sink & sink) { if (len % 8) { unsigned char zero[8]; @@ -103,7 +126,7 @@ void writeLongLong(unsigned long long n, Sink & sink) void writeString(const string & s, Sink & sink) { - unsigned int len = s.length(); + size_t len = s.length(); writeInt(len, sink); sink((const unsigned char *) s.c_str(), len); writePadding(len, sink); @@ -118,11 +141,11 @@ void writeStringSet(const StringSet & ss, Sink & sink) } -void readPadding(unsigned int len, Source & source) +void readPadding(size_t len, Source & source) { if (len % 8) { unsigned char zero[8]; - unsigned int n = 8 - (len % 8); + size_t n = 8 - (len % 8); source(zero, n); for (unsigned int i = 0; i < n; i++) if (zero[i]) throw SerialisationError("non-zero padding"); @@ -162,7 +185,7 @@ unsigned long long readLongLong(Source & source) string readString(Source & source) { - unsigned int len = readInt(source); + size_t len = readInt(source); unsigned char * buf = new unsigned char[len]; AutoDeleteArray d(buf); source(buf, len); -- cgit v1.2.3 From 78598d06f0240a15b74720d8f987daeb702318d7 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 16 Dec 2011 15:45:42 +0000 Subject: * Clean up exception handling. --- src/libutil/serialise.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/libutil/serialise.cc') diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index a82262704..76f2e721a 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -41,8 +41,15 @@ void BufferedSink::operator () (const unsigned char * data, size_t len) void BufferedSink::flush() { if (bufPos == 0) return; - write(buffer, bufPos); - bufPos = 0; + size_t n = bufPos; + bufPos = 0; // don't trigger the assert() in ~BufferedSink() + write(buffer, n); +} + + +FdSink::~FdSink() +{ + try { flush(); } catch (...) { ignoreException(); } } -- cgit v1.2.3 From e0bd307802d13476055f8ba99ab7808de0fd71e5 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 16 Dec 2011 19:44:13 +0000 Subject: * Make the import operation through the daemon much more efficient (way fewer roundtrips) by allowing the client to send data in bigger chunks. * Some refactoring. --- src/libutil/serialise.cc | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'src/libutil/serialise.cc') diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 76f2e721a..640267a13 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -23,8 +23,9 @@ void BufferedSink::operator () (const unsigned char * data, size_t len) while (len) { /* Optimisation: bypass the buffer if the data exceeds the - buffer size and there is no unflushed data. */ - if (bufPos == 0 && len >= bufSize) { + buffer size. */ + if (bufPos + len >= bufSize) { + flush(); write(data, len); break; } @@ -59,29 +60,37 @@ void FdSink::write(const unsigned char * data, size_t len) } +void Source::operator () (unsigned char * data, size_t len) +{ + while (len) { + size_t n = read(data, len); + data += n; len -= n; + } +} + + BufferedSource::~BufferedSource() { if (buffer) delete[] buffer; } -void BufferedSource::operator () (unsigned char * data, size_t len) +size_t BufferedSource::read(unsigned char * data, size_t len) { if (!buffer) buffer = new unsigned char[bufSize]; - while (len) { - if (!bufPosIn) bufPosIn = read(buffer, bufSize); + if (!bufPosIn) bufPosIn = readUnbuffered(buffer, bufSize); - /* Copy out the data in the buffer. */ - size_t n = len > bufPosIn - bufPosOut ? bufPosIn - bufPosOut : len; - memcpy(data, buffer + bufPosOut, n); - data += n; bufPosOut += n; len -= n; - if (bufPosIn == bufPosOut) bufPosIn = bufPosOut = 0; - } + /* Copy out the data in the buffer. */ + size_t n = len > bufPosIn - bufPosOut ? bufPosIn - bufPosOut : len; + memcpy(data, buffer + bufPosOut, n); + bufPosOut += n; + if (bufPosIn == bufPosOut) bufPosIn = bufPosOut = 0; + return n; } -size_t FdSource::read(unsigned char * data, size_t len) +size_t FdSource::readUnbuffered(unsigned char * data, size_t len) { ssize_t n; do { @@ -94,6 +103,15 @@ size_t FdSource::read(unsigned char * data, size_t len) } +size_t StringSource::read(unsigned char * data, size_t len) +{ + if (pos == s.size()) throw EndOfFile("end of string reached"); + size_t n = s.copy((char *) data, len, pos); + pos += n; + return n; +} + + void writePadding(size_t len, Sink & sink) { if (len % 8) { -- cgit v1.2.3 From 8d3dfa2c1782e955d2b7796d19dc0d0381596b98 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 16 Dec 2011 21:29:46 +0000 Subject: * Avoid expensive conversions from char arrays to STL strings. --- src/libutil/serialise.cc | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'src/libutil/serialise.cc') diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 640267a13..ba549c214 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -149,15 +149,20 @@ void writeLongLong(unsigned long long n, Sink & sink) } -void writeString(const string & s, Sink & sink) +void writeString(const unsigned char * buf, size_t len, Sink & sink) { - size_t len = s.length(); writeInt(len, sink); - sink((const unsigned char *) s.c_str(), len); + sink(buf, len); writePadding(len, sink); } +void writeString(const string & s, Sink & sink) +{ + writeString((const unsigned char *) s.c_str(), s.size(), sink); +} + + void writeStringSet(const StringSet & ss, Sink & sink) { writeInt(ss.size(), sink); @@ -208,6 +213,16 @@ unsigned long long readLongLong(Source & source) } +size_t readString(unsigned char * buf, size_t max, Source & source) +{ + size_t len = readInt(source); + if (len > max) throw Error("string is too long"); + source(buf, len); + readPadding(len, source); + return len; +} + + string readString(Source & source) { size_t len = readInt(source); -- cgit v1.2.3 From 273b288a7e862ac1918064537ff130cc751fa9fd Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 16 Dec 2011 22:31:25 +0000 Subject: * importPath() -> importPaths(). Because of buffering of the input stream it's now necessary for the daemon to process the entire sequence of exported paths, rather than letting the client do it. --- src/libutil/serialise.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'src/libutil/serialise.cc') diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index ba549c214..c4563ffd1 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -163,13 +163,16 @@ void writeString(const string & s, Sink & sink) } -void writeStringSet(const StringSet & ss, Sink & sink) +template void writeStrings(const T & ss, Sink & sink) { writeInt(ss.size(), sink); - for (StringSet::iterator i = ss.begin(); i != ss.end(); ++i) + foreach (typename T::const_iterator, i, ss) writeString(*i, sink); } +template void writeStrings(const Paths & ss, Sink & sink); +template void writeStrings(const PathSet & ss, Sink & sink); + void readPadding(size_t len, Source & source) { @@ -234,14 +237,17 @@ string readString(Source & source) } -StringSet readStringSet(Source & source) +template T readStrings(Source & source) { unsigned int count = readInt(source); - StringSet ss; + T ss; while (count--) - ss.insert(readString(source)); + ss.insert(ss.end(), readString(source)); return ss; } +template Paths readStrings(Source & source); +template PathSet readStrings(Source & source); + } -- cgit v1.2.3