From a6d7cd418385e20feab8d7260a7251f218a0d5bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Na=C3=AFm=20Favier?= Date: Fri, 18 Feb 2022 13:24:39 +0100 Subject: Ensure the completion marker is not processed beyond completion I was surprised to see an error mentioning ___COMPLETE___ when trying to complete a flag argument that had no completer implemented --- src/libutil/args.cc | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'src/libutil') diff --git a/src/libutil/args.cc b/src/libutil/args.cc index f970c0e9e..38c748be0 100644 --- a/src/libutil/args.cc +++ b/src/libutil/args.cc @@ -127,11 +127,11 @@ bool Args::processFlag(Strings::iterator & pos, Strings::iterator end) if (flag.handler.arity == ArityAny) break; throw UsageError("flag '%s' requires %d argument(s)", name, flag.handler.arity); } - if (flag.completer) - if (auto prefix = needsCompletion(*pos)) { - anyCompleted = true; + if (auto prefix = needsCompletion(*pos)) { + anyCompleted = true; + if (flag.completer) flag.completer(n, *prefix); - } + } args.push_back(*pos++); } if (!anyCompleted) @@ -146,6 +146,7 @@ bool Args::processFlag(Strings::iterator & pos, Strings::iterator end) && hasPrefix(name, std::string(*prefix, 2))) completions->add("--" + name, flag->description); } + return false; } auto i = longFlags.find(std::string(*pos, 2)); if (i == longFlags.end()) return false; @@ -187,10 +188,12 @@ bool Args::processArgs(const Strings & args, bool finish) { std::vector ss; for (const auto &[n, s] : enumerate(args)) { - ss.push_back(s); - if (exp.completer) - if (auto prefix = needsCompletion(s)) + if (auto prefix = needsCompletion(s)) { + ss.push_back(*prefix); + if (exp.completer) exp.completer(n, *prefix); + } else + ss.push_back(s); } exp.handler.fun(ss); expectedArgs.pop_front(); @@ -322,16 +325,16 @@ MultiCommand::MultiCommand(const Commands & commands_) .optional = true, .handler = {[=](std::string s) { assert(!command); - if (auto prefix = needsCompletion(s)) { - for (auto & [name, command] : commands) - if (hasPrefix(name, *prefix)) - completions->add(name); - } auto i = commands.find(s); if (i == commands.end()) throw UsageError("'%s' is not a recognised command", s); command = {s, i->second()}; command->second->parent = this; + }}, + .completer = {[&](size_t, std::string_view prefix) { + for (auto & [name, command] : commands) + if (hasPrefix(name, prefix)) + completions->add(name); }} }); -- cgit v1.2.3 From 5461ff532d6169be86af703b15cfb49569732cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Na=C3=AFm=20Favier?= Date: Fri, 18 Feb 2022 13:26:40 +0100 Subject: Make completeDir follow symlinks Allows completing `nix why-depends /run/cur` to /run/current-system --- src/libutil/args.cc | 2 +- src/libutil/util.cc | 9 +++++++++ src/libutil/util.hh | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src/libutil') diff --git a/src/libutil/args.cc b/src/libutil/args.cc index 38c748be0..b60c609a6 100644 --- a/src/libutil/args.cc +++ b/src/libutil/args.cc @@ -290,7 +290,7 @@ static void _completePath(std::string_view prefix, bool onlyDirs) if (glob((std::string(prefix) + "*").c_str(), flags, nullptr, &globbuf) == 0) { for (size_t i = 0; i < globbuf.gl_pathc; ++i) { if (onlyDirs) { - auto st = lstat(globbuf.gl_pathv[i]); + auto st = stat(globbuf.gl_pathv[i]); if (!S_ISDIR(st.st_mode)) continue; } completions->add(globbuf.gl_pathv[i]); diff --git a/src/libutil/util.cc b/src/libutil/util.cc index b833038a9..deaa17a7f 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -215,6 +215,15 @@ bool isDirOrInDir(std::string_view path, std::string_view dir) } +struct stat stat(const Path & path) +{ + struct stat st; + if (stat(path.c_str(), &st)) + throw SysError("getting status of '%1%'", path); + return st; +} + + struct stat lstat(const Path & path) { struct stat st; diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 20591952d..94ae8ab7d 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -77,6 +77,7 @@ bool isInDir(std::string_view path, std::string_view dir); bool isDirOrInDir(std::string_view path, std::string_view dir); /* Get status of `path'. */ +struct stat stat(const Path & path); struct stat lstat(const Path & path); /* Return true iff the given path exists. */ -- cgit v1.2.3 From 55c6906701ee7fc7e915f7889fea86957b020f94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Na=C3=AFm=20Favier?= Date: Sat, 19 Feb 2022 14:26:34 +0100 Subject: Perform tilde expansion when completing flake fragments Allows completing `nix build ~/flake#`. We can implement expansion for `~user` later if needed. Not using wordexp(3) since that expands way too much. --- src/libutil/args.cc | 7 ++++--- src/libutil/util.cc | 11 +++++++++++ src/libutil/util.hh | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'src/libutil') diff --git a/src/libutil/args.cc b/src/libutil/args.cc index b60c609a6..10f01af5b 100644 --- a/src/libutil/args.cc +++ b/src/libutil/args.cc @@ -282,12 +282,13 @@ static void _completePath(std::string_view prefix, bool onlyDirs) { completionType = ctFilenames; glob_t globbuf; - int flags = GLOB_NOESCAPE | GLOB_TILDE; + int flags = GLOB_NOESCAPE; #ifdef GLOB_ONLYDIR if (onlyDirs) flags |= GLOB_ONLYDIR; #endif - if (glob((std::string(prefix) + "*").c_str(), flags, nullptr, &globbuf) == 0) { + // using expandTilde here instead of GLOB_TILDE(_CHECK) so that ~ expands to /home/user/ + if (glob((expandTilde(prefix) + "*").c_str(), flags, nullptr, &globbuf) == 0) { for (size_t i = 0; i < globbuf.gl_pathc; ++i) { if (onlyDirs) { auto st = stat(globbuf.gl_pathv[i]); @@ -295,8 +296,8 @@ static void _completePath(std::string_view prefix, bool onlyDirs) } completions->add(globbuf.gl_pathv[i]); } - globfree(&globbuf); } + globfree(&globbuf); } void completePath(size_t, std::string_view prefix) diff --git a/src/libutil/util.cc b/src/libutil/util.cc index deaa17a7f..d9db24397 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -200,6 +200,17 @@ std::string_view baseNameOf(std::string_view path) } +std::string expandTilde(std::string_view path) +{ + // TODO: expand ~user ? + auto tilde = path.substr(0, 2); + if (tilde == "~/" || tilde == "~") + return getHome() + std::string(path.substr(1)); + else + return std::string(path); +} + + bool isInDir(std::string_view path, std::string_view dir) { return path.substr(0, 1) == "/" diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 94ae8ab7d..a1d0e0e6b 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -68,6 +68,9 @@ Path dirOf(const PathView path); following the final `/' (trailing slashes are removed). */ std::string_view baseNameOf(std::string_view path); +/* Perform tilde expansion on a path. */ +std::string expandTilde(std::string_view path); + /* Check whether 'path' is a descendant of 'dir'. Both paths must be canonicalized. */ bool isInDir(std::string_view path, std::string_view dir); -- cgit v1.2.3 From 75b62e52600a44b42693944b50638bf580a2c86e Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 18 Jun 2020 17:54:16 +0000 Subject: Avoid `fmt` when constructor already does it There is a correctnes issue here, but #3724 will fix that. This is just a cleanup for brevity's sake. --- src/libutil/util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/libutil') diff --git a/src/libutil/util.cc b/src/libutil/util.cc index c075a14b4..d4379a0cf 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -1062,7 +1062,7 @@ std::string runProgram(Path program, bool searchPath, const Strings & args, auto res = runProgram(RunOptions {.program = program, .searchPath = searchPath, .args = args, .input = input}); if (!statusOk(res.first)) - throw ExecError(res.first, fmt("program '%1%' %2%", program, statusToString(res.first))); + throw ExecError(res.first, "program '%1%' %2%", program, statusToString(res.first)); return res.second; } @@ -1190,7 +1190,7 @@ void runProgram2(const RunOptions & options) if (source) promise.get_future().get(); if (status) - throw ExecError(status, fmt("program '%1%' %2%", options.program, statusToString(status))); + throw ExecError(status, "program '%1%' %2%", options.program, statusToString(status)); } -- cgit v1.2.3 From ebf2fd76b106d5eb8f45ccce0615653108bb99bc Mon Sep 17 00:00:00 2001 From: Yorick van Pelt Date: Wed, 20 Apr 2022 15:41:01 +0200 Subject: Add custom to_json and from_json functions for ExperimentalFeature nix show-config --json was serializing experimental features as ints. nlohmann::json will automatically use these definitions to serialize and deserialize ExperimentalFeatures. Strictly, we don't use the from_json instance yet, it's provided for completeness and hopefully future use. --- src/libutil/experimental-features.cc | 14 ++++++++++++++ src/libutil/experimental-features.hh | 7 +++++++ 2 files changed, 21 insertions(+) (limited to 'src/libutil') diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index e033a4116..df37edf57 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -58,4 +58,18 @@ std::ostream & operator <<(std::ostream & str, const ExperimentalFeature & featu return str << showExperimentalFeature(feature); } +void to_json(nlohmann::json& j, const ExperimentalFeature& feature) { + j = showExperimentalFeature(feature); +} + +void from_json(const nlohmann::json& j, ExperimentalFeature& feature) { + const std::string input = j; + const auto parsed = parseExperimentalFeature(input); + + if (parsed.has_value()) + feature = *parsed; + else + throw Error("Unknown experimental feature '%s' in JSON input", input); +} + } diff --git a/src/libutil/experimental-features.hh b/src/libutil/experimental-features.hh index 266e41a22..a6d080094 100644 --- a/src/libutil/experimental-features.hh +++ b/src/libutil/experimental-features.hh @@ -51,4 +51,11 @@ public: MissingExperimentalFeature(ExperimentalFeature); }; +/** + * Semi-magic conversion to and from json. + * See the nlohmann/json readme for more details. + */ +void to_json(nlohmann::json&, const ExperimentalFeature&); +void from_json(const nlohmann::json&, ExperimentalFeature&); + } -- cgit v1.2.3 From f05e1f6fbb8a760f23a7af16b065078df6588acf Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 21 Apr 2022 11:58:40 +0200 Subject: Move hiliteMatches into a separate header This is mostly so that we don't #include everywhere (which adds quite a bit of compilation time). --- src/libutil/fmt.cc | 46 ---------------------------------------------- src/libutil/fmt.hh | 12 ------------ src/libutil/hilite.cc | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/libutil/hilite.hh | 20 ++++++++++++++++++++ 4 files changed, 64 insertions(+), 58 deletions(-) delete mode 100644 src/libutil/fmt.cc create mode 100644 src/libutil/hilite.cc create mode 100644 src/libutil/hilite.hh (limited to 'src/libutil') diff --git a/src/libutil/fmt.cc b/src/libutil/fmt.cc deleted file mode 100644 index 3dd93d73e..000000000 --- a/src/libutil/fmt.cc +++ /dev/null @@ -1,46 +0,0 @@ -#include "fmt.hh" - -#include - -namespace nix { - -std::string hiliteMatches( - std::string_view s, - std::vector matches, - std::string_view prefix, - std::string_view postfix) -{ - // Avoid copy on zero matches - if (matches.size() == 0) - return (std::string) s; - - std::sort(matches.begin(), matches.end(), [](const auto & a, const auto & b) { - return a.position() < b.position(); - }); - - std::string out; - ssize_t last_end = 0; - - for (auto it = matches.begin(); it != matches.end();) { - auto m = *it; - size_t start = m.position(); - out.append(s.substr(last_end, m.position() - last_end)); - // Merge continous matches - ssize_t end = start + m.length(); - while (++it != matches.end() && (*it).position() <= end) { - auto n = *it; - ssize_t nend = start + (n.position() - start + n.length()); - if (nend > end) - end = nend; - } - out.append(prefix); - out.append(s.substr(start, end - start)); - out.append(postfix); - last_end = end; - } - - out.append(s.substr(last_end)); - return out; -} - -} diff --git a/src/libutil/fmt.hh b/src/libutil/fmt.hh index 0821b3b74..7664e5c04 100644 --- a/src/libutil/fmt.hh +++ b/src/libutil/fmt.hh @@ -2,7 +2,6 @@ #include #include -#include #include "ansicolor.hh" @@ -155,15 +154,4 @@ inline hintformat hintfmt(std::string plain_string) return hintfmt("%s", normaltxt(plain_string)); } -/* Highlight all the given matches in the given string `s` by wrapping - them between `prefix` and `postfix`. - - If some matches overlap, then their union will be wrapped rather - than the individual matches. */ -std::string hiliteMatches( - std::string_view s, - std::vector matches, - std::string_view prefix, - std::string_view postfix); - } diff --git a/src/libutil/hilite.cc b/src/libutil/hilite.cc new file mode 100644 index 000000000..a5991ca39 --- /dev/null +++ b/src/libutil/hilite.cc @@ -0,0 +1,44 @@ +#include "hilite.hh" + +namespace nix { + +std::string hiliteMatches( + std::string_view s, + std::vector matches, + std::string_view prefix, + std::string_view postfix) +{ + // Avoid copy on zero matches + if (matches.size() == 0) + return (std::string) s; + + std::sort(matches.begin(), matches.end(), [](const auto & a, const auto & b) { + return a.position() < b.position(); + }); + + std::string out; + ssize_t last_end = 0; + + for (auto it = matches.begin(); it != matches.end();) { + auto m = *it; + size_t start = m.position(); + out.append(s.substr(last_end, m.position() - last_end)); + // Merge continous matches + ssize_t end = start + m.length(); + while (++it != matches.end() && (*it).position() <= end) { + auto n = *it; + ssize_t nend = start + (n.position() - start + n.length()); + if (nend > end) + end = nend; + } + out.append(prefix); + out.append(s.substr(start, end - start)); + out.append(postfix); + last_end = end; + } + + out.append(s.substr(last_end)); + return out; +} + +} diff --git a/src/libutil/hilite.hh b/src/libutil/hilite.hh new file mode 100644 index 000000000..f8bdbfc55 --- /dev/null +++ b/src/libutil/hilite.hh @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include + +namespace nix { + +/* Highlight all the given matches in the given string `s` by wrapping + them between `prefix` and `postfix`. + + If some matches overlap, then their union will be wrapped rather + than the individual matches. */ +std::string hiliteMatches( + std::string_view s, + std::vector matches, + std::string_view prefix, + std::string_view postfix); + +} -- cgit v1.2.3 From f1eee873ea064e8f94369bdfe2557c18ba35ccc9 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 21 Apr 2022 13:00:24 +0200 Subject: Fix fmt test --- src/libutil/tests/fmt.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/libutil') diff --git a/src/libutil/tests/fmt.cc b/src/libutil/tests/fmt.cc index 33772162c..1ff5980d5 100644 --- a/src/libutil/tests/fmt.cc +++ b/src/libutil/tests/fmt.cc @@ -1,9 +1,7 @@ -#include "fmt.hh" +#include "hilite.hh" #include -#include - namespace nix { /* ----------- tests for fmt.hh -------------------------------------------------*/ -- cgit v1.2.3 From 3b9d31b88c95e591c28f3a7423f83c40b9788781 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 21 Apr 2022 13:00:50 +0200 Subject: Rename fmt test -> hilte --- src/libutil/tests/fmt.cc | 66 --------------------------------------------- src/libutil/tests/hilite.cc | 66 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 66 deletions(-) delete mode 100644 src/libutil/tests/fmt.cc create mode 100644 src/libutil/tests/hilite.cc (limited to 'src/libutil') diff --git a/src/libutil/tests/fmt.cc b/src/libutil/tests/fmt.cc deleted file mode 100644 index 1ff5980d5..000000000 --- a/src/libutil/tests/fmt.cc +++ /dev/null @@ -1,66 +0,0 @@ -#include "hilite.hh" - -#include - -namespace nix { -/* ----------- tests for fmt.hh -------------------------------------------------*/ - - TEST(hiliteMatches, noHighlight) { - ASSERT_STREQ(hiliteMatches("Hello, world!", std::vector(), "(", ")").c_str(), "Hello, world!"); - } - - TEST(hiliteMatches, simpleHighlight) { - std::string str = "Hello, world!"; - std::regex re = std::regex("world"); - auto matches = std::vector(std::sregex_iterator(str.begin(), str.end(), re), std::sregex_iterator()); - ASSERT_STREQ( - hiliteMatches(str, matches, "(", ")").c_str(), - "Hello, (world)!" - ); - } - - TEST(hiliteMatches, multipleMatches) { - std::string str = "Hello, world, world, world, world, world, world, Hello!"; - std::regex re = std::regex("world"); - auto matches = std::vector(std::sregex_iterator(str.begin(), str.end(), re), std::sregex_iterator()); - ASSERT_STREQ( - hiliteMatches(str, matches, "(", ")").c_str(), - "Hello, (world), (world), (world), (world), (world), (world), Hello!" - ); - } - - TEST(hiliteMatches, overlappingMatches) { - std::string str = "world, Hello, world, Hello, world, Hello, world, Hello, world!"; - std::regex re = std::regex("Hello, world"); - std::regex re2 = std::regex("world, Hello"); - auto v = std::vector(std::sregex_iterator(str.begin(), str.end(), re), std::sregex_iterator()); - for(auto it = std::sregex_iterator(str.begin(), str.end(), re2); it != std::sregex_iterator(); ++it) { - v.push_back(*it); - } - ASSERT_STREQ( - hiliteMatches(str, v, "(", ")").c_str(), - "(world, Hello, world, Hello, world, Hello, world, Hello, world)!" - ); - } - - TEST(hiliteMatches, complexOverlappingMatches) { - std::string str = "legacyPackages.x86_64-linux.git-crypt"; - std::vector regexes = { - std::regex("t-cry"), - std::regex("ux\\.git-cry"), - std::regex("git-c"), - std::regex("pt"), - }; - std::vector matches; - for(auto regex : regexes) - { - for(auto it = std::sregex_iterator(str.begin(), str.end(), regex); it != std::sregex_iterator(); ++it) { - matches.push_back(*it); - } - } - ASSERT_STREQ( - hiliteMatches(str, matches, "(", ")").c_str(), - "legacyPackages.x86_64-lin(ux.git-crypt)" - ); - } -} diff --git a/src/libutil/tests/hilite.cc b/src/libutil/tests/hilite.cc new file mode 100644 index 000000000..1ff5980d5 --- /dev/null +++ b/src/libutil/tests/hilite.cc @@ -0,0 +1,66 @@ +#include "hilite.hh" + +#include + +namespace nix { +/* ----------- tests for fmt.hh -------------------------------------------------*/ + + TEST(hiliteMatches, noHighlight) { + ASSERT_STREQ(hiliteMatches("Hello, world!", std::vector(), "(", ")").c_str(), "Hello, world!"); + } + + TEST(hiliteMatches, simpleHighlight) { + std::string str = "Hello, world!"; + std::regex re = std::regex("world"); + auto matches = std::vector(std::sregex_iterator(str.begin(), str.end(), re), std::sregex_iterator()); + ASSERT_STREQ( + hiliteMatches(str, matches, "(", ")").c_str(), + "Hello, (world)!" + ); + } + + TEST(hiliteMatches, multipleMatches) { + std::string str = "Hello, world, world, world, world, world, world, Hello!"; + std::regex re = std::regex("world"); + auto matches = std::vector(std::sregex_iterator(str.begin(), str.end(), re), std::sregex_iterator()); + ASSERT_STREQ( + hiliteMatches(str, matches, "(", ")").c_str(), + "Hello, (world), (world), (world), (world), (world), (world), Hello!" + ); + } + + TEST(hiliteMatches, overlappingMatches) { + std::string str = "world, Hello, world, Hello, world, Hello, world, Hello, world!"; + std::regex re = std::regex("Hello, world"); + std::regex re2 = std::regex("world, Hello"); + auto v = std::vector(std::sregex_iterator(str.begin(), str.end(), re), std::sregex_iterator()); + for(auto it = std::sregex_iterator(str.begin(), str.end(), re2); it != std::sregex_iterator(); ++it) { + v.push_back(*it); + } + ASSERT_STREQ( + hiliteMatches(str, v, "(", ")").c_str(), + "(world, Hello, world, Hello, world, Hello, world, Hello, world)!" + ); + } + + TEST(hiliteMatches, complexOverlappingMatches) { + std::string str = "legacyPackages.x86_64-linux.git-crypt"; + std::vector regexes = { + std::regex("t-cry"), + std::regex("ux\\.git-cry"), + std::regex("git-c"), + std::regex("pt"), + }; + std::vector matches; + for(auto regex : regexes) + { + for(auto it = std::sregex_iterator(str.begin(), str.end(), regex); it != std::sregex_iterator(); ++it) { + matches.push_back(*it); + } + } + ASSERT_STREQ( + hiliteMatches(str, matches, "(", ")").c_str(), + "legacyPackages.x86_64-lin(ux.git-crypt)" + ); + } +} -- cgit v1.2.3 From 6526d1676ba5a645f65d751e7529ccd273579017 Mon Sep 17 00:00:00 2001 From: pennae Date: Fri, 4 Mar 2022 19:31:59 +0100 Subject: replace most Pos objects/ptrs with indexes into a position table Pos objects are somewhat wasteful as they duplicate the origin file name and input type for each object. on files that produce more than one Pos when parsed this a sizeable waste of memory (one pointer per Pos). the same goes for ptr on 64 bit machines: parsing enough source to require 8 bytes to locate a position would need at least 8GB of input and 64GB of expression memory. it's not likely that we'll hit that any time soon, so we can use a uint32_t index to locate positions instead. --- src/libutil/types.hh | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'src/libutil') diff --git a/src/libutil/types.hh b/src/libutil/types.hh index 00ba567c6..22bc2b8dd 100644 --- a/src/libutil/types.hh +++ b/src/libutil/types.hh @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -102,4 +103,55 @@ public: Ptr operator->() const { return Ptr(**this); } }; +/* Provides an indexable container like vector<> with memory overhead + guarantees like list<> by allocating storage in chunks of ChunkSize + elements instead of using a contiguous memory allocation like vector<> + does. Not using a single vector that is resized reduces memory overhead + on large data sets by on average (growth factor)/2, mostly + eliminates copies within the vector during resizing, and provides stable + references to its elements. */ +template +class ChunkedVector { +private: + uint32_t size_ = 0; + std::vector> chunks; + + /* keep this out of the ::add hot path */ + [[gnu::noinline]] + auto & addChunk() + { + if (size_ >= std::numeric_limits::max() - ChunkSize) + abort(); + chunks.emplace_back(); + chunks.back().reserve(ChunkSize); + return chunks.back(); + } + +public: + ChunkedVector(uint32_t reserve) + { + chunks.reserve(reserve); + addChunk(); + } + + uint32_t size() const { return size_; } + + std::pair add(T value) + { + const auto idx = size_++; + auto & chunk = [&] () -> auto & { + if (auto & back = chunks.back(); back.size() < ChunkSize) + return back; + return addChunk(); + }(); + auto & result = chunk.emplace_back(std::move(value)); + return {result, idx}; + } + + const T & operator[](uint32_t idx) const + { + return chunks[idx / ChunkSize][idx % ChunkSize]; + } +}; + } -- cgit v1.2.3 From 00a32802328b58daa7af48ccac60f6154ef05639 Mon Sep 17 00:00:00 2001 From: pennae Date: Sat, 5 Mar 2022 17:31:50 +0100 Subject: don't use Symbol in Pos to represent a path PosTable deduplicates origin information, so using symbols for paths is no longer necessary. moving away from path Symbols also reduces the usage of symbols for things that are not keys in attribute sets, which will become important in the future when we turn symbols into indices as well. --- src/libutil/error.hh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/libutil') diff --git a/src/libutil/error.hh b/src/libutil/error.hh index 348018f57..f4706e3ed 100644 --- a/src/libutil/error.hh +++ b/src/libutil/error.hh @@ -87,11 +87,7 @@ struct ErrPos { origin = pos.origin; line = pos.line; column = pos.column; - // is file symbol null? - if (pos.file.set()) - file = pos.file; - else - file = ""; + file = pos.file; return *this; } -- cgit v1.2.3 From 8775be33931ec3b1cad97035ff3d5370a97178a1 Mon Sep 17 00:00:00 2001 From: pennae Date: Sat, 5 Mar 2022 14:40:24 +0100 Subject: store Symbols in a table as well, like positions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this slightly increases the amount of memory used for any given symbol, but this increase is more than made up for if the symbol is referenced more than once in the EvalState that holds it. on average every symbol should be referenced at least twice (once to introduce a binding, once to use it), so we expect no increase in memory on average. symbol tables are limited to 2³² entries like position tables, and similar arguments apply to why overflow is not likely: 2³² symbols would require as many string instances (at 24 bytes each) and map entries (at 24 bytes or more each, assuming that the map holds on average at most one item per bucket as the docs say). a full symbol table would require at least 192GB of memory just for symbols, which is well out of reach. (an ofborg eval of nixpks today creates less than a million symbols!) --- src/libutil/types.hh | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/libutil') diff --git a/src/libutil/types.hh b/src/libutil/types.hh index 22bc2b8dd..bd1dd8bee 100644 --- a/src/libutil/types.hh +++ b/src/libutil/types.hh @@ -152,6 +152,14 @@ public: { return chunks[idx / ChunkSize][idx % ChunkSize]; } + + template + void forEach(Fn fn) const + { + for (const auto & c : chunks) + for (const auto & e : c) + fn(e); + } }; } -- cgit v1.2.3 From 8adaa6acb5a513e010d262386271ef39c418ea7f Mon Sep 17 00:00:00 2001 From: pennae Date: Tue, 5 Apr 2022 18:37:38 +0200 Subject: remove pos it's no longer needed now that positions aren't really pointers any more. --- src/libutil/ref.hh | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'src/libutil') diff --git a/src/libutil/ref.hh b/src/libutil/ref.hh index 347b81f73..f9578afc7 100644 --- a/src/libutil/ref.hh +++ b/src/libutil/ref.hh @@ -99,47 +99,4 @@ make_ref(Args&&... args) return ref(p); } - -/* A non-nullable pointer. - This is similar to a C++ "& reference", but mutable. - This is similar to ref but backed by a regular pointer instead of a smart pointer. - */ -template -class ptr { -private: - T * p; - -public: - ptr(const ptr & r) - : p(r.p) - { } - - explicit ptr(T * p) - : p(p) - { - if (!p) - throw std::invalid_argument("null pointer cast to ptr"); - } - - T* operator ->() const - { - return &*p; - } - - T& operator *() const - { - return *p; - } - - bool operator == (const ptr & other) const - { - return p == other.p; - } - - bool operator != (const ptr & other) const - { - return p != other.p; - } -}; - } -- cgit v1.2.3 From 7ca6fbc8caf30d0f8b0dca9a294022d3e37c4082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Fri, 22 Apr 2022 10:01:02 +0200 Subject: Move ChunkedVector to its own header --- src/libutil/chunked-vector.hh | 68 +++++++++++++++++++++++++++++++++++++++++++ src/libutil/types.hh | 59 ------------------------------------- 2 files changed, 68 insertions(+), 59 deletions(-) create mode 100644 src/libutil/chunked-vector.hh (limited to 'src/libutil') diff --git a/src/libutil/chunked-vector.hh b/src/libutil/chunked-vector.hh new file mode 100644 index 000000000..f15af9cd7 --- /dev/null +++ b/src/libutil/chunked-vector.hh @@ -0,0 +1,68 @@ +#pragma once + +#include +#include +#include +#include + +namespace nix { + +/* Provides an indexable container like vector<> with memory overhead + guarantees like list<> by allocating storage in chunks of ChunkSize + elements instead of using a contiguous memory allocation like vector<> + does. Not using a single vector that is resized reduces memory overhead + on large data sets by on average (growth factor)/2, mostly + eliminates copies within the vector during resizing, and provides stable + references to its elements. */ +template +class ChunkedVector { +private: + uint32_t size_ = 0; + std::vector> chunks; + + /* keep this out of the ::add hot path */ + [[gnu::noinline]] + auto & addChunk() + { + if (size_ >= std::numeric_limits::max() - ChunkSize) + abort(); + chunks.emplace_back(); + chunks.back().reserve(ChunkSize); + return chunks.back(); + } + +public: + ChunkedVector(uint32_t reserve) + { + chunks.reserve(reserve); + addChunk(); + } + + uint32_t size() const { return size_; } + + std::pair add(T value) + { + const auto idx = size_++; + auto & chunk = [&] () -> auto & { + if (auto & back = chunks.back(); back.size() < ChunkSize) + return back; + return addChunk(); + }(); + auto & result = chunk.emplace_back(std::move(value)); + return {result, idx}; + } + + const T & operator[](uint32_t idx) const + { + return chunks[idx / ChunkSize][idx % ChunkSize]; + } + + template + void forEach(Fn fn) const + { + for (const auto & c : chunks) + for (const auto & e : c) + fn(e); + } +}; +} diff --git a/src/libutil/types.hh b/src/libutil/types.hh index bd1dd8bee..6bcbd7e1d 100644 --- a/src/libutil/types.hh +++ b/src/libutil/types.hh @@ -103,63 +103,4 @@ public: Ptr operator->() const { return Ptr(**this); } }; -/* Provides an indexable container like vector<> with memory overhead - guarantees like list<> by allocating storage in chunks of ChunkSize - elements instead of using a contiguous memory allocation like vector<> - does. Not using a single vector that is resized reduces memory overhead - on large data sets by on average (growth factor)/2, mostly - eliminates copies within the vector during resizing, and provides stable - references to its elements. */ -template -class ChunkedVector { -private: - uint32_t size_ = 0; - std::vector> chunks; - - /* keep this out of the ::add hot path */ - [[gnu::noinline]] - auto & addChunk() - { - if (size_ >= std::numeric_limits::max() - ChunkSize) - abort(); - chunks.emplace_back(); - chunks.back().reserve(ChunkSize); - return chunks.back(); - } - -public: - ChunkedVector(uint32_t reserve) - { - chunks.reserve(reserve); - addChunk(); - } - - uint32_t size() const { return size_; } - - std::pair add(T value) - { - const auto idx = size_++; - auto & chunk = [&] () -> auto & { - if (auto & back = chunks.back(); back.size() < ChunkSize) - return back; - return addChunk(); - }(); - auto & result = chunk.emplace_back(std::move(value)); - return {result, idx}; - } - - const T & operator[](uint32_t idx) const - { - return chunks[idx / ChunkSize][idx % ChunkSize]; - } - - template - void forEach(Fn fn) const - { - for (const auto & c : chunks) - for (const auto & e : c) - fn(e); - } -}; - } -- cgit v1.2.3 From 484badfa096db4c001f66eccbe00b70471f2e767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Fri, 22 Apr 2022 10:01:10 +0200 Subject: Add some tests for ChunkedVector --- src/libutil/tests/chunked-vector.cc | 54 +++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 src/libutil/tests/chunked-vector.cc (limited to 'src/libutil') diff --git a/src/libutil/tests/chunked-vector.cc b/src/libutil/tests/chunked-vector.cc new file mode 100644 index 000000000..868d11f6f --- /dev/null +++ b/src/libutil/tests/chunked-vector.cc @@ -0,0 +1,54 @@ +#include "chunked-vector.hh" + +#include + +namespace nix { + TEST(ChunkedVector, InitEmpty) { + auto v = ChunkedVector(100); + ASSERT_EQ(v.size(), 0); + } + + TEST(ChunkedVector, GrowsCorrectly) { + auto v = ChunkedVector(100); + for (auto i = 1; i < 20; i++) { + v.add(i); + ASSERT_EQ(v.size(), i); + } + } + + TEST(ChunkedVector, AddAndGet) { + auto v = ChunkedVector(100); + for (auto i = 1; i < 20; i++) { + auto [i2, idx] = v.add(i); + auto & i3 = v[idx]; + ASSERT_EQ(i, i2); + ASSERT_EQ(&i2, &i3); + } + } + + TEST(ChunkedVector, ForEach) { + auto v = ChunkedVector(100); + for (auto i = 1; i < 20; i++) { + v.add(i); + } + int count = 0; + v.forEach([&count](int elt) { + count++; + }); + ASSERT_EQ(count, v.size()); + } + + TEST(ChunkedVector, OverflowOK) { + // Similar to the AddAndGet, but intentionnally use a small + // initial ChunkedVector to force it to overflow + auto v = ChunkedVector(2); + for (auto i = 1; i < 20; i++) { + auto [i2, idx] = v.add(i); + auto & i3 = v[idx]; + ASSERT_EQ(i, i2); + ASSERT_EQ(&i2, &i3); + } + } + +} + -- cgit v1.2.3 From 7b889f31eac512c548fe56a73cd57d00d4fb89c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Hufschmitt?= Date: Fri, 22 Apr 2022 10:56:01 +0200 Subject: Fix the darwin build Looks like the auto-merge is indeed quite broken and merges even when the CI fails --- src/libutil/chunked-vector.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/libutil') diff --git a/src/libutil/chunked-vector.hh b/src/libutil/chunked-vector.hh index f15af9cd7..0a4f0b400 100644 --- a/src/libutil/chunked-vector.hh +++ b/src/libutil/chunked-vector.hh @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include -- cgit v1.2.3