diff options
-rw-r--r-- | meson.build | 10 | ||||
-rw-r--r-- | src/libutil/generator.hh | 289 | ||||
-rw-r--r-- | src/libutil/meson.build | 1 | ||||
-rw-r--r-- | tests/unit/libutil/generator.cc | 214 | ||||
-rw-r--r-- | tests/unit/meson.build | 1 |
5 files changed, 515 insertions, 0 deletions
diff --git a/meson.build b/meson.build index 7d8a3a315..a208151ac 100644 --- a/meson.build +++ b/meson.build @@ -142,6 +142,16 @@ else cpp_pch = [] endif +# gcc 12 is known to miscompile some coroutine-based code quite horribly, +# causing (among other things) copies of move-only objects and the double +# frees one would expect when the objects are unique_ptrs. these problems +# often show up as memory corruption when nesting generators (since we do +# treat generators like owned memory) and will cause inexplicable crashs. +assert( + cxx.get_id() != 'gcc' or cxx.version().version_compare('>=13'), + 'GCC 12 and earlier are known to miscompile lix coroutines, use GCC 13 or clang.' +) + # Translate some historical and Mesony CPU names to Lixy CPU names. # FIXME(Qyriad): the 32-bit x86 code is not tested right now, because cross compilation for Lix diff --git a/src/libutil/generator.hh b/src/libutil/generator.hh new file mode 100644 index 000000000..fda257002 --- /dev/null +++ b/src/libutil/generator.hh @@ -0,0 +1,289 @@ +#pragma once +///@file + +#include "types.hh" + +#include <coroutine> +#include <exception> +#include <optional> +#include <utility> +#include <variant> + +namespace nix { + +template<typename T, typename Transform> +struct Generator; + +namespace _generator { + +template<typename T> +struct promise_state; +template<typename T> +struct GeneratorBase; + +struct finished {}; + +template<typename T> +struct link +{ + std::coroutine_handle<> handle{}; + promise_state<T> * state{}; +}; + +struct failure +{ + std::exception_ptr e; +}; + +template<typename T> +struct promise_state +{ + // result of the most recent coroutine resumption: a value, + // a nested coroutine to drain, an error, or our completion + std::variant<T, link<T>, failure, finished> value{}; + // coroutine to resume when this one has finished. set when + // one generator yields another, such that the entire chain + // of parents always linearly points to the root generator. + link<T> parent{}; +}; + +template<typename T, typename Transform> +struct promise : promise_state<T> +{ + using transform_t = std::conditional_t<std::is_void_v<Transform>, std::identity, Transform>; + + transform_t convert; + std::optional<GeneratorBase<T>> inner; + + // called by the compiler to convert the internal promise object + // to the user-declared (function return) type of the coroutine. + Generator<T, Transform> get_return_object() + { + auto h = std::coroutine_handle<promise>::from_promise(*this); + return Generator<T, Transform>(GeneratorBase<T>(h, h.promise())); + } + std::suspend_always initial_suspend() + { + return {}; + } + std::suspend_always final_suspend() noexcept + { + return {}; + } + void unhandled_exception() + { + this->value = failure{std::current_exception()}; + } + + // `co_yield` handler for "simple" values, i.e. those that + // are transformed directly to a T by the given transform. + template<typename From> + requires requires(transform_t t, From && f) { + { + t(std::forward<From>(f)) + } -> std::convertible_to<T>; + } + std::suspend_always yield_value(From && from) + { + this->value.template emplace<0>(convert(std::forward<From>(from))); + return {}; + } + + // `co_yield` handler for "complex" values, i.e. those that + // are transformed into another generator. we'll drain that + // new generator completely before resuming the current one + template<typename From> + requires requires(transform_t t, From && f) { + static_cast<Generator<T, void>>(t(std::forward<From>(f))); + } + std::suspend_always yield_value(From && from) + { + inner = static_cast<Generator<T, void>>(convert(std::forward<From>(from))).impl; + this->value = inner->active; + return {}; + } + + // handler for `co_return`, including the implicit `co_return` + // at the end of a coroutine that does not have one explicitly + void return_void() + { + this->value = finished{}; + } +}; + +template<typename T> +struct GeneratorBase +{ + template<typename, typename> + friend struct Generator; + template<typename, typename> + friend struct promise; + + // NOTE coroutine handles are LiteralType, own a memory resource (that may + // itself own unique resources), and are "typically TriviallyCopyable". we + // need to take special care to wrap this into a less footgunny interface. + GeneratorBase(GeneratorBase && other) + { + swap(other); + } + + GeneratorBase & operator=(GeneratorBase && other) + { + GeneratorBase(std::move(other)).swap(*this); + return *this; + } + + ~GeneratorBase() + { + if (h) { + h.destroy(); + } + } + + std::optional<T> next() + { + // resume the currently active coroutine once. it can return either a + // value, an exception, another generator to drain, or it can finish. + // since c++ coroutines cannot directly return anything from resume() + // we must communicate all results via `active->state.value` instead. + while (active.handle) { + active.handle.resume(); + auto & p = *active.state; + // process the result. only one case sets this to a non-`nullopt` + // value, all others leave it at `nullopt` to request more loops. + auto result = std::visit( + overloaded{ + // when the current coroutine handle is done we'll try to + // resume its parent (if the current handle was retrieved + // from a `co_yield`ed generator) or finish the generator + // entirely because the root active.parent has no handle. + [&](finished) -> std::optional<T> { + active = p.parent; + return {}; + }, + // when the coroutine yields a generator we push the full + // inner stack onto our own stack and resume the top item + [&](link<T> & inner) -> std::optional<T> { + auto base = inner.state; + while (base->parent.handle) { + base = base->parent.state; + } + base->parent = active; + active = inner; + return {}; + }, + // values are simply returned to the caller, as received. + [&](T & value) -> std::optional<T> { return std::move(value); }, + // exceptions must be rethrown. resuming again after this + // is not allowed because the top-most coroutine would be + // finished and we'd thus step back to its parent, but by + // doing so we might invalidate invariants of the parent. + // allowing the parent to catch exceptions of a child for + // `co_yield` exceptions specifically would introduce far + // too many problems to be worth the doing (since parents + // can neither know nor revert any yields of their child) + [&](failure & f) -> std::optional<T> { + active = {}; + std::rethrow_exception(f.e); + }, + }, + p.value + ); + if (result) { + return result; + } + } + + return std::nullopt; + } + +protected: + std::coroutine_handle<> h{}; + link<T> active{}; + + GeneratorBase(std::coroutine_handle<> h, promise_state<T> & state) + : h(h) + , active(h, &state) + { + } + + void swap(GeneratorBase & other) + { + std::swap(h, other.h); + std::swap(active, other.active); + } +}; + +} // _generator + +/// Coroutine-based iterator modeled loosely on Rust [`std::iter::Iterator`][iter] +/// interface. Like Rust's `Iterator` and unlike common C++ iterators, a Generator +/// returns `std::optional<T>` values from its next() function, but unlike both it +/// can also transform items produced within using a Transform function object the +/// Generator holds before returning them via next(). To allow generator nesting a +/// Transform may also return another Generator instance for any yielded value, in +/// this case the new Generator will temporarily take priority over the previously +/// running one and have its values returned until it is exhausted, then return to +/// the previous Generator. This mechanism may nest Generator to arbitrary depths. +/// +/// \tparam T item type +/// \tparam Transform transform function object type, or `void` for no transform +/// +/// [iter]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html +template<typename T, typename Transform = void> +struct Generator +{ + template<typename, typename> + friend struct _generator::promise; + // erasing the Transform type requires all generator types with a non-erased + // Transform to access the private constructor of the erased type, but sadly + // we cannot resonably restrict this to "T, non-void" without much more code + // or compiler warnings on some versions of clang, e.g. the one darwin uses. + template<typename, typename> + friend struct Generator; + + using promise_type = _generator::promise<T, Transform>; + + Generator(const Generator &) = delete; + Generator & operator=(const Generator &) = delete; + Generator(Generator &&) = default; + Generator & operator=(Generator &&) = default; + + /// If the coroutine held by the Generator has not finished, runs it until it + /// yields a value, throws any exception, or returns. If the coroutine yields + /// a value this value is passed to a persistent instance of `Transform` that + /// is held by the Generator, and the result of this call is returned. If the + /// coroutine throws an exception, or the Transform throws an exception while + /// processing an item, that exception is rethrown and the Generator will not + /// return any more non-`std::nullopt` values from next(). Once the contained + /// coroutine has completed or an exception has been thrown the Generator can + /// no longer return any valid values, only `std::nullopt`. Exceptions thrown + /// are thrown only once, further invocations of next() return `std::nullopt`. + /// + /// \returns `std::nullopt` if the coroutine has completed, or a value + std::optional<T> next() + { + return impl.next(); + } + + /// Type-erases the `Transform`. + /// + /// \return a new Generator with the `Transform` type-erased + Generator<T, void> decay() && + { + return Generator<T, void>(std::move(impl)); + } + + /// \copydoc decay() + operator Generator<T, void>() && + { + return std::move(*this).decay(); + } + +private: + _generator::GeneratorBase<T> impl; + + explicit Generator(_generator::GeneratorBase<T> b) : impl(std::move(b)) {} +}; + +} diff --git a/src/libutil/meson.build b/src/libutil/meson.build index f6d14a11b..211196c60 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -72,6 +72,7 @@ libutil_headers = files( 'file-system.hh', 'finally.hh', 'fmt.hh', + 'generator.hh', 'git.hh', 'hash.hh', 'hilite.hh', diff --git a/tests/unit/libutil/generator.cc b/tests/unit/libutil/generator.cc new file mode 100644 index 000000000..800e6ca8a --- /dev/null +++ b/tests/unit/libutil/generator.cc @@ -0,0 +1,214 @@ +#include "generator.hh" + +#include <concepts> +#include <cstdint> +#include <gtest/gtest.h> + +namespace nix { + +TEST(Generator, yields) +{ + auto g = []() -> Generator<int> { + co_yield 1; + co_yield 2; + }(); + + ASSERT_EQ(g.next(), 1); + ASSERT_EQ(g.next(), 2); + ASSERT_FALSE(g.next().has_value()); +} + +TEST(Generator, returns) +{ + { + auto g = []() -> Generator<int> { co_return; }(); + + ASSERT_FALSE(g.next().has_value()); + } + { + auto g = []() -> Generator<int> { + co_yield 1; + co_yield []() -> Generator<int> { co_return; }(); + co_yield 2; + co_yield []() -> Generator<int> { co_yield 10; }(); + co_yield 3; + (void) "dummy statement to force some more execution"; + }(); + + ASSERT_EQ(g.next(), 1); + ASSERT_EQ(g.next(), 2); + ASSERT_EQ(g.next(), 10); + ASSERT_EQ(g.next(), 3); + ASSERT_FALSE(g.next().has_value()); + } +} + +TEST(Generator, nests) +{ + auto g = []() -> Generator<int> { + co_yield 1; + co_yield []() -> Generator<int> { + co_yield 9; + co_yield []() -> Generator<int> { + co_yield 99; + co_yield 100; + }(); + }(); + + auto g2 = []() -> Generator<int> { + co_yield []() -> Generator<int> { + co_yield 2000; + co_yield 2001; + }(); + co_yield 1001; + }(); + + co_yield g2.next().value(); + co_yield std::move(g2); + co_yield 2; + }(); + + ASSERT_EQ(g.next(), 1); + ASSERT_EQ(g.next(), 9); + ASSERT_EQ(g.next(), 99); + ASSERT_EQ(g.next(), 100); + ASSERT_EQ(g.next(), 2000); + ASSERT_EQ(g.next(), 2001); + ASSERT_EQ(g.next(), 1001); + ASSERT_EQ(g.next(), 2); + ASSERT_FALSE(g.next().has_value()); +} + +TEST(Generator, nestsExceptions) +{ + auto g = []() -> Generator<int> { + co_yield 1; + co_yield []() -> Generator<int> { + co_yield 9; + throw 1; + co_yield 10; + }(); + co_yield 2; + }(); + + ASSERT_EQ(g.next(), 1); + ASSERT_EQ(g.next(), 9); + ASSERT_THROW(g.next(), int); +} + +TEST(Generator, exception) +{ + { + auto g = []() -> Generator<int> { + co_yield 1; + throw 1; + }(); + + ASSERT_EQ(g.next(), 1); + ASSERT_THROW(g.next(), int); + ASSERT_FALSE(g.next().has_value()); + } + { + auto g = []() -> Generator<int> { + throw 1; + co_return; + }(); + + ASSERT_THROW(g.next(), int); + ASSERT_FALSE(g.next().has_value()); + } +} + +namespace { +struct Transform +{ + int state = 0; + + std::pair<uint32_t, int> operator()(std::integral auto x) + { + return {x, state++}; + } + + Generator<std::pair<uint32_t, int>, Transform> operator()(const char *) + { + co_yield 9; + co_yield 19; + } + + Generator<std::pair<uint32_t, int>, Transform> operator()(Generator<int> && inner) + { + return [](auto g) mutable -> Generator<std::pair<uint32_t, int>, Transform> { + while (auto i = g.next()) { + co_yield *i; + } + }(std::move(inner)); + } +}; +} + +TEST(Generator, transform) +{ + auto g = []() -> Generator<std::pair<uint32_t, int>, Transform> { + co_yield int32_t(-1); + co_yield ""; + co_yield []() -> Generator<int> { co_yield 7; }(); + co_yield 20; + }(); + + ASSERT_EQ(g.next(), (std::pair<unsigned, int>{4294967295, 0})); + ASSERT_EQ(g.next(), (std::pair<unsigned, int>{9, 0})); + ASSERT_EQ(g.next(), (std::pair<unsigned, int>{19, 1})); + ASSERT_EQ(g.next(), (std::pair<unsigned, int>{7, 0})); + ASSERT_EQ(g.next(), (std::pair<unsigned, int>{20, 1})); + ASSERT_FALSE(g.next().has_value()); +} + +namespace { +struct ThrowTransform +{ + int operator()(int x) + { + return x; + } + + int operator()(bool) + { + throw 2; + } + + Generator<int, void> operator()(Generator<int> && inner) + { + throw false; + } +}; +} + +TEST(Generator, transformThrows) +{ + { + auto g = []() -> Generator<int, ThrowTransform> { + co_yield 1; + co_yield false; + co_yield 2; + }(); + + ASSERT_EQ(g.next(), 1); + ASSERT_THROW(g.next(), int); + ASSERT_FALSE(g.next().has_value()); + } + { + auto g = []() -> Generator<int, ThrowTransform> { + co_yield 1; + co_yield []() -> Generator<int> { + co_yield 2; + }(); + co_yield 3; + }(); + + ASSERT_EQ(g.next(), 1); + ASSERT_THROW(g.next(), bool); + ASSERT_FALSE(g.next().has_value()); + } +} + +} diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 2b5471526..17d089f18 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -40,6 +40,7 @@ libutil_tests_sources = files( 'libutil/compression.cc', 'libutil/config.cc', 'libutil/escape-string.cc', + 'libutil/generator.cc', 'libutil/git.cc', 'libutil/hash.cc', 'libutil/hilite.cc', |