aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorEelco Dolstra <edolstra@gmail.com>2022-12-12 19:57:32 +0100
committerEelco Dolstra <edolstra@gmail.com>2022-12-12 19:57:32 +0100
commit900b85408435f6f7118e5d46f606d377452cdb12 (patch)
tree5ba319fa0c4519bf27c5e7840d6def6ea34ad7bb /src
parente408af82ab71d870f854efe417abf1400567e1f1 (diff)
Add CanonPath wrapper to represent canonicalized paths
Diffstat (limited to 'src')
-rw-r--r--src/libutil/canon-path.cc103
-rw-r--r--src/libutil/canon-path.hh173
-rw-r--r--src/libutil/tests/canon-path.cc155
3 files changed, 431 insertions, 0 deletions
diff --git a/src/libutil/canon-path.cc b/src/libutil/canon-path.cc
new file mode 100644
index 000000000..b132b4262
--- /dev/null
+++ b/src/libutil/canon-path.cc
@@ -0,0 +1,103 @@
+#include "canon-path.hh"
+#include "util.hh"
+
+namespace nix {
+
+CanonPath CanonPath::root = CanonPath("/");
+
+CanonPath::CanonPath(std::string_view raw)
+ : path(absPath((Path) raw, "/"))
+{ }
+
+CanonPath::CanonPath(std::string_view raw, const CanonPath & root)
+ : path(absPath((Path) raw, root.abs()))
+{ }
+
+std::optional<CanonPath> CanonPath::parent() const
+{
+ if (isRoot()) return std::nullopt;
+ return CanonPath(unchecked_t(), path.substr(0, std::max((size_t) 1, path.rfind('/'))));
+}
+
+void CanonPath::pop()
+{
+ assert(!isRoot());
+ path.resize(std::max((size_t) 1, path.rfind('/')));
+}
+
+bool CanonPath::isWithin(const CanonPath & parent) const
+{
+ return !(
+ path.size() < parent.path.size()
+ || path.substr(0, parent.path.size()) != parent.path
+ || (parent.path.size() > 1 && path.size() > parent.path.size()
+ && path[parent.path.size()] != '/'));
+}
+
+CanonPath CanonPath::removePrefix(const CanonPath & prefix) const
+{
+ assert(isWithin(prefix));
+ if (prefix.isRoot()) return *this;
+ if (path.size() == prefix.path.size()) return root;
+ return CanonPath(unchecked_t(), path.substr(prefix.path.size()));
+}
+
+void CanonPath::extend(const CanonPath & x)
+{
+ if (x.isRoot()) return;
+ if (isRoot())
+ path += x.rel();
+ else
+ path += x.abs();
+}
+
+CanonPath CanonPath::operator + (const CanonPath & x) const
+{
+ auto res = *this;
+ res.extend(x);
+ return res;
+}
+
+void CanonPath::push(std::string_view c)
+{
+ assert(c.find('/') == c.npos);
+ assert(c != "." && c != "..");
+ if (!isRoot()) path += '/';
+ path += c;
+}
+
+CanonPath CanonPath::operator + (std::string_view c) const
+{
+ auto res = *this;
+ res.push(c);
+ return res;
+}
+
+bool CanonPath::isAllowed(const std::set<CanonPath> & allowed) const
+{
+ /* Check if `this` is an exact match or the parent of an
+ allowed path. */
+ auto lb = allowed.lower_bound(*this);
+ if (lb != allowed.end()) {
+ if (lb->isWithin(*this))
+ return true;
+ }
+
+ /* Check if a parent of `this` is allowed. */
+ auto path = *this;
+ while (!path.isRoot()) {
+ path.pop();
+ if (allowed.count(path))
+ return true;
+ }
+
+ return false;
+}
+
+std::ostream & operator << (std::ostream & stream, const CanonPath & path)
+{
+ stream << path.abs();
+ return stream;
+}
+
+}
diff --git a/src/libutil/canon-path.hh b/src/libutil/canon-path.hh
new file mode 100644
index 000000000..c5e7f0596
--- /dev/null
+++ b/src/libutil/canon-path.hh
@@ -0,0 +1,173 @@
+#pragma once
+
+#include <string>
+#include <optional>
+#include <cassert>
+#include <iostream>
+#include <set>
+
+namespace nix {
+
+/* A canonical representation of a path. It ensures the following:
+
+ - It always starts with a slash.
+
+ - It never ends with a slash, except if the path is "/".
+
+ - A slash is never followed by a slash (i.e. no empty components).
+
+ - There are no components equal to '.' or '..'.
+
+ Note that the path does not need to correspond to an actually
+ existing path, and there is no guarantee that symlinks are
+ resolved.
+*/
+class CanonPath
+{
+ std::string path;
+
+public:
+
+ /* Construct a canon path from a non-canonical path. Any '.', '..'
+ or empty components are removed. */
+ CanonPath(std::string_view raw);
+
+ explicit CanonPath(const char * raw)
+ : CanonPath(std::string_view(raw))
+ { }
+
+ struct unchecked_t { };
+
+ CanonPath(unchecked_t _, std::string path)
+ : path(std::move(path))
+ { }
+
+ static CanonPath root;
+
+ /* If `raw` starts with a slash, return
+ `CanonPath(raw)`. Otherwise return a `CanonPath` representing
+ `root + "/" + raw`. */
+ CanonPath(std::string_view raw, const CanonPath & root);
+
+ bool isRoot() const
+ { return path.size() <= 1; }
+
+ explicit operator std::string_view() const
+ { return path; }
+
+ const std::string & abs() const
+ { return path; }
+
+ /* Like abs(), but return an empty string if this path is
+ '/'. Thus the returned string never ends in a slash. */
+ const std::string & absOrEmpty() const
+ {
+ const static std::string epsilon;
+ return isRoot() ? epsilon : path;
+ }
+
+ const char * c_str() const
+ { return path.c_str(); }
+
+ std::string_view rel() const
+ { return ((std::string_view) path).substr(1); }
+
+ struct Iterator
+ {
+ std::string_view remaining;
+ size_t slash;
+
+ Iterator(std::string_view remaining)
+ : remaining(remaining)
+ , slash(remaining.find('/'))
+ { }
+
+ bool operator != (const Iterator & x) const
+ { return remaining.data() != x.remaining.data(); }
+
+ const std::string_view operator * () const
+ { return remaining.substr(0, slash); }
+
+ void operator ++ ()
+ {
+ if (slash == remaining.npos)
+ remaining = remaining.substr(remaining.size());
+ else {
+ remaining = remaining.substr(slash + 1);
+ slash = remaining.find('/');
+ }
+ }
+ };
+
+ Iterator begin() const { return Iterator(rel()); }
+ Iterator end() const { return Iterator(rel().substr(path.size() - 1)); }
+
+ std::optional<CanonPath> parent() const;
+
+ /* Remove the last component. Panics if this path is the root. */
+ void pop();
+
+ std::optional<std::string_view> dirOf() const
+ {
+ if (isRoot()) return std::nullopt;
+ return path.substr(0, path.rfind('/'));
+ }
+
+ std::optional<std::string_view> baseName() const
+ {
+ if (isRoot()) return std::nullopt;
+ return ((std::string_view) path).substr(path.rfind('/') + 1);
+ }
+
+ bool operator == (const CanonPath & x) const
+ { return path == x.path; }
+
+ bool operator != (const CanonPath & x) const
+ { return path != x.path; }
+
+ /* Compare paths lexicographically except that path separators
+ are sorted before any other character. That is, in the sorted order
+ a directory is always followed directly by its children. For
+ instance, 'foo' < 'foo/bar' < 'foo!'. */
+ bool operator < (const CanonPath & x) const
+ {
+ auto i = path.begin();
+ auto j = x.path.begin();
+ for ( ; i != path.end() && j != x.path.end(); ++i, ++j) {
+ auto c_i = *i;
+ if (c_i == '/') c_i = 0;
+ auto c_j = *j;
+ if (c_j == '/') c_j = 0;
+ if (c_i < c_j) return true;
+ if (c_i > c_j) return false;
+ }
+ return i == path.end() && j != x.path.end();
+ }
+
+ /* Return true if `this` is equal to `parent` or a child of
+ `parent`. */
+ bool isWithin(const CanonPath & parent) const;
+
+ CanonPath removePrefix(const CanonPath & prefix) const;
+
+ /* Append another path to this one. */
+ void extend(const CanonPath & x);
+
+ /* Concatenate two paths. */
+ CanonPath operator + (const CanonPath & x) const;
+
+ /* Add a path component to this one. It must not contain any slashes. */
+ void push(std::string_view c);
+
+ CanonPath operator + (std::string_view c) const;
+
+ /* Check whether access to this path is allowed, which is the case
+ if 1) `this` is within any of the `allowed` paths; or 2) any of
+ the `allowed` paths are within `this`. (The latter condition
+ ensures access to the parents of allowed paths.) */
+ bool isAllowed(const std::set<CanonPath> & allowed) const;
+};
+
+std::ostream & operator << (std::ostream & stream, const CanonPath & path);
+
+}
diff --git a/src/libutil/tests/canon-path.cc b/src/libutil/tests/canon-path.cc
new file mode 100644
index 000000000..c1c5adadf
--- /dev/null
+++ b/src/libutil/tests/canon-path.cc
@@ -0,0 +1,155 @@
+#include "canon-path.hh"
+
+#include <gtest/gtest.h>
+
+namespace nix {
+
+ TEST(CanonPath, basic) {
+ {
+ CanonPath p("/");
+ ASSERT_EQ(p.abs(), "/");
+ ASSERT_EQ(p.rel(), "");
+ ASSERT_EQ(p.baseName(), std::nullopt);
+ ASSERT_EQ(p.dirOf(), std::nullopt);
+ ASSERT_FALSE(p.parent());
+ }
+
+ {
+ CanonPath p("/foo//");
+ ASSERT_EQ(p.abs(), "/foo");
+ ASSERT_EQ(p.rel(), "foo");
+ ASSERT_EQ(*p.baseName(), "foo");
+ ASSERT_EQ(*p.dirOf(), ""); // FIXME: do we want this?
+ ASSERT_EQ(p.parent()->abs(), "/");
+ }
+
+ {
+ CanonPath p("foo/bar");
+ ASSERT_EQ(p.abs(), "/foo/bar");
+ ASSERT_EQ(p.rel(), "foo/bar");
+ ASSERT_EQ(*p.baseName(), "bar");
+ ASSERT_EQ(*p.dirOf(), "/foo");
+ ASSERT_EQ(p.parent()->abs(), "/foo");
+ }
+
+ {
+ CanonPath p("foo//bar/");
+ ASSERT_EQ(p.abs(), "/foo/bar");
+ ASSERT_EQ(p.rel(), "foo/bar");
+ ASSERT_EQ(*p.baseName(), "bar");
+ ASSERT_EQ(*p.dirOf(), "/foo");
+ }
+ }
+
+ TEST(CanonPath, pop) {
+ CanonPath p("foo/bar/x");
+ ASSERT_EQ(p.abs(), "/foo/bar/x");
+ p.pop();
+ ASSERT_EQ(p.abs(), "/foo/bar");
+ p.pop();
+ ASSERT_EQ(p.abs(), "/foo");
+ p.pop();
+ ASSERT_EQ(p.abs(), "/");
+ }
+
+ TEST(CanonPath, removePrefix) {
+ CanonPath p1("foo/bar");
+ CanonPath p2("foo/bar/a/b/c");
+ ASSERT_EQ(p2.removePrefix(p1).abs(), "/a/b/c");
+ ASSERT_EQ(p1.removePrefix(p1).abs(), "/");
+ ASSERT_EQ(p1.removePrefix(CanonPath("/")).abs(), "/foo/bar");
+ }
+
+ TEST(CanonPath, iter) {
+ {
+ CanonPath p("a//foo/bar//");
+ std::vector<std::string_view> ss;
+ for (auto & c : p) ss.push_back(c);
+ ASSERT_EQ(ss, std::vector<std::string_view>({"a", "foo", "bar"}));
+ }
+
+ {
+ CanonPath p("/");
+ std::vector<std::string_view> ss;
+ for (auto & c : p) ss.push_back(c);
+ ASSERT_EQ(ss, std::vector<std::string_view>());
+ }
+ }
+
+ TEST(CanonPath, concat) {
+ {
+ CanonPath p1("a//foo/bar//");
+ CanonPath p2("xyzzy/bla");
+ ASSERT_EQ((p1 + p2).abs(), "/a/foo/bar/xyzzy/bla");
+ }
+
+ {
+ CanonPath p1("/");
+ CanonPath p2("/a/b");
+ ASSERT_EQ((p1 + p2).abs(), "/a/b");
+ }
+
+ {
+ CanonPath p1("/a/b");
+ CanonPath p2("/");
+ ASSERT_EQ((p1 + p2).abs(), "/a/b");
+ }
+
+ {
+ CanonPath p("/foo/bar");
+ ASSERT_EQ((p + "x").abs(), "/foo/bar/x");
+ }
+
+ {
+ CanonPath p("/");
+ ASSERT_EQ((p + "foo" + "bar").abs(), "/foo/bar");
+ }
+ }
+
+ TEST(CanonPath, within) {
+ {
+ ASSERT_TRUE(CanonPath("foo").isWithin(CanonPath("foo")));
+ ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("bar")));
+ ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("fo")));
+ ASSERT_TRUE(CanonPath("foo/bar").isWithin(CanonPath("foo")));
+ ASSERT_FALSE(CanonPath("foo").isWithin(CanonPath("foo/bar")));
+ ASSERT_TRUE(CanonPath("/foo/bar/default.nix").isWithin(CanonPath("/")));
+ ASSERT_TRUE(CanonPath("/").isWithin(CanonPath("/")));
+ }
+ }
+
+ TEST(CanonPath, sort) {
+ ASSERT_FALSE(CanonPath("foo") < CanonPath("foo"));
+ ASSERT_TRUE (CanonPath("foo") < CanonPath("foo/bar"));
+ ASSERT_TRUE (CanonPath("foo/bar") < CanonPath("foo!"));
+ ASSERT_FALSE(CanonPath("foo!") < CanonPath("foo"));
+ ASSERT_TRUE (CanonPath("foo") < CanonPath("foo!"));
+ }
+
+ TEST(CanonPath, allowed) {
+ {
+ std::set<CanonPath> allowed {
+ CanonPath("foo/bar"),
+ CanonPath("foo!"),
+ CanonPath("xyzzy"),
+ CanonPath("a/b/c"),
+ };
+
+ ASSERT_TRUE (CanonPath("foo/bar").isAllowed(allowed));
+ ASSERT_TRUE (CanonPath("foo/bar/bla").isAllowed(allowed));
+ ASSERT_TRUE (CanonPath("foo").isAllowed(allowed));
+ ASSERT_FALSE(CanonPath("bar").isAllowed(allowed));
+ ASSERT_FALSE(CanonPath("bar/a").isAllowed(allowed));
+ ASSERT_TRUE (CanonPath("a").isAllowed(allowed));
+ ASSERT_TRUE (CanonPath("a/b").isAllowed(allowed));
+ ASSERT_TRUE (CanonPath("a/b/c").isAllowed(allowed));
+ ASSERT_TRUE (CanonPath("a/b/c/d").isAllowed(allowed));
+ ASSERT_TRUE (CanonPath("a/b/c/d/e").isAllowed(allowed));
+ ASSERT_FALSE(CanonPath("a/b/a").isAllowed(allowed));
+ ASSERT_FALSE(CanonPath("a/b/d").isAllowed(allowed));
+ ASSERT_FALSE(CanonPath("aaa").isAllowed(allowed));
+ ASSERT_FALSE(CanonPath("zzz").isAllowed(allowed));
+ ASSERT_TRUE (CanonPath("/").isAllowed(allowed));
+ }
+ }
+}