aboutsummaryrefslogtreecommitdiff
path: root/src/libexpr
diff options
context:
space:
mode:
authorEelco Dolstra <edolstra@gmail.com>2022-01-17 19:49:52 +0100
committerGitHub <noreply@github.com>2022-01-17 19:49:52 +0100
commitfc2443a67caa139fdfb0fa0fccf3d777d736ffe9 (patch)
tree8266100830cdde723f42e4332658a2ae464f7564 /src/libexpr
parent008ddef4b08e9bee530a5a4c597c88b344089021 (diff)
parent72f42093e711db1ab43c920688bb5e59df33935d (diff)
Merge pull request #5812 from pennae/small-perf-improvements
improve parser performance a bit
Diffstat (limited to 'src/libexpr')
-rw-r--r--src/libexpr/eval.hh6
-rw-r--r--src/libexpr/lexer.l41
-rw-r--r--src/libexpr/nixexpr.cc2
-rw-r--r--src/libexpr/parser.y48
-rw-r--r--src/libexpr/primops.cc9
-rw-r--r--src/libexpr/symbol-table.hh21
6 files changed, 77 insertions, 50 deletions
diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh
index 89814785e..850c5bae6 100644
--- a/src/libexpr/eval.hh
+++ b/src/libexpr/eval.hh
@@ -179,8 +179,8 @@ public:
Expr * parseExprFromFile(const Path & path, StaticEnv & staticEnv);
/* Parse a Nix expression from the specified string. */
- Expr * parseExprFromString(std::string_view s, const Path & basePath, StaticEnv & staticEnv);
- Expr * parseExprFromString(std::string_view s, const Path & basePath);
+ Expr * parseExprFromString(std::string s, const Path & basePath, StaticEnv & staticEnv);
+ Expr * parseExprFromString(std::string s, const Path & basePath);
Expr * parseStdin();
@@ -308,7 +308,7 @@ private:
friend struct ExprAttrs;
friend struct ExprLet;
- Expr * parse(const char * text, FileOrigin origin, const Path & path,
+ Expr * parse(char * text, size_t length, FileOrigin origin, const Path & path,
const Path & basePath, StaticEnv & staticEnv);
public:
diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l
index c18877e29..a0e7a1877 100644
--- a/src/libexpr/lexer.l
+++ b/src/libexpr/lexer.l
@@ -64,29 +64,32 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
}
-// FIXME: optimize
-static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length)
+// we make use of the fact that the parser receives a private copy of the input
+// string and can munge around in it.
+static Expr * unescapeStr(SymbolTable & symbols, char * s, size_t length)
{
- string t;
- t.reserve(length);
+ char * result = s;
+ char * t = s;
char c;
+ // the input string is terminated with *two* NULs, so we can safely take
+ // *one* character after the one being checked against.
while ((c = *s++)) {
if (c == '\\') {
- assert(*s);
c = *s++;
- if (c == 'n') t += '\n';
- else if (c == 'r') t += '\r';
- else if (c == 't') t += '\t';
- else t += c;
+ if (c == 'n') *t = '\n';
+ else if (c == 'r') *t = '\r';
+ else if (c == 't') *t = '\t';
+ else *t = c;
}
else if (c == '\r') {
/* Normalise CR and CR/LF into LF. */
- t += '\n';
+ *t = '\n';
if (*s == '\n') s++; /* cr/lf */
}
- else t += c;
+ else *t = c;
+ t++;
}
- return new ExprString(symbols.create(t));
+ return new ExprString(symbols.create({result, size_t(t - result)}));
}
@@ -139,7 +142,7 @@ or { return OR_KW; }
\/\/ { return UPDATE; }
\+\+ { return CONCAT; }
-{ID} { yylval->id = strdup(yytext); return ID; }
+{ID} { yylval->id = {yytext, (size_t) yyleng}; return ID; }
{INT} { errno = 0;
try {
yylval->n = boost::lexical_cast<int64_t>(yytext);
@@ -221,14 +224,14 @@ or { return OR_KW; }
<PATH_START>{PATH_SEG} {
POP_STATE();
PUSH_STATE(INPATH_SLASH);
- yylval->path = strdup(yytext);
+ yylval->path = {yytext, (size_t) yyleng};
return PATH;
}
<PATH_START>{HPATH_START} {
POP_STATE();
PUSH_STATE(INPATH_SLASH);
- yylval->path = strdup(yytext);
+ yylval->path = {yytext, (size_t) yyleng};
return HPATH;
}
@@ -237,7 +240,7 @@ or { return OR_KW; }
PUSH_STATE(INPATH_SLASH);
else
PUSH_STATE(INPATH);
- yylval->path = strdup(yytext);
+ yylval->path = {yytext, (size_t) yyleng};
return PATH;
}
{HPATH} {
@@ -245,7 +248,7 @@ or { return OR_KW; }
PUSH_STATE(INPATH_SLASH);
else
PUSH_STATE(INPATH);
- yylval->path = strdup(yytext);
+ yylval->path = {yytext, (size_t) yyleng};
return HPATH;
}
@@ -280,8 +283,8 @@ or { return OR_KW; }
throw ParseError("path has a trailing slash");
}
-{SPATH} { yylval->path = strdup(yytext); return SPATH; }
-{URI} { yylval->uri = strdup(yytext); return URI; }
+{SPATH} { yylval->path = {yytext, (size_t) yyleng}; return SPATH; }
+{URI} { yylval->uri = {yytext, (size_t) yyleng}; return URI; }
[ \t\r\n]+ /* eat up whitespace */
\#[^\r\n]* /* single-line comments */
diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc
index a75357871..640c44c01 100644
--- a/src/libexpr/nixexpr.cc
+++ b/src/libexpr/nixexpr.cc
@@ -473,7 +473,7 @@ string ExprLambda::showNamePos() const
size_t SymbolTable::totalSize() const
{
size_t n = 0;
- for (auto & i : symbols)
+ for (auto & i : store)
n += i.size();
return n;
}
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
index f8aaea582..a3e713937 100644
--- a/src/libexpr/parser.y
+++ b/src/libexpr/parser.y
@@ -273,9 +273,16 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err
nix::Formal * formal;
nix::NixInt n;
nix::NixFloat nf;
- const char * id; // !!! -> Symbol
- char * path;
- char * uri;
+ // using C a struct allows us to avoid having to define the special
+ // members that using string_view here would implicitly delete.
+ struct StringToken {
+ const char * p;
+ size_t l;
+ operator std::string_view() const { return {p, l}; }
+ };
+ StringToken id; // !!! -> Symbol
+ StringToken path;
+ StringToken uri;
std::vector<nix::AttrName> * attrNames;
std::vector<std::pair<nix::Pos, nix::Expr *> > * string_parts;
}
@@ -397,7 +404,7 @@ expr_select
expr_simple
: ID {
- if (strcmp($1, "__curPos") == 0)
+ if (strncmp($1.p, "__curPos", $1.l) == 0)
$$ = new ExprPos(CUR_POS);
else
$$ = new ExprVar(CUR_POS, data->symbols.create($1));
@@ -414,7 +421,7 @@ expr_simple
$$ = new ExprConcatStrings(CUR_POS, false, $2);
}
| SPATH {
- string path($1 + 1, strlen($1) - 2);
+ string path($1.p + 1, $1.l - 2);
$$ = new ExprCall(CUR_POS,
new ExprVar(data->symbols.create("__findFile")),
{new ExprVar(data->symbols.create("__nixPath")),
@@ -460,14 +467,14 @@ string_parts_interpolated
path_start
: PATH {
- Path path(absPath($1, data->basePath));
+ Path path(absPath({$1.p, $1.l}, data->basePath));
/* add back in the trailing '/' to the first segment */
- if ($1[strlen($1)-1] == '/' && strlen($1) > 1)
+ if ($1.p[$1.l-1] == '/' && $1.l > 1)
path += "/";
$$ = new ExprPath(path);
}
| HPATH {
- Path path(getHome() + string($1 + 1));
+ Path path(getHome() + string($1.p + 1, $1.l - 1));
$$ = new ExprPath(path);
}
;
@@ -543,7 +550,7 @@ attrpath
attr
: ID { $$ = $1; }
- | OR_KW { $$ = "or"; }
+ | OR_KW { $$ = {"or", 2}; }
;
string_attr
@@ -589,7 +596,7 @@ formal
namespace nix {
-Expr * EvalState::parse(const char * text, FileOrigin origin,
+Expr * EvalState::parse(char * text, size_t length, FileOrigin origin,
const Path & path, const Path & basePath, StaticEnv & staticEnv)
{
yyscan_t scanner;
@@ -609,7 +616,7 @@ Expr * EvalState::parse(const char * text, FileOrigin origin,
data.basePath = basePath;
yylex_init(&scanner);
- yy_scan_string(text, scanner);
+ yy_scan_buffer(text, length, scanner);
int res = yyparse(scanner, &data);
yylex_destroy(scanner);
@@ -655,26 +662,33 @@ Expr * EvalState::parseExprFromFile(const Path & path)
Expr * EvalState::parseExprFromFile(const Path & path, StaticEnv & staticEnv)
{
- return parse(readFile(path).c_str(), foFile, path, dirOf(path), staticEnv);
+ auto buffer = readFile(path);
+ // readFile should have left some extra space for terminators
+ buffer.append("\0\0", 2);
+ return parse(buffer.data(), buffer.size(), foFile, path, dirOf(path), staticEnv);
}
-Expr * EvalState::parseExprFromString(std::string_view s, const Path & basePath, StaticEnv & staticEnv)
+Expr * EvalState::parseExprFromString(std::string s, const Path & basePath, StaticEnv & staticEnv)
{
- return parse(s.data(), foString, "", basePath, staticEnv);
+ s.append("\0\0", 2);
+ return parse(s.data(), s.size(), foString, "", basePath, staticEnv);
}
-Expr * EvalState::parseExprFromString(std::string_view s, const Path & basePath)
+Expr * EvalState::parseExprFromString(std::string s, const Path & basePath)
{
- return parseExprFromString(s, basePath, staticBaseEnv);
+ return parseExprFromString(std::move(s), basePath, staticBaseEnv);
}
Expr * EvalState::parseStdin()
{
//Activity act(*logger, lvlTalkative, format("parsing standard input"));
- return parse(drainFD(0).data(), foStdin, "", absPath("."), staticBaseEnv);
+ auto buffer = drainFD(0);
+ // drainFD should have left some extra space for terminators
+ buffer.append("\0\0", 2);
+ return parse(buffer.data(), buffer.size(), foStdin, "", absPath("."), staticBaseEnv);
}
diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc
index 1d2a7d5d2..b819918ad 100644
--- a/src/libexpr/primops.cc
+++ b/src/libexpr/primops.cc
@@ -378,7 +378,7 @@ void prim_exec(EvalState & state, const Pos & pos, Value * * args, Value & v)
auto output = runProgram(program, true, commandArgs);
Expr * parsed;
try {
- parsed = state.parseExprFromString(output, pos.file);
+ parsed = state.parseExprFromString(std::move(output), pos.file);
} catch (Error & e) {
e.addTrace(pos, "While parsing the output from '%1%'", program);
throw;
@@ -3915,9 +3915,12 @@ void EvalState::createBaseEnv()
/* Note: we have to initialize the 'derivation' constant *after*
building baseEnv/staticBaseEnv because it uses 'builtins'. */
- eval(parse(
+ char code[] =
#include "primops/derivation.nix.gen.hh"
- , foFile, sDerivationNix, "/", staticBaseEnv), *vDerivation);
+ // the parser needs two NUL bytes as terminators; one of them
+ // is implied by being a C string.
+ "\0";
+ eval(parse(code, sizeof(code), foFile, sDerivationNix, "/", staticBaseEnv), *vDerivation);
}
diff --git a/src/libexpr/symbol-table.hh b/src/libexpr/symbol-table.hh
index 4eb6dac81..a090ebae5 100644
--- a/src/libexpr/symbol-table.hh
+++ b/src/libexpr/symbol-table.hh
@@ -1,7 +1,8 @@
#pragma once
+#include <list>
#include <map>
-#include <unordered_set>
+#include <unordered_map>
#include "types.hh"
@@ -70,15 +71,21 @@ public:
class SymbolTable
{
private:
- typedef std::unordered_set<string> Symbols;
- Symbols symbols;
+ std::unordered_map<std::string_view, Symbol> symbols;
+ std::list<string> store;
public:
Symbol create(std::string_view s)
{
- // FIXME: avoid allocation if 's' already exists in the symbol table.
- std::pair<Symbols::iterator, bool> res = symbols.emplace(std::string(s));
- return Symbol(&*res.first);
+ // Most symbols are looked up more than once, so we trade off insertion performance
+ // for lookup performance.
+ // TODO: could probably be done more efficiently with transparent Hash and Equals
+ // on the original implementation using unordered_set
+ auto it = symbols.find(s);
+ if (it != symbols.end()) return it->second;
+
+ const string & rawSym = store.emplace_back(s);
+ return symbols.emplace(rawSym, Symbol(&rawSym)).first->second;
}
size_t size() const
@@ -91,7 +98,7 @@ public:
template<typename T>
void dump(T callback)
{
- for (auto & s : symbols)
+ for (auto & s : store)
callback(s);
}
};