aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAria Shrimpton <me@aria.rip>2024-03-06 16:15:23 +0000
committerAria Shrimpton <me@aria.rip>2024-03-06 16:15:23 +0000
commitbbfdd5ec7404a28ae735884db3d98f2eebc48fa1 (patch)
tree55dfab6192a80d19e51768008a8e1660062df420
parent7d1f4550b99783c9c09383f4807e5a4b32b42cb3 (diff)
most of rest of implementation chapter
-rw-r--r--Tasks.org10
-rw-r--r--thesis/listings-rust.sty75
-rw-r--r--thesis/main.tex2
-rw-r--r--thesis/parts/implementation.tex76
4 files changed, 152 insertions, 11 deletions
diff --git a/Tasks.org b/Tasks.org
index 77ad8cc..b5d83cc 100644
--- a/Tasks.org
+++ b/Tasks.org
@@ -242,18 +242,16 @@ Ideas:
**** DONE Generics and stuff
-*** TODO Selection / Codegen
+*** DONE Selection / Codegen
-**** TODO Selection Algorithm incl Adaptive
+**** DONE Selection Algorithm incl Adaptive
-**** TODO Implementation w/ const generics
+**** DONE Implementation w/ const generics
-**** TODO Generated code (opaque types)
+**** DONE Generated code (opaque types)
*** TODO Misc Concerns
-**** TODO Justify Rust as language
-
**** TODO Explain cargo's role in rust projects & how it is integrated
**** TODO Caching and stuff
diff --git a/thesis/listings-rust.sty b/thesis/listings-rust.sty
new file mode 100644
index 0000000..1a23aa1
--- /dev/null
+++ b/thesis/listings-rust.sty
@@ -0,0 +1,75 @@
+\NeedsTeXFormat{LaTeX2e}[1994/06/01]
+\ProvidesPackage{listings-rust}[2018/01/23 Custom Package]
+
+\RequirePackage{color}
+\RequirePackage{listings}
+
+\lstdefinelanguage{Rust}{%
+ sensitive%
+, morecomment=[l]{//}%
+, morecomment=[s]{/*}{*/}%
+, moredelim=[s][{\itshape\color[rgb]{0,0,0.75}}]{\#[}{]}%
+, morestring=[b]{"}%
+, alsodigit={}%
+, alsoother={}%
+, alsoletter={!}%
+%
+%
+% [1] reserve keywords
+% [2] traits
+% [3] primitive types
+% [4] type and value constructors
+% [5] identifier
+%
+, morekeywords={break, continue, else, for, if, in, loop, match, return, while} % control flow keywords
+, morekeywords={as, const, let, move, mut, ref, static} % in the context of variables
+, morekeywords={dyn, enum, fn, impl, Self, self, struct, trait, type, union, use, where} % in the context of declarations
+, morekeywords={crate, extern, mod, pub, super} % in the context of modularisation
+, morekeywords={unsafe} % markers
+, morekeywords={abstract, alignof, become, box, do, final, macro, offsetof, override, priv, proc, pure, sizeof, typeof, unsized, virtual, yield} % reserved identifiers
+%
+% grep 'pub trait [A-Za-z][A-Za-z0-9]*' -r . | sed 's/^.*pub trait \([A-Za-z][A-Za-z0-9]*\).*/\1/g' | sort -u | tr '\n' ',' | sed 's/^\(.*\),$/{\1}\n/g' | sed 's/,/, /g'
+, morekeywords=[2]{Add, AddAssign, Any, AsciiExt, AsInner, AsInnerMut, AsMut, AsRawFd, AsRawHandle, AsRawSocket, AsRef, Binary, BitAnd, BitAndAssign, Bitor, BitOr, BitOrAssign, BitXor, BitXorAssign, Borrow, BorrowMut, Boxed, BoxPlace, BufRead, BuildHasher, CastInto, CharExt, Clone, CoerceUnsized, CommandExt, Copy, Debug, DecodableFloat, Default, Deref, DerefMut, DirBuilderExt, DirEntryExt, Display, Div, DivAssign, DoubleEndedIterator, DoubleEndedSearcher, Drop, EnvKey, Eq, Error, ExactSizeIterator, ExitStatusExt, Extend, FileExt, FileTypeExt, Float, Fn, FnBox, FnMut, FnOnce, Freeze, From, FromInner, FromIterator, FromRawFd, FromRawHandle, FromRawSocket, FromStr, FullOps, FusedIterator, Generator, Hash, Hasher, Index, IndexMut, InPlace, Int, Into, IntoCow, IntoInner, IntoIterator, IntoRawFd, IntoRawHandle, IntoRawSocket, IsMinusOne, IsZero, Iterator, JoinHandleExt, LargeInt, LowerExp, LowerHex, MetadataExt, Mul, MulAssign, Neg, Not, Octal, OpenOptionsExt, Ord, OsStrExt, OsStringExt, Packet, PartialEq, PartialOrd, Pattern, PermissionsExt, Place, Placer, Pointer, Product, Put, RangeArgument, RawFloat, Read, Rem, RemAssign, Seek, Shl, ShlAssign, Shr, ShrAssign, Sized, SliceConcatExt, SliceExt, SliceIndex, Stats, Step, StrExt, Sub, SubAssign, Sum, Sync, TDynBenchFn, Terminal, Termination, ToOwned, ToSocketAddrs, ToString, Try, TryFrom, TryInto, UnicodeStr, Unsize, UpperExp, UpperHex, WideInt, Write}
+, morekeywords=[2]{Send} % additional traits
+%
+, morekeywords=[3]{bool, char, f32, f64, i8, i16, i32, i64, isize, str, u8, u16, u32, u64, unit, usize, i128, u128} % primitive types
+%
+, morekeywords=[4]{Err, false, None, Ok, Some, true} % prelude value constructors
+% grep 'pub \(type\|struct\|enum\) [A-Za-z][A-Za-z0-9]*' -r . | sed 's/^.*pub \(type\|struct\|enum\) \([A-Za-z][A-Za-z0-9]*\).*/\2/g' | sort -u | tr '\n' ',' | sed 's/^\(.*\),$/{\1}\n/g' | sed 's/,/, /g'
+, morekeywords=[3]{AccessError, Adddf3, AddI128, AddoI128, AddoU128, ADDRESS, ADDRESS64, addrinfo, ADDRINFOA, AddrParseError, Addsf3, AddU128, advice, aiocb, Alignment, AllocErr, AnonPipe, Answer, Arc, Args, ArgsInnerDebug, ArgsOs, Argument, Arguments, ArgumentV1, Ashldi3, Ashlti3, Ashrdi3, Ashrti3, AssertParamIsClone, AssertParamIsCopy, AssertParamIsEq, AssertUnwindSafe, AtomicBool, AtomicPtr, Attr, auxtype, auxv, BackPlace, BacktraceContext, Barrier, BarrierWaitResult, Bencher, BenchMode, BenchSamples, BinaryHeap, BinaryHeapPlace, blkcnt, blkcnt64, blksize, BOOL, boolean, BOOLEAN, BoolTrie, BorrowError, BorrowMutError, Bound, Box, bpf, BTreeMap, BTreeSet, Bucket, BucketState, Buf, BufReader, BufWriter, Builder, BuildHasherDefault, BY, BYTE, Bytes, CannotReallocInPlace, cc, Cell, Chain, CHAR, CharIndices, CharPredicateSearcher, Chars, CharSearcher, CharsError, CharSliceSearcher, CharTryFromError, Child, ChildPipes, ChildStderr, ChildStdin, ChildStdio, ChildStdout, Chunks, ChunksMut, ciovec, clock, clockid, Cloned, cmsgcred, cmsghdr, CodePoint, Color, ColorConfig, Command, CommandEnv, Component, Components, CONDITION, condvar, Condvar, CONSOLE, CONTEXT, Count, Cow, cpu, CRITICAL, CStr, CString, CStringArray, Cursor, Cycle, CycleIter, daddr, DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple, Decimal, Decoded, DecodeUtf16, DecodeUtf16Error, DecodeUtf8, DefaultEnvKey, DefaultHasher, dev, device, Difference, Digit32, DIR, DirBuilder, dircookie, dirent, dirent64, DirEntry, Discriminant, DISPATCHER, Display, Divdf3, Divdi3, Divmoddi4, Divmodsi4, Divsf3, Divsi3, Divti3, dl, Dl, Dlmalloc, Dns, DnsAnswer, DnsQuery, dqblk, Drain, DrainFilter, Dtor, Duration, DwarfReader, DWORD, DWORDLONG, DynamicLibrary, Edge, EHAction, EHContext, Elf32, Elf64, Empty, EmptyBucket, EncodeUtf16, EncodeWide, Entry, EntryPlace, Enumerate, Env, epoll, errno, Error, ErrorKind, EscapeDebug, EscapeDefault, EscapeUnicode, event, Event, eventrwflags, eventtype, ExactChunks, ExactChunksMut, 
EXCEPTION, Excess, ExchangeHeapSingleton, exit, exitcode, ExitStatus, Failure, fd, fdflags, fdsflags, fdstat, ff, fflags, File, FILE, FileAttr, filedelta, FileDesc, FilePermissions, filesize, filestat, FILETIME, filetype, FileType, Filter, FilterMap, Fixdfdi, Fixdfsi, Fixdfti, Fixsfdi, Fixsfsi, Fixsfti, Fixunsdfdi, Fixunsdfsi, Fixunsdfti, Fixunssfdi, Fixunssfsi, Fixunssfti, Flag, FlatMap, Floatdidf, FLOATING, Floatsidf, Floatsisf, Floattidf, Floattisf, Floatundidf, Floatunsidf, Floatunsisf, Floatuntidf, Floatuntisf, flock, ForceResult, FormatSpec, Formatted, Formatter, Fp, FpCategory, fpos, fpos64, fpreg, fpregset, FPUControlWord, Frame, FromBytesWithNulError, FromUtf16Error, FromUtf8Error, FrontPlace, fsblkcnt, fsfilcnt, fsflags, fsid, fstore, fsword, FullBucket, FullBucketMut, FullDecoded, Fuse, GapThenFull, GeneratorState, gid, glob, glob64, GlobalDlmalloc, greg, group, GROUP, Guard, GUID, Handle, HANDLE, Handler, HashMap, HashSet, Heap, HINSTANCE, HMODULE, hostent, HRESULT, id, idtype, if, ifaddrs, IMAGEHLP, Immut, in, in6, Incoming, Infallible, Initializer, ino, ino64, inode, input, InsertResult, Inspect, Instant, int16, int32, int64, int8, integer, IntermediateBox, Internal, Intersection, intmax, IntoInnerError, IntoIter, IntoStringError, intptr, InvalidSequence, iovec, ip, IpAddr, ipc, Ipv4Addr, ipv6, Ipv6Addr, Ipv6MulticastScope, Iter, IterMut, itimerspec, itimerval, jail, JoinHandle, JoinPathsError, KDHELP64, kevent, kevent64, key, Key, Keys, KV, l4, LARGE, lastlog, launchpad, Layout, Lazy, lconv, Leaf, LeafOrInternal, Lines, LinesAny, LineWriter, linger, linkcount, LinkedList, load, locale, LocalKey, LocalKeyState, Location, lock, LockResult, loff, LONG, lookup, lookupflags, LookupHost, LPBOOL, LPBY, LPBYTE, LPCSTR, LPCVOID, LPCWSTR, LPDWORD, LPFILETIME, LPHANDLE, LPOVERLAPPED, LPPROCESS, LPPROGRESS, LPSECURITY, LPSTARTUPINFO, LPSTR, LPVOID, LPWCH, LPWIN32, LPWSADATA, LPWSAPROTOCOL, LPWSTR, Lshrdi3, Lshrti3, lwpid, M128A, mach, major, Map, mcontext, 
Metadata, Metric, MetricMap, mflags, minor, mmsghdr, Moddi3, mode, Modsi3, Modti3, MonitorMsg, MOUNT, mprot, mq, mqd, msflags, msghdr, msginfo, msglen, msgqnum, msqid, Muldf3, Mulodi4, Mulosi4, Muloti4, Mulsf3, Multi3, Mut, Mutex, MutexGuard, MyCollection, n16, NamePadding, NativeLibBoilerplate, nfds, nl, nlink, NodeRef, NoneError, NonNull, NonZero, nthreads, NulError, OccupiedEntry, off, off64, oflags, Once, OnceState, OpenOptions, Option, Options, OptRes, Ordering, OsStr, OsString, Output, OVERLAPPED, Owned, Packet, PanicInfo, Param, ParseBoolError, ParseCharError, ParseError, ParseFloatError, ParseIntError, ParseResult, Part, passwd, Path, PathBuf, PCONDITION, PCONSOLE, Peekable, PeekMut, Permissions, PhantomData, pid, Pipes, PlaceBack, PlaceFront, PLARGE, PoisonError, pollfd, PopResult, port, Position, Powidf2, Powisf2, Prefix, PrefixComponent, PrintFormat, proc, Process, PROCESS, processentry, protoent, PSRWLOCK, pthread, ptr, ptrdiff, PVECTORED, Queue, radvisory, RandomState, Range, RangeFrom, RangeFull, RangeInclusive, RangeMut, RangeTo, RangeToInclusive, RawBucket, RawFd, RawHandle, RawPthread, RawSocket, RawTable, RawVec, Rc, ReadDir, Receiver, recv, RecvError, RecvTimeoutError, ReentrantMutex, ReentrantMutexGuard, Ref, RefCell, RefMut, REPARSE, Repeat, Result, Rev, Reverse, riflags, rights, rlim, rlim64, rlimit, rlimit64, roflags, Root, RSplit, RSplitMut, RSplitN, RSplitNMut, RUNTIME, rusage, RwLock, RWLock, RwLockReadGuard, RwLockWriteGuard, sa, SafeHash, Scan, sched, scope, sdflags, SearchResult, SearchStep, SECURITY, SeekFrom, segment, Select, SelectionResult, sem, sembuf, send, Sender, SendError, servent, sf, Shared, shmatt, shmid, ShortReader, ShouldPanic, Shutdown, siflags, sigaction, SigAction, sigevent, sighandler, siginfo, Sign, signal, signalfd, SignalToken, sigset, sigval, Sink, SipHasher, SipHasher13, SipHasher24, size, SIZE, Skip, SkipWhile, Slice, SmallBoolTrie, sockaddr, SOCKADDR, sockcred, Socket, SOCKET, SocketAddr, SocketAddrV4, 
SocketAddrV6, socklen, speed, Splice, Split, SplitMut, SplitN, SplitNMut, SplitPaths, SplitWhitespace, spwd, SRWLOCK, ssize, stack, STACKFRAME64, StartResult, STARTUPINFO, stat, Stat, stat64, statfs, statfs64, StaticKey, statvfs, StatVfs, statvfs64, Stderr, StderrLock, StderrTerminal, Stdin, StdinLock, Stdio, StdioPipes, Stdout, StdoutLock, StdoutTerminal, StepBy, String, StripPrefixError, StrSearcher, subclockflags, Subdf3, SubI128, SuboI128, SuboU128, subrwflags, subscription, Subsf3, SubU128, Summary, suseconds, SYMBOL, SYMBOLIC, SymmetricDifference, SyncSender, sysinfo, System, SystemTime, SystemTimeError, Take, TakeWhile, tcb, tcflag, TcpListener, TcpStream, TempDir, TermInfo, TerminfoTerminal, termios, termios2, TestDesc, TestDescAndFn, TestEvent, TestFn, TestName, TestOpts, TestResult, Thread, threadattr, threadentry, ThreadId, tid, time, time64, timespec, TimeSpec, timestamp, timeval, timeval32, timezone, tm, tms, ToLowercase, ToUppercase, TraitObject, TryFromIntError, TryFromSliceError, TryIter, TryLockError, TryLockResult, TryRecvError, TrySendError, TypeId, U64x2, ucontext, ucred, Udivdi3, Udivmoddi4, Udivmodsi4, Udivmodti4, Udivsi3, Udivti3, UdpSocket, uid, UINT, uint16, uint32, uint64, uint8, uintmax, uintptr, ulflags, ULONG, ULONGLONG, Umoddi3, Umodsi3, Umodti3, UnicodeVersion, Union, Unique, UnixDatagram, UnixListener, UnixStream, Unpacked, UnsafeCell, UNWIND, UpgradeResult, useconds, user, userdata, USHORT, Utf16Encoder, Utf8Error, Utf8Lossy, Utf8LossyChunk, Utf8LossyChunksIter, utimbuf, utmp, utmpx, utsname, uuid, VacantEntry, Values, ValuesMut, VarError, Variables, Vars, VarsOs, Vec, VecDeque, vm, Void, WaitTimeoutResult, WaitToken, wchar, WCHAR, Weak, whence, WIN32, WinConsole, Windows, WindowsEnvKey, winsize, WORD, Wrapping, wrlen, WSADATA, WSAPROTOCOL, WSAPROTOCOLCHAIN, Wtf8, Wtf8Buf, Wtf8CodePoints, xsw, xucred, Zip, zx}
+%
+, morekeywords=[5]{assert!, assert_eq!, assert_ne!, cfg!, column!, compile_error!, concat!, concat_idents!, debug_assert!, debug_assert_eq!, debug_assert_ne!, env!, eprint!, eprintln!, file!, format!, format_args!, include!, include_bytes!, include_str!, line!, module_path!, option_env!, panic!, print!, println!, select!, stringify!, thread_local!, try!, unimplemented!, unreachable!, vec!, write!, writeln!} % prelude macros
+}%
+
+\lstdefinestyle{colouredRust}%
+{ basicstyle=\ttfamily%
+, identifierstyle=%
+, commentstyle=\color[gray]{0.4}%
+, stringstyle=\color[rgb]{0, 0, 0.5}%
+, keywordstyle=\bfseries% reserved keywords
+, keywordstyle=[2]\color[rgb]{0.75, 0, 0}% traits
+, keywordstyle=[3]\color[rgb]{0, 0.5, 0}% primitive types
+, keywordstyle=[4]\color[rgb]{0, 0.5, 0}% type and value constructors
+, keywordstyle=[5]\color[rgb]{0, 0, 0.75}% macros
+, columns=spaceflexible%
+, keepspaces=true%
+, showspaces=false%
+, showtabs=false%
+, showstringspaces=true%
+}%
+
+\lstdefinestyle{boxed}{
+ style=colouredRust%
+, numbers=left%
+, firstnumber=auto%
+, numberblanklines=true%
+, frame=trbL%
+, numberstyle=\tiny%
+, frame=leftline%
+, numbersep=7pt%
+, framesep=5pt%
+, framerule=10pt%
+, xleftmargin=15pt%
+, backgroundcolor=\color[gray]{0.97}%
+, rulecolor=\color[gray]{0.90}%
+}
diff --git a/thesis/main.tex b/thesis/main.tex
index 9b65351..f12e6eb 100644
--- a/thesis/main.tex
+++ b/thesis/main.tex
@@ -15,7 +15,7 @@
\newcommand{\todo}[1]{\colorbox{yellow}{TODO: #1} \par}
%% Code blocks
-\usepackage{listings}
+\usepackage{listings, listings-rust}
\usepackage{courier}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
diff --git a/thesis/parts/implementation.tex b/thesis/parts/implementation.tex
index 3576447..b1e156a 100644
--- a/thesis/parts/implementation.tex
+++ b/thesis/parts/implementation.tex
@@ -71,14 +71,82 @@ Although it has some amount of overhead, it's not important as we aren't measuri
\section{Selection and Codegen}
%% Selection Algorithm incl Adaptiv
+Selection is done per container site.
+For each candidate implementation, we calculate its cost on each partition in the profiler output, then sum these values to get the total estimated cost for each implementation.
+This gives us a cost estimate for each individual candidate.
-%% Generated code (opaque types)
+In order to try and suggest an adaptive container, we use the following algorithm:
-%% Implementation w/ const generics
+\begin{enumerate}
+\item Calculate the cost for each candidate and for each partition
+\item For each partition, find the best candidate and store it in the array \code{best}. Note that we don't sum across all partitions this time.
+\item Find the lowest index \code{i} where \code{best[i] != best[0]}
+\item Check that \code{i} partitions the list properly: For all \code{j < i}, \code{best[j] == best[0]} and for all \code{j>=i}, \code{best[j] == best[i]}.
+\item Let \code{before} be the name of the candidate in \code{best[0]}, \code{after} be the name of the candidate in \code{best[i]}, and \code{threshold} be the maximum n value of partition \code{i}.
+\item Calculate the cost of switching as:
+ \[
+ C_{\textrm{before,clear}}(\textrm{threshold}) + \textrm{threshold} \cdot C_{\textrm{after,insert}}(\textrm{threshold})
+ \]
+\item Calculate the cost of not switching: The sum of the difference in cost between \code{before} and \code{after} for all partitions with index \code{> i}.
+\item If the cost of not switching is less than the cost of switching, we can't make a suggestion.
+\item Otherwise, suggest an adaptive container which switches from \code{before} to \code{after} when $n$ gets above \code{threshold}. Its estimated cost is the cost for \code{before} up to partition \code{i}, plus the cost of \code{after} for all other partitions.
+\end{enumerate}
-\section{Misc Concerns}
+Selection is implemented in \code{src/crates/candelabra/src/profiler/info.rs} and \code{src/crates/candelabra/src/select.rs}.
-\todo{Justify Rust as language}
+%% Generated code (opaque types)
+As mentioned above, the original Primrose code would generate code as in Listing \ref{lst:primrose_codegen}.
+In order to ensure that users specify all of the traits they need, this code only exposes methods on the implementation that are part of the trait bounds given.
+However, it does this by using a \code{dyn} object, Rust's mechanism for dynamic dispatch.
+
+Although this approach works, it adds an extra layer of indirection to every call: The caller must use the dyn object's vtable to find the method it needs to call.
+This also prevents the compiler from optimising across this boundary.
+
+In order to avoid this, we make use of Rust's support for existential types: Types that aren't directly named, but are inferred by the compiler.
+Existential types only guarantee their users the given trait bounds, so they accomplish the same goal of forcing users to specify all of their trait bounds upfront.
+
+Listing \ref{lst:new_codegen} shows our equivalent generated code.
+The type alias \code{StackCon<S>} only allows users to use the \code{Container<S>}, \code{Stack<S>}, and \code{Default} traits.
+Our unused `dummy' function \code{_StackCon} has the return type \code{StackCon<S>}.
+Rust's type inference step sees that its actual return type is \code{Vec<S>}, and therefore sets the concrete type of \code{StackCon<S>} to \code{Vec<S>} at compile time.
+
+Unfortunately, this feature is not yet in stable Rust, meaning we have to opt in to it using an unstable feature gate (\code{feature(type_alias_impl_trait)}).
+At time of writing, the main obstacle to stabilisation appears to be design decisions that only apply to more complicated use-cases, therefore we are confident that this code will remain valid and won't encounter any compiler bugs.
+
+\begin{figure}[h]
+ \begin{lstlisting}[caption=Code generated by original Primrose project,label={lst:primrose_codegen},language=Rust]
+pub trait StackTrait<T> : Container<T> + Stack<T> {}
+impl<T: 'static + Ord + std::hash::Hash> StackTrait<T> for <Stack<T> as ContainerConstructor>::Impl {}
+
+
+pub struct Stack<T> {
+ elem_t: core::marker::PhantomData<T>,
+}
+
+impl<T: 'static + Ord + std::hash::Hash> ContainerConstructor for Stack<T> {
+ type Impl = Vec<T>;
+ type Bound = dyn StackTrait<T>;
+ fn new() -> Box<Self::Bound> {
+ Box::new(Self::Impl::new())
+ }
+}
+\end{lstlisting}
+\end{figure}
+
+\begin{figure}[h]
+ \begin{lstlisting}[caption=Code generated with new method,label={lst:new_codegen},language=Rust]
+pub type StackCon<S: PartialEq + Ord + std::hash::Hash> = impl Container<S> + Stack<S> + Default;
+
+#[allow(non_snake_case)]
+fn _StackCon<S: PartialEq + Ord + std::hash::Hash>() -> StackCon<S> {
+ std::vec::Vec::<S>::default()
+}
+\end{lstlisting}
+\end{figure}
+
+\section{Miscellaneous concerns}
+
+In this section, we highlight some other design decisions we made, and justify them.
\todo{Explain cargo's role in rust projects \& how it is integrated}