diff options
-rw-r--r-- | thesis/biblio.bib | 78 | ||||
-rw-r--r-- | thesis/parts/background.tex | 68 |
2 files changed, 90 insertions, 56 deletions
diff --git a/thesis/biblio.bib b/thesis/biblio.bib index 7d8fa6b..0c0c008 100644 --- a/thesis/biblio.bib +++ b/thesis/biblio.bib @@ -1,22 +1,20 @@ -@article{jung_brainy_2011, - title = {Brainy: effective selection of data structures}, - volume = {46}, - issn = {0362-1340, 1558-1160}, - url = {https://dl.acm.org/doi/10.1145/1993316.1993509}, - doi = {10.1145/1993316.1993509}, - shorttitle = {Brainy}, - abstract = {Data structure selection is one of the most critical aspects of developing effective applications. By analyzing data structures' behavior and their interaction with the rest of the application on the underlying architecture, tools can make suggestions for alternative data structures better suited for the program input on which the application runs. Consequently, developers can optimize their data structure usage to make the application conscious of an underlying architecture and a particular program input. - This paper presents the design and evaluation of Brainy, a new program analysis tool that automatically selects the best data structure for a given program and its input on a specific microarchitecture. The data structure's interface functions are instrumented to dynamically monitor how the data structure interacts with the application for a given input. The instrumentation records traces of various runtime characteristics including underlying architecture-specific events. These generated traces are analyzed and fed into an offline model, constructed using machine learning, to select the best data structure. That is, Brainy exploits runtime feedback of data structures to model the situation an application runs on, and selects the best data structure for a given application/input/architecture combination based on the constructed model. The empirical evaluation shows that this technique is highly accurate across several real-world applications with various program input sets on two different state-of-the-art microarchitectures. 
Consequently, Brainy achieved an average performance improvement of 27\% and 33\% on both microarchitectures, respectively.}, - pages = {86--97}, - number = {6}, - journaltitle = {{ACM} {SIGPLAN} Notices}, - shortjournal = {{SIGPLAN} Not.}, - author = {Jung, Changhee and Rus, Silvius and Railing, Brian P. and Clark, Nathan and Pande, Santosh}, +@inproceedings{shacham_chameleon_2009, + location = {Dublin Ireland}, + title = {Chameleon: adaptive selection of collections}, + isbn = {978-1-60558-392-1}, + url = {https://dl.acm.org/doi/10.1145/1542476.1542522}, + doi = {10.1145/1542476.1542522}, + shorttitle = {Chameleon}, + eventtitle = {{PLDI} '09: {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation}, + pages = {408--418}, + booktitle = {Proceedings of the 30th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation}, + publisher = {{ACM}}, + author = {Shacham, Ohad and Vechev, Martin and Yahav, Eran}, urldate = {2023-09-21}, - date = {2011-06-04}, + date = {2009-06-15}, langid = {english}, - file = {Jung et al. - 2011 - Brainy effective selection of data structures.pdf:/home/aria/Zotero/storage/DPJPURT8/Jung et al. - 2011 - Brainy effective selection of data structures.pdf:application/pdf}, + file = {Shacham et al. - 2009 - Chameleon adaptive selection of collections.pdf:/home/aria/Zotero/storage/75CS9CWY/Shacham et al. 
- 2009 - Chameleon adaptive selection of collections.pdf:application/pdf}, } @inproceedings{costa_collectionswitch_2018, @@ -37,22 +35,24 @@ file = {Costa and Andrzejak - 2018 - CollectionSwitch a framework for efficient and dy:/home/aria/Zotero/storage/7B8QMVRU/Costa and Andrzejak - 2018 - CollectionSwitch a framework for efficient and dy:application/pdf}, } -@inproceedings{shacham_chameleon_2009, - location = {Dublin Ireland}, - title = {Chameleon: adaptive selection of collections}, - isbn = {978-1-60558-392-1}, - url = {https://dl.acm.org/doi/10.1145/1542476.1542522}, - doi = {10.1145/1542476.1542522}, - shorttitle = {Chameleon}, - eventtitle = {{PLDI} '09: {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation}, - pages = {408--418}, - booktitle = {Proceedings of the 30th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation}, - publisher = {{ACM}}, - author = {Shacham, Ohad and Vechev, Martin and Yahav, Eran}, +@article{jung_brainy_2011, + title = {Brainy: effective selection of data structures}, + volume = {46}, + issn = {0362-1340, 1558-1160}, + url = {https://dl.acm.org/doi/10.1145/1993316.1993509}, + doi = {10.1145/1993316.1993509}, + shorttitle = {Brainy}, + abstract = {Data structure selection is one of the most critical aspects of developing effective applications. By analyzing data structures' behavior and their interaction with the rest of the application on the underlying architecture, tools can make suggestions for alternative data structures better suited for the program input on which the application runs. Consequently, developers can optimize their data structure usage to make the application conscious of an underlying architecture and a particular program input. + This paper presents the design and evaluation of Brainy, a new program analysis tool that automatically selects the best data structure for a given program and its input on a specific microarchitecture. 
The data structure's interface functions are instrumented to dynamically monitor how the data structure interacts with the application for a given input. The instrumentation records traces of various runtime characteristics including underlying architecture-specific events. These generated traces are analyzed and fed into an offline model, constructed using machine learning, to select the best data structure. That is, Brainy exploits runtime feedback of data structures to model the situation an application runs on, and selects the best data structure for a given application/input/architecture combination based on the constructed model. The empirical evaluation shows that this technique is highly accurate across several real-world applications with various program input sets on two different state-of-the-art microarchitectures. Consequently, Brainy achieved an average performance improvement of 27\% and 33\% on both microarchitectures, respectively.}, + pages = {86--97}, + number = {6}, + journaltitle = {{ACM} {SIGPLAN} Notices}, + shortjournal = {{SIGPLAN} Not.}, + author = {Jung, Changhee and Rus, Silvius and Railing, Brian P. and Clark, Nathan and Pande, Santosh}, urldate = {2023-09-21}, - date = {2009-06-15}, + date = {2011-06-04}, langid = {english}, - file = {Shacham et al. - 2009 - Chameleon adaptive selection of collections.pdf:/home/aria/Zotero/storage/75CS9CWY/Shacham et al. - 2009 - Chameleon adaptive selection of collections.pdf:application/pdf}, + file = {Jung et al. - 2011 - Brainy effective selection of data structures.pdf:/home/aria/Zotero/storage/DPJPURT8/Jung et al. - 2011 - Brainy effective selection of data structures.pdf:application/pdf}, } @article{qin_primrose_2023, @@ -75,3 +75,21 @@ keywords = {Computer Science - Data Structures and Algorithms, Computer Science - Programming Languages}, file = {arXiv Fulltext PDF:/home/aria/Zotero/storage/IL59NESA/Qin et al. 
- 2023 - Primrose Selecting Container Data Types by Their .pdf:application/pdf;arXiv.org Snapshot:/home/aria/Zotero/storage/DCIW4XE4/2205.html:text/html}, } + +@article{chung_towards_2004, + title = {Towards Automatic Performance Tuning}, + author = {Chung, I-Hsin}, + date = {2004-11}, + file = {Chung - 2004 - Towards Automatic Performance Tuning.pdf:/home/aria/Zotero/storage/WQBJMSN8/Chung - 2004 - Towards Automatic Performance Tuning.pdf:application/pdf}, +} + +@inproceedings{l_liu_perflint_2009, + title = {Perflint: A Context Sensitive Performance Advisor for C++ Programs}, + doi = {10.1109/CGO.2009.36}, + eventtitle = {2009 International Symposium on Code Generation and Optimization}, + pages = {265--274}, + booktitle = {2009 International Symposium on Code Generation and Optimization}, + author = {Liu, Lixia and Rus, Silvius}, + date = {2009-03-22}, + note = {Journal Abbreviation: 2009 International Symposium on Code Generation and Optimization}, +} diff --git a/thesis/parts/background.tex b/thesis/parts/background.tex index 633efde..7b995b0 100644 --- a/thesis/parts/background.tex +++ b/thesis/parts/background.tex @@ -6,10 +6,10 @@ Finally, we examine the gaps in the existing literature, and explain how this pa The vast majority of programs will use make extensive use of collection data types - types intended to hold many different instances of other data types. This can refer to anything from fixed-size arrays, to growable linked lists, to associative key-value mappings or dictionaries. -In some cases, these are built-in parts of the language: In Go, a list of ints has type \code{[]int} and a dictionary from string to string has type \code{map[string]string}. +In some cases, these are built-in parts of the language: In Go, a list of ints has type \code{[]int} and a map from string to string has type \code{map[string]string}. -In other languages, these are instead part of some standard library. 
-In Rust, you might write \code{Vec<isize>} and \code{HashMap<String, String>} for the same types. +In other languages, these are instead part of some standard library, or in some cases must be defined by the user. +In Rust, you might write \code{Vec<isize>} and \code{HashMap<String, String>} for the same purpose. This forces us to make a choice upfront: what type should we use? In this case the answer is obvious - the two have very different purposes and don't support the same operations. @@ -23,61 +23,84 @@ We refer to this problem as container selection, and split it into two parts: Fu Functional requirements refers to a similar definition as is normally used for software: The container must behave the way that the program expects it to. -Continuing with our previous example, we can first note that \code{Vec} and \code{HashSet} implement different sets of methods. +Continuing with our previous example, we can see that \code{Vec} and \code{HashSet} implement different methods. \code{Vec} implements methods like \code{.get(index)} and \code{.push(value)}, while \code{HashSet} implements neither - they don't make sense for an unordered collection. Similarly, \code{HashSet} implements \code{.replace(value)} and \code{.is\_subset(other)}, neither of which make sense for \code{Vec}. -If we try to swap \code{Vec} for \code{HashSet}, the resulting program may not compile. +If we try to swap \code{Vec} for \code{HashSet}, the resulting program will likely not compile. These restrictions form the first part of our functional requirements - the ``syntactic properties'' of the containers must satisfy the program's requirements. In object-oriented programming, we might say they must implement an interface. +In Rust, we would say that they implement a trait, or that they belong to a type class. However, syntactic properties alone are not always enough to select an appropriate container. 
Suppose our program only requires a container to have \code{.insert(value)}, \code{.contains(value)}, and \code{.len()}. Both \code{Vec} and \code{HashSet} will satisfy these requirements. - However, our program might rely on \code{.len()} returning a count including duplicates. In this case, \code{HashSet} would give us different behaviour, possibly causing our program to behave incorrectly. To express this, we say that a container implementation also has ``semantic properties'' that must satisfy our requirements. Intuitively we can think of this as what conditions the container upholds. -For a set, this would include that there are never any duplicates % TODO +For a \code{HashSet}, this would include that there are never any duplicates, whereas for a \code{Vec} it would include that ordering is preserved. \subsection{Non-functional requirements} While meeting the functional requirements is generally enough to ensure a program runs correctly, we also want to ensure we choose the 'best' type we can. -There are many measures for this, but we will focus primarily on time: how much we can affect the runtime of the program. +For our purposes, this will simply be the type that minimises runtime, although other approaches also consider the balance between memory usage and time. -If we assume we can find a selection of types that satisfy the functional requirements, then one obvious solution is just to benchmark the program with each of these implementations in place, and see which works best. +Prior work has shown that properly considering container selection can give substantial performance improvements, even in large applications. +For instance, tuning performed in \cite{chung_towards_2004} achieved an up to 70\% increase in the throughput of a complex web application, and a 15-40\% decrease in the runtime of several scientific applications. 
+\cite{l_liu_perflint_2009} found and suggested fixes for ``hundreds of suboptimal patterns in a set of large C++ benchmarks'', with one such case improving performance by 17\%. +Similarly, \cite{jung_brainy_2011} achieves an average speedup of 27-33\% on real-world applications and libraries. -This will obviously work, however note that as well as our program, we need to develop benchmarks. -If the benchmarks are flawed, or don't represent how our program is used in practice, then we may get drastically different results in the 'real world'. +If we assume we can find a selection of types that satisfy the functional requirements, then one obvious solution is just to benchmark the program with each of these implementations in place, and see which works best. +This will obviously work, so long as our benchmarks are roughly representative of 'real world' inputs. -%% TODO: Motivate how this improves performance +Unfortunately, this technique scales poorly for bigger applications. +As the number of container types we must select increases, the number of combinations we must try increases exponentially (assuming they all have roughly the same number of candidates). +This quickly becomes unfeasible, and so we must find other ways of improving our performance. \section{Prior Literature} \subsection{Approaches in common programming languages} -%% TODO +Modern programming languages broadly take one of two approaches to container selection. + +Some languages, usually higher-level ones, recommend built-in structures as the default, using implementations that perform fine for the vast majority of use-cases. +Popular examples include Python, which uses \code{[1, 2, 3]} and \code{\{'one': 1\}} for lists and maps respectively; and Go, which uses \code{[]int\{1, 2, 3\}} and \code{map[string]int\{"one": 1\}} for the same purposes. 
+This approach prioritises developer ergonomics: programmers writing in these languages do not need to think about how these are implemented in the vast majority of cases. +In both languages, other implementations are possible to a certain extent, although these aren't usually preferred and come at the cost of code readability. + +In other languages, collections are given as part of a standard library, or must be written by the user. +For example, C does not support growable lists at the language level - users must bring in their own implementation or use an existing library. +Java comes with growable lists and maps as part of its standard library, as does Rust (with some macros to make use easier). +In both cases, the ``blessed'' implementation of collections is not special - users can implement their own. + +In many languages, interfaces or their closest equivalent are used to distinguish 'similar' collections. +In Java, ordered collections implement the interface \code{List<E>}, while similar interfaces exist for \code{Set<E>}, \code{Queue<E>}, etc. +This means that when the developer chooses a type, the compiler enforces the syntactic requirements of the collection, and the writer of the implementation ``promises'' they have met the semantic requirements. +Other languages give much weaker guarantees, for instance Rust has no typeclasses for List or Set. +Its closest equivalents are traits like \code{Index<I>} and \code{IntoIterator}, neither of which make semantic guarantees. + +Whilst the approach Java takes is the most expressive, both of these approaches either put the choice on the developer, or remove the choice entirely. +This means that developers are forced to guess based on their knowledge of the underlying implementations, or more often to just pick the most common implementation. +The papers we will examine all attempt to choose for the developer, based on a variety of techniques. 
\subsection{Chameleon} -Chameleon\parencite{shacham_chameleon_2009} is a solution that focuses on the non-functional requirements of container selection. +Chameleon\parencite{shacham_chameleon_2009} is a tool for Java codebases, which uses a rules engine to identify sub-optimal choices. -First, it runs the program with some example input, and collects data on the collections used using a ``semantic profiler''. -This data includes the space used by collections, the minimum space that could be used by all of the items of that collection, and the number of each operation performed. +First, it runs the program with some representative input, and collects data on the collections used using a ``semantic profiler''. +This data includes the space used by collections, the minimum space that could be used by all of the items of that collection, and the counts of each operation performed. These statistics are tracked per individual collection allocated, and then aggregated by 'allocation context' - a portion of the callstack where the allocation occured. These aggregated statistics are then passed to a rules engine, which uses a set of rules to suggest places a different container type might improve performance. For example, a rule could check when a linked list often has items accessed by index, and suggest a different list implementation as a replacement. This results in a flexible engine for providing suggestions, which can be extended with new rules and types as necessary. -%% todo: something about online selection part - Unfortunately, this does require the developer to come up with and add replacement rules for each implementation. In many cases, there may be patterns that could be used to suggest a better option, but that the developer does not see or is not able to formalise. -Chameleon also makes no attempt to select based on functional requirements. +Chameleon also relies only on the existing type to decide what it can suggest. 
This results in selection rules needing to be more restricted than they otherwise could be. For instance, a rule cannot suggest a \code{HashSet} instead of a \code{LinkedList}, as the two are not semantically identical. Chameleon has no way of knowing if doing so will break the program's functionality, and so it does not make a suggestion. @@ -114,13 +137,6 @@ However, this approach is still limited in the semantics it can identify, for in \subsection{CollectionSwitch} -%% - online selection - uses library so easier to integrate -%% - collects access patterns, size patterns, etc. -%% - performance model is built beforehand for each concrete implementation, with a cost model used to estimate the relative performance of each based on observed usage -%% - switches underlying implementation dynamically -%% - also able to decide size thresholds where the implementation should be changed and do this -%% - doesn't require specific knowledge of the implementations, although does still assume all are semantically equivalent - CollectionSwitch\parencite{costa_collectionswitch_2018} takes a different approach to the container selection problem, adapting as the program runs and new information becomes available. First, a performance model is built for each container implementation. |