author     Aria <me@aria.rip>  2023-10-18 17:10:20 +0100
committer  Aria <me@aria.rip>  2023-10-18 17:10:20 +0100
commit     123f859bae36b1877922110202d72e5068d5ece8 (patch)
tree       657af32befa7b9cc558281b18868eba5071bc7a4 /thesis
parent     6ff27479567d703285ec6b7f042c23cac0d4782d (diff)
stuff
Diffstat (limited to 'thesis')
-rw-r--r--  thesis/biblio.bib            151
-rw-r--r--  thesis/parts/background.tex   59
2 files changed, 132 insertions, 78 deletions
diff --git a/thesis/biblio.bib b/thesis/biblio.bib
index ed823cc..fe7a041 100644
--- a/thesis/biblio.bib
+++ b/thesis/biblio.bib
@@ -1,20 +1,23 @@
-@inproceedings{shacham_chameleon_2009,
-  location = {Dublin Ireland},
-  title = {Chameleon: adaptive selection of collections},
-  isbn = {978-1-60558-392-1},
-  url = {https://dl.acm.org/doi/10.1145/1542476.1542522},
-  doi = {10.1145/1542476.1542522},
-  shorttitle = {Chameleon},
-  eventtitle = {{PLDI} '09: {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation},
-  pages = {408--418},
-  booktitle = {Proceedings of the 30th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation},
-  publisher = {{ACM}},
-  author = {Shacham, Ohad and Vechev, Martin and Yahav, Eran},
+@article{jung_brainy_2011,
+  title = {Brainy: effective selection of data structures},
+  volume = {46},
+  issn = {0362-1340, 1558-1160},
+  url = {https://dl.acm.org/doi/10.1145/1993316.1993509},
+  doi = {10.1145/1993316.1993509},
+  shorttitle = {Brainy},
+  abstract = {Data structure selection is one of the most critical aspects of developing effective applications. By analyzing data structures' behavior and their interaction with the rest of the application on the underlying architecture, tools can make suggestions for alternative data structures better suited for the program input on which the application runs. Consequently, developers can optimize their data structure usage to make the application conscious of an underlying architecture and a particular program input.
+  This paper presents the design and evaluation of Brainy, a new program analysis tool that automatically selects the best data structure for a given program and its input on a specific microarchitecture. The data structure's interface functions are instrumented to dynamically monitor how the data structure interacts with the application for a given input. The instrumentation records traces of various runtime characteristics including underlying architecture-specific events. These generated traces are analyzed and fed into an offline model, constructed using machine learning, to select the best data structure. That is, Brainy exploits runtime feedback of data structures to model the situation an application runs on, and selects the best data structure for a given application/input/architecture combination based on the constructed model. The empirical evaluation shows that this technique is highly accurate across several real-world applications with various program input sets on two different state-of-the-art microarchitectures. Consequently, Brainy achieved an average performance improvement of 27\% and 33\% on both microarchitectures, respectively.},
+  pages = {86--97},
+  number = {6},
+  journaltitle = {{ACM} {SIGPLAN} Notices},
+  shortjournal = {{SIGPLAN} Not.},
+  author = {Jung, Changhee and Rus, Silvius and Railing, Brian P. and Clark, Nathan and Pande, Santosh},
   urldate = {2023-09-21},
-  date = {2009-06-15},
+  date = {2011-06-04},
   langid = {english},
-  file = {Shacham et al. - 2009 - Chameleon adaptive selection of collections.pdf:/home/aria/Zotero/storage/75CS9CWY/Shacham et al. - 2009 - Chameleon adaptive selection of collections.pdf:application/pdf},
+  keywords = {ml, read},
+  file = {Jung et al. - 2011 - Brainy effective selection of data structures.pdf:/home/aria/Zotero/storage/DPJPURT8/Jung et al. - 2011 - Brainy effective selection of data structures.pdf:application/pdf},
 }
 
 @inproceedings{costa_collectionswitch_2018,
@@ -32,27 +35,27 @@
   urldate = {2023-09-21},
   date = {2018-02-24},
   langid = {english},
+  keywords = {estimate-based, read},
   file = {Costa and Andrzejak - 2018 - CollectionSwitch a framework for efficient and dy:/home/aria/Zotero/storage/7B8QMVRU/Costa and Andrzejak - 2018 - CollectionSwitch a framework for efficient and dy:application/pdf},
 }
 
-@article{jung_brainy_2011,
-  title = {Brainy: effective selection of data structures},
-  volume = {46},
-  issn = {0362-1340, 1558-1160},
-  url = {https://dl.acm.org/doi/10.1145/1993316.1993509},
-  doi = {10.1145/1993316.1993509},
-  shorttitle = {Brainy},
-  abstract = {Data structure selection is one of the most critical aspects of developing effective applications. By analyzing data structures' behavior and their interaction with the rest of the application on the underlying architecture, tools can make suggestions for alternative data structures better suited for the program input on which the application runs. Consequently, developers can optimize their data structure usage to make the application conscious of an underlying architecture and a particular program input.
-  This paper presents the design and evaluation of Brainy, a new program analysis tool that automatically selects the best data structure for a given program and its input on a specific microarchitecture. The data structure's interface functions are instrumented to dynamically monitor how the data structure interacts with the application for a given input. The instrumentation records traces of various runtime characteristics including underlying architecture-specific events. These generated traces are analyzed and fed into an offline model, constructed using machine learning, to select the best data structure. That is, Brainy exploits runtime feedback of data structures to model the situation an application runs on, and selects the best data structure for a given application/input/architecture combination based on the constructed model. The empirical evaluation shows that this technique is highly accurate across several real-world applications with various program input sets on two different state-of-the-art microarchitectures. Consequently, Brainy achieved an average performance improvement of 27\% and 33\% on both microarchitectures, respectively.},
-  pages = {86--97},
-  number = {6},
-  journaltitle = {{ACM} {SIGPLAN} Notices},
-  shortjournal = {{SIGPLAN} Not.},
-  author = {Jung, Changhee and Rus, Silvius and Railing, Brian P. and Clark, Nathan and Pande, Santosh},
+@inproceedings{shacham_chameleon_2009,
+  location = {Dublin Ireland},
+  title = {Chameleon: adaptive selection of collections},
+  isbn = {978-1-60558-392-1},
+  url = {https://dl.acm.org/doi/10.1145/1542476.1542522},
+  doi = {10.1145/1542476.1542522},
+  shorttitle = {Chameleon},
+  eventtitle = {{PLDI} '09: {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation},
+  pages = {408--418},
+  booktitle = {Proceedings of the 30th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation},
+  publisher = {{ACM}},
+  author = {Shacham, Ohad and Vechev, Martin and Yahav, Eran},
   urldate = {2023-09-21},
-  date = {2011-06-04},
+  date = {2009-06-15},
   langid = {english},
-  file = {Jung et al. - 2011 - Brainy effective selection of data structures.pdf:/home/aria/Zotero/storage/DPJPURT8/Jung et al. - 2011 - Brainy effective selection of data structures.pdf:application/pdf},
+  keywords = {read, rules-based},
+  file = {Shacham et al. - 2009 - Chameleon adaptive selection of collections.pdf:/home/aria/Zotero/storage/75CS9CWY/Shacham et al. - 2009 - Chameleon adaptive selection of collections.pdf:application/pdf},
 }
 
 @article{qin_primrose_2023,
@@ -72,27 +75,18 @@
   date = {2023-02-15},
   eprinttype = {arxiv},
   eprint = {2205.09655 [cs]},
-  keywords = {Computer Science - Data Structures and Algorithms, Computer Science - Programming Languages},
+  keywords = {functional requirements, read},
   file = {arXiv Fulltext PDF:/home/aria/Zotero/storage/IL59NESA/Qin et al. - 2023 - Primrose Selecting Container Data Types by Their .pdf:application/pdf;arXiv.org Snapshot:/home/aria/Zotero/storage/DCIW4XE4/2205.html:text/html},
 }
 
-@article{chung_towards_2004,
-  title = {Towards Automatic Performance Tuning},
-  author = {Chung, I-Hsin},
-  date = {2004-11},
-  file = {Chung - 2004 - Towards Automatic Performance Tuning.pdf:/home/aria/Zotero/storage/WQBJMSN8/Chung - 2004 - Towards Automatic Performance Tuning.pdf:application/pdf},
-}
-
-@inproceedings{l_liu_perflint_2009,
-  title = {Perflint: A Context Sensitive Performance Advisor for C++ Programs},
-  doi = {10.1109/CGO.2009.36},
-  eventtitle = {2009 International Symposium on Code Generation and Optimization},
-  pages = {265--274},
-  booktitle = {2009 International Symposium on Code Generation and Optimization},
-  author = {{L. Liu} and {S. Rus}},
-  date = {2009-03-22},
-  note = {Journal Abbreviation: 2009 International Symposium on Code Generation and Optimization},
-  file = {Full Text:/home/aria/Zotero/storage/KTJNYCES/L. Liu and S. Rus - 2009 - Perflint A Context Sensitive Performance Advisor .pdf:application/pdf},
+@inproceedings{osterlund_dynamically_2013,
+  title = {Dynamically transforming data structures},
+  doi = {10.1109/ASE.2013.6693099},
+  pages = {410--420},
+  booktitle = {2013 28th {IEEE}/{ACM} International Conference on Automated Software Engineering ({ASE})},
+  author = {Österlund, Erik and Löwe, Welf},
+  date = {2013},
+  keywords = {rules-based},
 }
 
 @incollection{hutchison_coco_2013,
@@ -113,13 +107,60 @@
   date = {2013},
   doi = {10.1007/978-3-642-39038-8_1},
   note = {Series Title: Lecture Notes in Computer Science},
+  keywords = {read, rules-based},
 }
 
-@inproceedings{osterlund_dynamically_2013,
-  title = {Dynamically transforming data structures},
-  doi = {10.1109/ASE.2013.6693099},
-  pages = {410--420},
-  booktitle = {2013 28th {IEEE}/{ACM} International Conference on Automated Software Engineering ({ASE})},
-  author = {Österlund, Erik and Löwe, Welf},
-  date = {2013},
+@inproceedings{l_liu_perflint_2009,
+  title = {Perflint: A Context Sensitive Performance Advisor for C++ Programs},
+  doi = {10.1109/CGO.2009.36},
+  eventtitle = {2009 International Symposium on Code Generation and Optimization},
+  pages = {265--274},
+  booktitle = {2009 International Symposium on Code Generation and Optimization},
+  author = {{L. Liu} and {S. Rus}},
+  date = {2009-03-22},
+  note = {Journal Abbreviation: 2009 International Symposium on Code Generation and Optimization},
+  keywords = {read, rules-based},
+  file = {Full Text:/home/aria/Zotero/storage/KTJNYCES/L. Liu and S. Rus - 2009 - Perflint A Context Sensitive Performance Advisor .pdf:application/pdf},
+}
+
+@article{chung_towards_2004,
+  title = {Towards Automatic Performance Tuning},
+  author = {Chung, I-Hsin},
+  date = {2004-11},
+  file = {Chung - 2004 - Towards Automatic Performance Tuning.pdf:/home/aria/Zotero/storage/WQBJMSN8/Chung - 2004 - Towards Automatic Performance Tuning.pdf:application/pdf},
+}
+
+@inproceedings{thomas_framework_2005,
+  location = {New York, {NY}, {USA}},
+  title = {A Framework for Adaptive Algorithm Selection in {STAPL}},
+  isbn = {1-59593-080-9},
+  url = {https://doi.org/10.1145/1065944.1065981},
+  doi = {10.1145/1065944.1065981},
+  series = {{PPoPP} '05},
+  abstract = {Writing portable programs that perform well on multiple platforms or for varying input sizes and types can be very difficult because performance is often sensitive to the system architecture, the run-time environment, and input data characteristics. This is even more challenging on parallel and distributed systems due to the wide variety of system architectures. One way to address this problem is to adaptively select the best parallel algorithm for the current input data and system from a set of functionally equivalent algorithmic options. Toward this goal, we have developed a general framework for adaptive algorithm selection for use in the Standard Template Adaptive Parallel Library ({STAPL}). Our framework uses machine learning techniques to analyze data collected by {STAPL} installation benchmarks and to determine tests that will select among algorithmic options at run-time. We apply a prototype implementation of our framework to two important parallel operations, sorting and matrix multiplication, on multiple platforms and show that the framework determines run-time tests that correctly select the best performing algorithm from among several competing algorithmic options in 86-100\% of the cases studied, depending on the operation and the system.},
+  pages = {277--288},
+  booktitle = {Proceedings of the Tenth {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming},
+  publisher = {Association for Computing Machinery},
+  author = {Thomas, Nathan and Tanase, Gabriel and Tkachyshyn, Olga and Perdue, Jack and Amato, Nancy M. and Rauchwerger, Lawrence},
+  date = {2005},
+  note = {event-place: Chicago, {IL}, {USA}},
+  keywords = {ml, read},
+}
+
+@inproceedings{franke_collection_2022,
+  location = {New York, {NY}, {USA}},
+  title = {Collection Skeletons: Declarative Abstractions for Data Collections},
+  isbn = {978-1-4503-9919-7},
+  url = {https://doi.org/10.1145/3567512.3567528},
+  doi = {10.1145/3567512.3567528},
+  series = {{SLE} 2022},
+  abstract = {Modern programming languages provide programmers with rich abstractions for data collections as part of their standard libraries, e.g. Containers in the C++ {STL}, the Java Collections Framework, or the Scala Collections {API}. Typically, these collections frameworks are organised as hierarchies that provide programmers with common abstract data types ({ADTs}) like lists, queues, and stacks. While convenient, this approach introduces problems which ultimately affect application performance due to users over-specifying collection data types limiting implementation flexibility. In this paper, we develop Collection Skeletons which provide a novel, declarative approach to data collections. Using our framework, programmers explicitly select properties for their collections, thereby truly decoupling specification from implementation.
+  By making collection properties explicit immediate benefits materialise in form of reduced risk of over-specification and increased implementation flexibility. We have prototyped our declarative abstractions for collections as a C++ library, and demonstrate that benchmark applications rewritten to use Collection Skeletons incur little or no overhead. In fact, for several benchmarks, we observe performance speedups (on average between 2.57 to 2.93, and up to 16.37) and also enhanced performance portability across three different hardware platforms.},
+  pages = {189--201},
+  booktitle = {Proceedings of the 15th {ACM} {SIGPLAN} International Conference on Software Language Engineering},
+  publisher = {Association for Computing Machinery},
+  author = {Franke, Björn and Li, Zhibo and Morton, Magnus and Steuwer, Michel},
+  date = {2022},
+  note = {event-place: Auckland, New Zealand},
+  keywords = {functional requirements},
+  file = {Accepted Version:/home/aria/Zotero/storage/TJ3AGL2S/Franke et al. - 2022 - Collection Skeletons Declarative Abstractions for.pdf:application/pdf},
 }
diff --git a/thesis/parts/background.tex b/thesis/parts/background.tex
index dbf6b74..c832e50 100644
--- a/thesis/parts/background.tex
+++ b/thesis/parts/background.tex
@@ -1,25 +1,25 @@
-In this chapter, we provide an overview of the problem of container selection, and its effect on program correctness and performance.
-We then provide an overview of how modern programming languages approach this problem, and how existing literature contributes.
-Finally, we examine the gaps in the existing literature, and explain how this paper aims to contribute.
+In this chapter we provide an overview of the problem of container selection, and its effect on program correctness and performance.
+We then provide an overview of approaches from modern programming languages, and existing literature.
+Finally, we examine the gaps in the existing literature, and explain what we aim to contribute.
 
 \section{Container Selection}
 
-The vast majority of programs will use make extensive use of collection data types - types intended to hold many different instances of other data types.
-This can refer to anything from fixed-size arrays, to growable linked lists, to associative key-value mappings or dictionaries.
+The vast majority of programs will make extensive use of collection data types --- types intended to hold many different instances of other data types.
+This includes structures like fixed-size arrays, growable lists, and key-value mappings.
 
-In many languages, the standard library provides a variety of collections, forcing us to choose which one is best.
-Consider the Rust types \code{Vec<T>} (a dynamic array) and \code{HashSet<T>} (a hash-based set).
+In many languages, the standard library provides a variety of collections, forcing us to choose which is best.
+Consider the Rust types \code{Vec<T>} (a dynamic array) and \code{HashSet<T>} (a hash-based set). %comma separate definitions instead of parentheses
 If we care about the ordering, or about preserving duplicates, then we must use \code{Vec<T>}.
-But if we don't, then \code{HashSet<T>} might be more performant, if we use \code{contains} a lot.
+But if we don't, then \code{HashSet<T>} might be more performant, if we use \code{contains} a lot.%bad contraction (there can be good contractions)
 
-We refer to this problem as container selection, and say that we must satisfy both functional requirements, and non-functional requirements.
+We refer to this problem as container selection, and say that we must satisfy both functional requirements, and non-functional requirements.%comma?
 
 \subsection{Functional requirements}
 
-The functional requirements tell us how the container will be used, and how it must behave.
+The functional requirements tell us how the container will be used, and how it must behave.%comma?
 
 Continuing with our previous example, we can see that \code{Vec} and \code{HashSet} implement different methods.
-\code{Vec} implements \code{.get(index)} while \code{HashSet} doesn't - it wouldn't make sense for an unordered collection.
+\code{Vec} implements \code{.get(index)} while \code{HashSet} doesn't - it wouldn't make sense for an unordered collection.%contraction
 If we try to swap \code{Vec} for \code{HashSet}, the resulting program will likely not compile.
 
 We will call the operations a container implements the ``syntactic properties'' of the container.
@@ -39,9 +39,8 @@ For a \code{HashSet}, this would include that there are never any duplicates, wh
 While meeting the functional requirements should ensure our program runs correctly, we also want to choose the 'best' type that we can.
 Here we will consider 'best' as striking a balance between runtime and memory usage.
 
-Prior work has shown that properly considering container selection selection can give substantial performance improvements, even in large applications.
-For instance, tuning performed in \cite{chung_towards_2004} achieved an up to 70\% increase in the throughput of a complex web application, and a 15-40\% decrease in the runtime of several scientific applications.
-\cite{l_liu_perflint_2009} found and suggested fixes for ``hundreds of suboptimal patterns in a set of large C++ benchmarks,'' with one such case improving performance by 17\%.
+Prior work has shown that properly considering container selection can give substantial performance improvements.
+For instance, \cite{l_liu_perflint_2009} found and suggested fixes for ``hundreds of suboptimal patterns in a set of large C++ benchmarks,'' with one such case improving performance by 17\%.
 Similarly, \cite{jung_brainy_2011} achieves an average speedup of 27-33\% on real-world applications and libraries.
 
 If we can find a selection of types that satisfy our functional requirements, then one obvious solution is to benchmark the program with each of these implementations in place, and see which works best.
@@ -76,10 +75,11 @@ This means that when the developer chooses a type, the compiler enforces the syn
 Whilst the approach Java takes is the most expressive, both of these approaches either put the choice on the developer, or remove the choice entirely.
 This means that developers are forced to guess based on their knowledge of the underlying implementations, or to just pick the most common implementation.
 
-\subsection{Chameleon}
+\subsection{Rules-based approaches}
 
-Chameleon\parencite{shacham_chameleon_2009} is a tool for Java codebases, which uses a rules engine to identify sub-optimal choices.
+One approach to the container selection problem is to allow the developer to make the choice initially, but use some tool to detect bad choices.
+Chameleon\parencite{shacham_chameleon_2009} is one example of this.
 It first collects statistics from program benchmarks using a ``semantic profiler''.
 This includes the space used by collections over time, and the counts of each operation performed.
 These statistics are tracked per individual collection allocated, and then aggregated by 'allocation context' - the call stack at the point where the allocation occured.
@@ -92,9 +92,18 @@ This results in selection rules needing to be more restricted than they otherwis
 For instance, a rule cannot suggest a \code{HashSet} instead of a \code{LinkedList}, as the two are not semantically identical.
 Chameleon has no way of knowing if doing so will break the program's functionality, and so it does not make a suggestion.
 
-A similar rules-based approach was also used in \cite{l_liu_perflint_2009}, while \cite{jung_brainy_2011} uses a machine learning approach with similar statistics collection.
+A similar rules-based approach is used by \cite{l_liu_perflint_2009} for the C++ standard library.
+\cite{hutchison_coco_2013} and \cite{osterlund_dynamically_2013} use similar techniques, but work as the program runs.
+This works well for programs with different phases of execution, but does incur an overhead.
 
-\subsection{CollectionSwitch}
+\subsection{ML-based approaches}
+
+%% TODO
+\cite{jung_brainy_2011} uses a machine learning approach with similar statistics collection.
+
+\cite{thomas_framework_2005} also uses an ML approach, but focuses on parallel algorithms rather than data structures, and does not take hardware counters into account.
+
+\subsection{Estimate-based approaches}
 
 CollectionSwitch\parencite{costa_collectionswitch_2018} is an online solution, which adapts as the program runs and new information becomes available.
@@ -108,18 +117,22 @@ Rules then decide when switching to a new implementation is worth it based on th
 By generating a cost model based on benchmarks, CollectionSwitch manages to be more flexible than other rules-based approaches such as Chameleon.
 It expects applications to use Java's \code{List}, \code{Set}, or \code{Map} interfaces, which express enough functional requirements for most problems.
 
-\cite{hutchison_coco_2013} and \cite{osterlund_dynamically_2013} both also attempt online selection, however do so with a rules-based approach more similar to Chameleon \cite{shacham_chameleon_2009}.
+\subsection{Functional requirements}
 
-\subsection{Primrose}
+Most of the approaches highlighted above have focused on non-functional requirements, and used programming language features to enforce functional requirements.
+By contrast, Primrose \parencite{qin_primrose_2023} focuses on the functional requirements of container selection.
 
-Primrose \parencite{qin_primrose_2023} focuses on the functional requirements of container selection.
+It allows the application developer to specify semantic requirements using a DSL, and syntactic requirements using Rust's traits.
 
-It allows the application developer to specify both syntactic and semantic requirements using a Lisp DSL.
-The available implementations are then checked against these requirements using an SMT solver, to obtain a set of usable implementations.
+A semantic property is simply a predicate, acting on an abstract model of the container type.
+Similarly, each implementation provides an abstract version of its operations acting on this model.
+An SMT solver then checks if a given implementation will always meet the conditions required by the predicate(s).
 
 Developers must then choose which of these implementations will work best for their non-functional requirements.
 This allows developers to express any combination of semantic requirements, rather than limiting them to common ones like Java's approach.
 It can also be extended with new implementations as needed, although this does require modelling the semantics of the new implementation.
 
+\cite{franke_collection_2022} also uses the idea of refinement types, but is limited to properties defined by the library authors.
+
 \section{Contributions}
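
Editor's note: the Vec/HashSet trade-off that background.tex describes can be made concrete with a short standalone Rust sketch. This is not part of the patch above; the element count and the contains-heavy workload are illustrative assumptions, not measurements from the thesis.

    // A minimal sketch of the container-selection trade-off from background.tex.
    // Assumptions (hypothetical, for illustration only): 1_000 elements and a
    // workload dominated by membership tests.
    use std::collections::HashSet;
    use std::time::Instant;

    fn main() {
        let n = 1_000u32;

        // Vec<T> preserves insertion order and duplicates...
        let vec: Vec<u32> = (0..n).collect();
        // ...while HashSet<T> gives up both in exchange for O(1) expected lookups.
        let set: HashSet<u32> = (0..n).collect();

        // A `contains`-heavy workload: Vec scans linearly, HashSet hashes.
        let start = Instant::now();
        let hits_vec = (0..n).filter(|x| vec.contains(x)).count();
        let t_vec = start.elapsed();

        let start = Instant::now();
        let hits_set = (0..n).filter(|x| set.contains(x)).count();
        let t_set = start.elapsed();

        // Same functional result here, but only because this input has no
        // duplicates and the ordering is never observed.
        assert_eq!(hits_vec, hits_set);
        println!("Vec: {t_vec:?}, HashSet: {t_set:?}");
    }

Swapping the two types is only safe when, as here, no code depends on ordering or duplicates — exactly the semantic-property question the chapter's discussion of Chameleon and Primrose turns on.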