aboutsummaryrefslogtreecommitdiff
path: root/lix-doc
diff options
context:
space:
mode:
authorJade Lovelace <lix@jade.fyi>2024-04-07 16:16:21 -0700
committerjade <lix@jade.fyi>2024-04-08 04:05:13 +0000
commit0145d45806b1f60e9c7221a1f063a1b336a98eb4 (patch)
treecfe9ec77f0323da70fb297667c7df14ab137fcf5 /lix-doc
parentb995c17f0eb8d9598f339c080c467101c1f55feb (diff)
nix-doc -> lix-doc, make self-contained in package.nix
package.nix previously needed this callPackage'd externally, which didn't make a lot of sense to us since this is an internal dependency. Thus we changed it to make it more self contained. Change-Id: I4935bc0bc80e1a132bc9b1519e917791da95037c
Diffstat (limited to 'lix-doc')
-rw-r--r--lix-doc/.gitignore6
-rw-r--r--lix-doc/Cargo.lock161
-rw-r--r--lix-doc/Cargo.toml18
-rw-r--r--lix-doc/README.md6
-rw-r--r--lix-doc/package.nix11
-rw-r--r--lix-doc/src/lib.rs326
-rw-r--r--lix-doc/src/pprint.rs40
7 files changed, 568 insertions, 0 deletions
diff --git a/lix-doc/.gitignore b/lix-doc/.gitignore
new file mode 100644
index 000000000..c0d245929
--- /dev/null
+++ b/lix-doc/.gitignore
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: 2024 Jade Lovelace
+#
+# SPDX-License-Identifier: BSD-2-Clause OR MIT
+
+/target
+result
diff --git a/lix-doc/Cargo.lock b/lix-doc/Cargo.lock
new file mode 100644
index 000000000..d5028edfe
--- /dev/null
+++ b/lix-doc/Cargo.lock
@@ -0,0 +1,161 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "cbitset"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29b6ad25ae296159fb0da12b970b2fe179b234584d7cd294c891e2bbb284466b"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "dissimilar"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86e3bdc80eee6e16b2b6b0f87fbc98c04bee3455e35174c0de1a125d0688c632"
+
+[[package]]
+name = "expect-test"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30d9eafeadd538e68fb28016364c9732d78e420b9ff8853fa5e4058861e9f8d3"
+dependencies = [
+ "dissimilar",
+ "once_cell",
+]
+
+[[package]]
+name = "lix-doc"
+version = "0.0.1"
+dependencies = [
+ "expect-test",
+ "rnix",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.79"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rnix"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a9b645f0edba447dbfc6473dd22999f46a1d00ab39e777a2713a1cf34a1597b"
+dependencies = [
+ "cbitset",
+ "rowan",
+]
+
+[[package]]
+name = "rowan"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ea7cadf87a9d8432e85cb4eb86bd2e765ace60c24ef86e79084dcae5d1c5a19"
+dependencies = [
+ "rustc-hash",
+ "smol_str",
+ "text_unit",
+ "thin-dst",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "serde"
+version = "1.0.197"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.197"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "smol_str"
+version = "0.1.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fad6c857cbab2627dcf01ec85a623ca4e7dcb5691cbaa3d7fb7653671f0d09c9"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.53"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "text_unit"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20431e104bfecc1a40872578dbc390e10290a0e9c35fffe3ce6f73c15a9dbfc2"
+
+[[package]]
+name = "thin-dst"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db3c46be180f1af9673ebb27bc1235396f61ef6965b3fe0dbb2e624deb604f0e"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
diff --git a/lix-doc/Cargo.toml b/lix-doc/Cargo.toml
new file mode 100644
index 000000000..df4eed932
--- /dev/null
+++ b/lix-doc/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+description = "Nix function documentation tool, stripped down into a library"
+edition = "2018"
+name = "lix-doc"
+version = "0.0.1"
+license = "BSD-2-Clause OR MIT"
+# upstream details
+homepage = "https://github.com/lf-/nix-doc"
+repository = "https://github.com/lf-/nix-doc"
+
+# Build a static archive exposing the `extern "C"` entry points from src/lib.rs.
+# `crate-type` (hyphenated) is the documented key; the underscore spelling is a
+# deprecated alias.
+[lib]
+crate-type = ["staticlib"]
+
+[dependencies]
+rnix = "0.8.0"
+
+[dev-dependencies]
+expect-test = "1.1.0"
diff --git a/lix-doc/README.md b/lix-doc/README.md
new file mode 100644
index 000000000..26049ebd7
--- /dev/null
+++ b/lix-doc/README.md
@@ -0,0 +1,6 @@
+# lix-doc
+
+This is a stripped down fork of `nix-doc`, used for `:doc` in `nix repl` in
+Lix. It will be replaced in the future with proper support when we get the new
+parser working, but it exists today as a low-risk implementation of an
+important usability feature.
diff --git a/lix-doc/package.nix b/lix-doc/package.nix
new file mode 100644
index 000000000..86ab7501e
--- /dev/null
+++ b/lix-doc/package.nix
@@ -0,0 +1,11 @@
+# Package definition for the lix-doc crate, built with `rustPlatform.buildRustPackage`.
+# Both inputs are taken as function arguments.
+{
+ rustPlatform,
+ lib
+}:
+
+rustPlatform.buildRustPackage {
+ name = "lix-doc";
+
+ # Dependencies are resolved from the lockfile that lives next to this file.
+ cargoLock.lockFile = ./Cargo.lock;
+ # The source is this directory, passed through `lib.cleanSource`.
+ src = lib.cleanSource ./.;
+}
diff --git a/lix-doc/src/lib.rs b/lix-doc/src/lib.rs
new file mode 100644
index 000000000..9c2e43f2f
--- /dev/null
+++ b/lix-doc/src/lib.rs
@@ -0,0 +1,326 @@
+// SPDX-FileCopyrightText: 2024 Jade Lovelace
+//
+// SPDX-License-Identifier: BSD-2-Clause OR MIT
+
+//! library components of nix-doc
+pub mod pprint;
+
+use crate::pprint::pprint_args;
+
+use rnix::types::{Lambda, TypedNode};
+use rnix::SyntaxKind::*;
+use rnix::{NodeOrToken, SyntaxNode, TextUnit, WalkEvent};
+
+use std::ffi::{CStr, CString};
+use std::fs;
+use std::iter;
+use std::os::raw::c_char;
+use std::panic;
+
+use std::ptr;
+
+use std::{fmt::Display, str};
+
+/// Result alias whose error side is any boxed [`std::error::Error`].
+pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
+
+/// Number of spaces the documentation body is indented by when a
+/// `SearchResult` is formatted.
+const DOC_INDENT: usize = 3;
+
+/// The pieces of information collected about one function before they are
+/// rendered into text by `SearchResult::format`.
+struct SearchResult {
+ /// Name of the function
+ identifier: String,
+
+ /// Dedented documentation comments
+ doc: String,
+
+ /// Parameter block for the function
+ param_block: String,
+}
+
+/// Converts a 1-indexed `line`/`col` position into a byte offset into `file`.
+///
+/// `\n`, a lone `\r`, and `\r\n` each count as exactly one line terminator.
+/// The returned value is a *byte* offset, which is what the caller feeds into
+/// `TextUnit`, so multi-byte characters earlier in the file no longer skew it.
+/// Columns are counted in bytes from the start of the line.
+/// NOTE(review): Nix appears to report columns as byte counts -- confirm.
+///
+/// Out-of-range input is clamped rather than treated as impossible: a column
+/// past the end of its line yields the offset of that line's terminator, a
+/// line past the end of the file yields `file.len()`, and a `line` or `col` of
+/// zero is treated as one.
+fn find_pos(file: &str, line: usize, col: usize) -> usize {
+    let mut line_start = 0;
+    let mut current_line = 1;
+    // Scanning bytes is safe here: the bytes for `\n` and `\r` never occur
+    // inside a multi-byte UTF-8 sequence.
+    let mut bytes = file.bytes().enumerate().peekable();
+
+    while current_line < line {
+        let (idx, byte) = match bytes.next() {
+            Some(pair) => pair,
+            // The requested line does not exist.
+            None => return file.len(),
+        };
+        let terminator_end = match byte {
+            b'\n' => idx,
+            b'\r' => match bytes.peek() {
+                // `\r\n` is a single terminator; swallow the `\n` as well.
+                Some(&(next, b'\n')) => {
+                    bytes.next();
+                    next
+                }
+                _ => idx,
+            },
+            _ => continue,
+        };
+        current_line += 1;
+        line_start = terminator_end + 1;
+    }
+
+    // `line_start` is 0 or directly follows an ASCII terminator, so it always
+    // sits on a character boundary.
+    let rest = &file[line_start..];
+    let line_len = rest
+        .find(|ch| ch == '\n' || ch == '\r')
+        .unwrap_or_else(|| rest.len());
+    line_start + col.saturating_sub(1).min(line_len)
+}
+
+impl SearchResult {
+    /// Renders the result as text: a synopsis line with the identifier and
+    /// parameter block, the documentation indented by `DOC_INDENT` spaces, and
+    /// a trailing `# filename:line` location marker.
+    fn format<P: Display>(&self, filename: P, line: usize) -> String {
+        let body = indented(&self.doc, DOC_INDENT);
+        let location = format!("{}:{}", filename, line);
+        format!(
+            "**Synopsis:** `{}` = {}\n\n{}\n\n# {}",
+            self.identifier, self.param_block, body, location,
+        )
+    }
+}
+
+/// Returns `s` with every `\n`-separated line prefixed by `indent` spaces.
+///
+/// Empty lines (including the empty segment after a trailing newline) receive
+/// the prefix as well.
+fn indented(s: &str, indent: usize) -> String {
+    let pad = " ".repeat(indent);
+    let mut out = String::new();
+    for (idx, line) in s.split('\n').enumerate() {
+        if idx > 0 {
+            out.push('\n');
+        }
+        out.push_str(&pad);
+        out.push_str(line);
+    }
+    out
+}
+
+/// Strips a leading comment marker from one line while keeping what follows it.
+///
+/// If the first non-whitespace character is `#` or `*`, everything up to and
+/// including that character is dropped. Any other line is returned unchanged
+/// so that the later dedent pass can deal with its indentation.
+fn cleanup_single_line<'a>(s: &'a str) -> &'a str {
+    let first_visible = s.char_indices().find(|(_, ch)| !ch.is_whitespace());
+    match first_visible {
+        // both markers are one byte wide, so `pos + 1` is a valid boundary
+        Some((pos, '#')) | Some((pos, '*')) => &s[pos + 1..],
+        _ => s,
+    }
+}
+
+/// Erases indents in comments. This is *almost* a normal dedent function, but it takes the
+/// indent from the lines after the first one whenever any of them has content.
+///
+/// The common indent is the smallest number of leading whitespace *characters* among the
+/// non-blank lines after the first; if there is no such line, the first line's own indent is
+/// used. Up to that many leading whitespace characters are removed from every non-blank line.
+/// Lines consisting only of whitespace are kept untouched, and trailing newlines are dropped
+/// from the result.
+///
+/// Indents are measured in characters and converted to byte offsets per line, so indentation
+/// made of multi-byte whitespace can never cause a slice in the middle of a character.
+fn dedent_comment(s: &str) -> String {
+    // Leading whitespace of `line` in characters, or `None` when the line is blank.
+    fn indent_of(line: &str) -> Option<usize> {
+        if line.trim_start().is_empty() {
+            None
+        } else {
+            Some(line.chars().take_while(|ch| ch.is_whitespace()).count())
+        }
+    }
+
+    let mut lines = s.lines();
+    let first = lines.next();
+    let whitespaces = lines
+        .filter_map(indent_of)
+        .min()
+        // only the first line has content (or nothing does)
+        .or_else(|| first.and_then(indent_of))
+        .unwrap_or(0);
+
+    let mut out = String::with_capacity(s.len());
+    for line in s.lines() {
+        // never strip more than the line's own indent; blank lines strip nothing
+        let strip = indent_of(line).map_or(0, |own| own.min(whitespaces));
+        let cut = line
+            .char_indices()
+            .nth(strip)
+            .map_or(line.len(), |(idx, _)| idx);
+        out.push_str(&line[cut..]);
+        out.push('\n');
+    }
+
+    out.truncate(out.trim_end_matches('\n').len());
+    out
+}
+
+/// Turns a sequence of raw comment tokens into one dedented documentation string.
+///
+/// The tokens are consumed back to front. Each one has its surrounding whitespace and its
+/// `/*` / `*/` delimiters removed, then every line in it is passed through
+/// `cleanup_single_line`. The pieces are joined with newlines and dedented as a whole.
+///
+/// Oversight we are choosing to ignore: if you put # characters at the beginning of lines in a
+/// multiline comment, they will be deleted.
+fn cleanup_comments<S: AsRef<str>, I: DoubleEndedIterator<Item = S>>(comment: &mut I) -> String {
+    let mut pieces = Vec::new();
+    for raw in comment.rev() {
+        let inner = raw
+            .as_ref()
+            // whitespace in front of the opening delimiter, then the delimiter
+            .trim_start()
+            .trim_start_matches("/*")
+            // whitespace behind the closing delimiter, then the delimiter
+            .trim_end()
+            .trim_end_matches("*/")
+            // padding that sat inside the delimiters
+            .trim();
+        let cleaned: Vec<&str> = inner.split('\n').map(cleanup_single_line).collect();
+        pieces.push(cleaned.join("\n"));
+    }
+    dedent_comment(&pieces.join("\n"))
+}
+
+/// Get the docs for a specific function
+///
+/// Reads `filename`, parses it with rnix, and picks the first lambda node (in preorder) that
+/// starts at or after the offset `find_pos` computes for `line`/`col`. Returns `None` when the
+/// file cannot be read, is not valid UTF-8, or contains no such lambda.
+pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> {
+    let raw = fs::read(filename).ok()?;
+    let source = str::from_utf8(&raw).ok()?;
+    let target = TextUnit::from_usize(find_pos(source, line, col));
+    let tree = rnix::parse(source);
+
+    let lambda = tree
+        .node()
+        .preorder()
+        // only the moment a node is entered is of interest
+        .filter_map(|event| match event {
+            WalkEvent::Enter(entered) => Some(entered),
+            WalkEvent::Leave(_) => None,
+        })
+        .find(|entered| entered.text_range().start() >= target && entered.kind() == NODE_LAMBDA)
+        .and_then(Lambda::cast)?;
+
+    // the name is always rendered as the placeholder "func"
+    Some(visit_lambda("func".to_string(), &lambda).format(filename, line))
+}
+
+/// Builds the `SearchResult` for `lambda`: its pretty-printed arguments plus the doc comment
+/// found in front of it (an empty string when there is none), labelled with `name`.
+fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult {
+    let param_block = pprint_args(lambda);
+    let doc = find_comment(lambda.node().clone()).unwrap_or_default();
+
+    SearchResult {
+        identifier: name,
+        doc,
+        param_block,
+    }
+}
+
+/// Walks backwards through the syntax tree from `node`, collecting the comment tokens that
+/// precede it, and returns them cleaned up as one string.
+///
+/// The walk steps to the previous sibling, climbing to the parent whenever a node has none,
+/// and keeps going across attribute keys, `=` tokens, and trivia. It stops at the first element
+/// of any other kind. Returns `None` if the root is reached while climbing, or if the
+/// cleaned-up text is empty.
+fn find_comment(node: SyntaxNode) -> Option<String> {
+    let mut cursor = NodeOrToken::Node(node);
+    let mut collected = Vec::new();
+    loop {
+        // move one element back, going up a level each time we run out of siblings
+        cursor = loop {
+            match cursor.prev_sibling_or_token() {
+                Some(previous) => break previous,
+                None => cursor = NodeOrToken::Node(cursor.parent()?),
+            }
+        };
+
+        let kind = cursor.kind();
+        if kind == TOKEN_COMMENT {
+            match &cursor {
+                NodeOrToken::Token(token) => collected.push(token.text().clone()),
+                NodeOrToken::Node(_) => unreachable!(),
+            }
+        } else if kind == NODE_KEY || kind == TOKEN_ASSIGN || kind.is_trivia() {
+            // This stuff is found as part of `the-fn = f: ...`
+            //                            here:   ^^^^^^^^
+            continue;
+        } else {
+            break;
+        }
+    }
+
+    let doc = cleanup_comments(&mut collected.iter().map(|c| c.as_str()));
+    if doc.is_empty() {
+        None
+    } else {
+        Some(doc)
+    }
+}
+
+/// Get the docs for a function in the given file path at the given file position and return it as
+/// a C string pointer
+///
+/// Returns a null pointer when `filename` is null or not valid UTF-8, when no documentation
+/// could be produced, when the lookup panicked (the panic is caught and reported on stderr), or
+/// when the resulting text contains an interior NUL byte. A non-null result must be released
+/// with `nd_free_string`.
+///
+/// # Safety
+///
+/// A non-null `filename` must point to a NUL-terminated string that stays valid for the
+/// duration of the call.
+#[no_mangle]
+pub extern "C" fn nd_get_function_docs(
+    filename: *const c_char,
+    line: usize,
+    col: usize,
+) -> *const c_char {
+    // `CStr::from_ptr` on a null pointer is undefined behaviour, so reject it up front.
+    if filename.is_null() {
+        return ptr::null();
+    }
+    // SAFETY: `filename` is non-null (checked above); the caller guarantees it points to a
+    // NUL-terminated string that outlives this call.
+    let fname = unsafe { CStr::from_ptr(filename) };
+    fname
+        .to_str()
+        .ok()
+        .and_then(|f| {
+            // a panic must not unwind across the C boundary
+            panic::catch_unwind(|| get_function_docs(f, line, col))
+                .map_err(|e| {
+                    eprintln!("panic!! {:#?}", e);
+                    e
+                })
+                .ok()
+        })
+        .flatten()
+        .and_then(|s| CString::new(s).ok())
+        // ownership of the allocation moves to the caller until `nd_free_string`
+        .map(|s| s.into_raw() as *const c_char)
+        .unwrap_or(ptr::null())
+}
+
+/// Call this to free a string from nd_get_function_docs
+///
+/// Passing a null pointer is allowed and does nothing, so the result of
+/// `nd_get_function_docs` can be handed back unconditionally.
+///
+/// # Safety
+///
+/// A non-null `s` must be a pointer previously returned by `nd_get_function_docs` that has
+/// not been freed yet.
+#[no_mangle]
+pub extern "C" fn nd_free_string(s: *const c_char) {
+    // `CString::from_raw` on a null pointer is undefined behaviour.
+    if s.is_null() {
+        return;
+    }
+    // SAFETY: `s` is non-null and, per this function's contract, came from
+    // `CString::into_raw`. The cast only restores the mutability that was cast away when
+    // the pointer was handed out.
+    unsafe {
+        drop(CString::from_raw(s as *mut c_char));
+    }
+}
+
+/// Unit tests for the position lookup and the comment clean-up helpers.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// `find_pos` on plain `\n` line endings.
+ #[test]
+ fn test_bytepos() {
+ let fakefile = "abc\ndef\nghi";
+ assert_eq!(find_pos(fakefile, 2, 2), 5);
+ }
+
+ /// `find_pos` on a file that mixes a lone `\r` with a `\r\n` line ending.
+ #[test]
+ fn test_bytepos_cursed() {
+ let fakefile = "abc\rdef\r\nghi";
+ assert_eq!(find_pos(fakefile, 2, 2), 5);
+ assert_eq!(find_pos(fakefile, 3, 2), 10);
+ }
+
+ /// `cleanup_comments` on one multiline comment and on a run of `#` comments.
+ #[test]
+ fn test_comment_stripping() {
+ let ex1 = ["/* blah blah blah\n foooo baaar\n blah */"];
+ assert_eq!(
+ cleanup_comments(&mut ex1.iter()),
+ "blah blah blah\n foooo baaar\nblah"
+ );
+
+ // the comments are consumed back to front, so the last one comes out first
+ let ex2 = ["# a1", "# a2", "# aa"];
+ assert_eq!(cleanup_comments(&mut ex2.iter()), "aa\n a2\na1");
+ }
+
+ /// `dedent_comment` with a deeper-indented line, no indentation, and a blank line.
+ #[test]
+ fn test_dedent() {
+ let ex1 = "a\n b\n c\n d";
+ assert_eq!(dedent_comment(ex1), "a\nb\nc\n d");
+ let ex2 = "a\nb\nc";
+ assert_eq!(dedent_comment(ex2), ex2);
+ let ex3 = " a\n b\n\n c";
+ assert_eq!(dedent_comment(ex3), "a\nb\n\n c");
+ }
+
+ /// `cleanup_single_line` removes a leading `*` or `#` and leaves other lines alone.
+ #[test]
+ fn test_single_line_comment_stripping() {
+ let ex1 = " * a";
+ let ex2 = " # a";
+ let ex3 = " a";
+ assert_eq!(cleanup_single_line(ex1), " a");
+ assert_eq!(cleanup_single_line(ex2), " a");
+ assert_eq!(cleanup_single_line(ex3), ex3);
+ }
+}
diff --git a/lix-doc/src/pprint.rs b/lix-doc/src/pprint.rs
new file mode 100644
index 000000000..7e73d2d20
--- /dev/null
+++ b/lix-doc/src/pprint.rs
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: 2024 Jade Lovelace
+//
+// SPDX-License-Identifier: BSD-2-Clause OR MIT
+
+use rnix::types::{Lambda, TypedNode};
+use rnix::SyntaxKind::*;
+
+/// Pretty-prints the arguments to a function
+///
+/// Every argument is rendered as `*arg*: `. Curried lambdas (`a: b: ...`) are followed through
+/// their bodies for as long as the argument is a plain identifier and the body is another
+/// lambda; a pattern argument ends the walk. The result always ends in `...`, standing in for
+/// the function body.
+///
+/// # Panics
+///
+/// Panics if a lambda has no argument or an identifier-argument lambda has no body, or if the
+/// argument is neither an identifier nor a pattern.
+pub fn pprint_args(lambda: &Lambda) -> String {
+    // TODO: handle docs directly on NODE_IDENT args (uncommon case)
+    let mut current = lambda.clone();
+    let mut rendered = String::new();
+    loop {
+        let arg = current.arg().unwrap();
+        let kind = arg.kind();
+        if kind != NODE_IDENT && kind != NODE_PATTERN {
+            unreachable!("unhandled arg type {:?}", kind);
+        }
+
+        rendered.push_str(&format!("*{}*", arg));
+        rendered.push_str(": ");
+
+        // a pattern argument (`{ a, b }: ...`) is never followed into its body
+        if kind == NODE_PATTERN {
+            break;
+        }
+
+        let body = current.body().unwrap();
+        if body.kind() != NODE_LAMBDA {
+            break;
+        }
+        current = Lambda::cast(body).unwrap();
+    }
+    rendered.push_str("...");
+    rendered
+}