diff options
author | Jade Lovelace <lix@jade.fyi> | 2024-04-07 16:16:21 -0700 |
---|---|---|
committer | jade <lix@jade.fyi> | 2024-04-08 04:05:13 +0000 |
commit | 0145d45806b1f60e9c7221a1f063a1b336a98eb4 (patch) | |
tree | cfe9ec77f0323da70fb297667c7df14ab137fcf5 /lix-doc | |
parent | b995c17f0eb8d9598f339c080c467101c1f55feb (diff) |
nix-doc -> lix-doc, make self-contained in package.nix
package.nix previously needed this to be callPackage'd externally, which
didn't make a lot of sense to us since this is an internal dependency.
Thus we changed it to be more self-contained.
Change-Id: I4935bc0bc80e1a132bc9b1519e917791da95037c
Diffstat (limited to 'lix-doc')
-rw-r--r-- | lix-doc/.gitignore | 6 | ||||
-rw-r--r-- | lix-doc/Cargo.lock | 161 | ||||
-rw-r--r-- | lix-doc/Cargo.toml | 18 | ||||
-rw-r--r-- | lix-doc/README.md | 6 | ||||
-rw-r--r-- | lix-doc/package.nix | 11 | ||||
-rw-r--r-- | lix-doc/src/lib.rs | 326 | ||||
-rw-r--r-- | lix-doc/src/pprint.rs | 40 |
7 files changed, 568 insertions, 0 deletions
diff --git a/lix-doc/.gitignore b/lix-doc/.gitignore new file mode 100644 index 000000000..c0d245929 --- /dev/null +++ b/lix-doc/.gitignore @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2024 Jade Lovelace +# +# SPDX-License-Identifier: BSD-2-Clause OR MIT + +/target +result diff --git a/lix-doc/Cargo.lock b/lix-doc/Cargo.lock new file mode 100644 index 000000000..d5028edfe --- /dev/null +++ b/lix-doc/Cargo.lock @@ -0,0 +1,161 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "cbitset" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29b6ad25ae296159fb0da12b970b2fe179b234584d7cd294c891e2bbb284466b" +dependencies = [ + "num-traits", +] + +[[package]] +name = "dissimilar" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86e3bdc80eee6e16b2b6b0f87fbc98c04bee3455e35174c0de1a125d0688c632" + +[[package]] +name = "expect-test" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d9eafeadd538e68fb28016364c9732d78e420b9ff8853fa5e4058861e9f8d3" +dependencies = [ + "dissimilar", + "once_cell", +] + +[[package]] +name = "lix-doc" +version = "0.0.1" +dependencies = [ + "expect-test", + "rnix", +] + +[[package]] +name = "num-traits" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" 
+version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rnix" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a9b645f0edba447dbfc6473dd22999f46a1d00ab39e777a2713a1cf34a1597b" +dependencies = [ + "cbitset", + "rowan", +] + +[[package]] +name = "rowan" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ea7cadf87a9d8432e85cb4eb86bd2e765ace60c24ef86e79084dcae5d1c5a19" +dependencies = [ + "rustc-hash", + "smol_str", + "text_unit", + "thin-dst", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "smol_str" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fad6c857cbab2627dcf01ec85a623ca4e7dcb5691cbaa3d7fb7653671f0d09c9" +dependencies = [ + "serde", +] + +[[package]] +name = "syn" +version = "2.0.53" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "text_unit" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20431e104bfecc1a40872578dbc390e10290a0e9c35fffe3ce6f73c15a9dbfc2" + +[[package]] +name = "thin-dst" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c46be180f1af9673ebb27bc1235396f61ef6965b3fe0dbb2e624deb604f0e" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/lix-doc/Cargo.toml b/lix-doc/Cargo.toml new file mode 100644 index 000000000..df4eed932 --- /dev/null +++ b/lix-doc/Cargo.toml @@ -0,0 +1,18 @@ +[package] +description = "Nix function documentation tool, stripped down into a library" +edition = "2018" +name = "lix-doc" +version = "0.0.1" +license = "BSD-2-Clause OR MIT" +# upstream details +homepage = "https://github.com/lf-/nix-doc" +repository = "https://github.com/lf-/nix-doc" + +[lib] +crate_type = ["staticlib"] + +[dependencies] +rnix = "0.8.0" + +[dev-dependencies] +expect-test = "1.1.0" diff --git a/lix-doc/README.md b/lix-doc/README.md new file mode 100644 index 000000000..26049ebd7 --- /dev/null +++ b/lix-doc/README.md @@ -0,0 +1,6 @@ +# lix-doc + +This is a stripped down fork of `nix-doc`, used for `:doc` in `nix repl` in +Lix. It will be replaced in the future with proper support when we get the new +parser working, but it exists today as a low-risk implementation of an +important usability feature. 
diff --git a/lix-doc/package.nix b/lix-doc/package.nix
new file mode 100644
index 000000000..86ab7501e
--- /dev/null
+++ b/lix-doc/package.nix
@@ -0,0 +1,11 @@
+{
+  rustPlatform,
+  lib
+}:
+
+rustPlatform.buildRustPackage {
+  name = "lix-doc";
+
+  cargoLock.lockFile = ./Cargo.lock;
+  src = lib.cleanSource ./.;
+}
diff --git a/lix-doc/src/lib.rs b/lix-doc/src/lib.rs
new file mode 100644
index 000000000..9c2e43f2f
--- /dev/null
+++ b/lix-doc/src/lib.rs
@@ -0,0 +1,396 @@
+// SPDX-FileCopyrightText: 2024 Jade Lovelace
+//
+// SPDX-License-Identifier: BSD-2-Clause OR MIT
+
+//! library components of nix-doc
+pub mod pprint;
+
+use crate::pprint::pprint_args;
+
+use rnix::types::{Lambda, TypedNode};
+use rnix::SyntaxKind::*;
+use rnix::{NodeOrToken, SyntaxNode, TextUnit, WalkEvent};
+
+use std::ffi::{CStr, CString};
+use std::fs;
+use std::iter;
+use std::os::raw::c_char;
+use std::panic;
+
+use std::ptr;
+
+use std::{fmt::Display, str};
+
+/// Result type whose error is any boxed [`std::error::Error`].
+pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
+
+/// Number of spaces the documentation text is indented by in the rendered output.
+const DOC_INDENT: usize = 3;
+
+/// Everything that gets rendered for one documented function.
+struct SearchResult {
+    /// Name of the function
+    identifier: String,
+
+    /// Dedented documentation comments
+    doc: String,
+
+    /// Parameter block for the function
+    param_block: String,
+}
+
+/// Converts a `line`/`col` position into a byte offset into `file`.
+///
+/// Lines are counted from 1 and end at `\n`, `\r` or `\r\n`. Column `n` of a line is the `n`-th
+/// character after the line break that starts it, so column 1 is the first character of the line.
+/// The first line has no such line break; there, column 0 is the first character.
+///
+/// Returns `None` when `file` contains no such position.
+fn find_pos(file: &str, line: usize, col: usize) -> Option<usize> {
+    let mut lines = 1;
+    let mut line_start = 0;
+    // `count` is the index of the character, which the column arithmetic is done in; `offset` is
+    // the byte at which it starts, which is what gets returned.
+    let mut it = file.char_indices().enumerate().peekable();
+    while let Some((count, (offset, ch))) = it.next() {
+        if ch == '\n' || ch == '\r' {
+            lines += 1;
+            // swallow the `\n` of a `\r\n` pair so that the pair counts as one line break
+            let addend = if ch == '\r' && it.peek().map(|&(_, (_, next))| next) == Some('\n') {
+                it.next();
+                1
+            } else {
+                0
+            };
+            line_start = count + addend;
+        }
+
+        // Right after a `\r\n`, `count` still refers to the `\r`, one character *before*
+        // `line_start`. That is not a column of the new line, so it must never match.
+        if lines == line && count.checked_sub(line_start) == Some(col) {
+            return Some(offset);
+        }
+    }
+    None
+}
+
+impl SearchResult {
+    /// Renders the synopsis, the indented documentation and a trailing `# filename:line` line.
+    fn format<P: Display>(&self, filename: P, line: usize) -> String {
+        format!(
+            "**Synopsis:** `{}` = {}\n\n{}\n\n# {}:{}",
+            self.identifier,
+            self.param_block,
+            indented(&self.doc, DOC_INDENT),
+            filename,
+            line,
+        )
+    }
+}
+
+/// Emits a string `s` indented by `indent` spaces
+fn indented(s: &str, indent: usize) -> String {
+    let indent_s = iter::repeat(' ').take(indent).collect::<String>();
+    s.split('\n')
+        .map(|line| indent_s.clone() + line)
+        .collect::<Vec<_>>()
+        .join("\n")
+}
+
+/// Cleans up a single line, erasing prefix single line comments but preserving indentation
+fn cleanup_single_line(s: &str) -> &str {
+    let mut cmt_new_start = 0;
+    for (idx, ch) in s.char_indices() {
+        // if we find a character, save the byte position after it as our new string start
+        if ch == '#' || ch == '*' {
+            cmt_new_start = idx + 1;
+            break;
+        }
+        // if, instead, we are on a line with no starting comment characters, leave it alone as it
+        // will be handled by dedent later
+        if !ch.is_whitespace() {
+            break;
+        }
+    }
+    &s[cmt_new_start..]
+}
+
+/// Erases indents in comments. This is *almost* a normal dedent function, but it starts by looking
+/// at the second line if it can.
+///
+/// The indent that is removed is the smallest one of all non-blank lines after the first; the
+/// first line only decides when there is no such line. Blank lines are left as they are.
+fn dedent_comment(s: &str) -> String {
+    /// Number of leading whitespace characters of `line`, or `None` for a blank line.
+    fn indent_of(line: &str) -> Option<usize> {
+        if line.trim().is_empty() {
+            None
+        } else {
+            Some(line.chars().take_while(|ch| ch.is_whitespace()).count())
+        }
+    }
+
+    let mut lines = s.lines();
+    let first = lines.next();
+    let whitespaces = lines
+        .filter_map(indent_of)
+        .min()
+        .or_else(|| first.and_then(indent_of))
+        .unwrap_or(0);
+
+    // delete up to `whitespaces` whitespace characters from each line and reconstitute the string
+    let mut out = String::new();
+    for line in s.lines() {
+        let content_begin = line.find(|ch: char| !ch.is_whitespace()).unwrap_or(0);
+        // `whitespaces` counts characters, so turn it into a byte offset before slicing with it
+        let indent_end = line
+            .char_indices()
+            .nth(whitespaces)
+            .map_or(line.len(), |(idx, _)| idx);
+        out.push_str(&line[content_begin.min(indent_end)..]);
+        out.push('\n');
+    }
+
+    out.truncate(out.trim_end_matches('\n').len());
+    out
+}
+
+/// Deletes whitespace and leading comment characters
+///
+/// The comments are processed back to front, and each one may be a `/* */` or a `#` comment.
+///
+/// Oversight we are choosing to ignore: if you put # characters at the beginning of lines in a
+/// multiline comment, they will be deleted.
+fn cleanup_comments<S: AsRef<str>, I: DoubleEndedIterator<Item = S>>(comment: &mut I) -> String {
+    dedent_comment(
+        &comment
+            .rev()
+            .map(|small_comment| {
+                small_comment
+                    .as_ref()
+                    // space before multiline start
+                    .trim_start()
+                    // multiline starts
+                    .trim_start_matches("/*")
+                    // trailing so we can grab multiline end
+                    .trim_end()
+                    // multiline ends
+                    .trim_end_matches("*/")
+                    // extra space that was in the multiline
+                    .trim()
+                    .split('\n')
+                    // erase single line comments and such
+                    .map(cleanup_single_line)
+                    .collect::<Vec<_>>()
+                    .join("\n")
+            })
+            .collect::<Vec<_>>()
+            .join("\n"),
+    )
+}
+
+/// Get the docs for a specific function
+///
+/// Reads `filename`, looks up `line`/`col` in it and documents the first lambda that starts at or
+/// after that position. Returns `None` if the file cannot be read, is not valid UTF-8, does not
+/// contain the position, or has no lambda at or after it.
+pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> {
+    let content = fs::read(filename).ok()?;
+    let decoded = str::from_utf8(&content).ok()?;
+    let pos = find_pos(decoded, line, col)?;
+    let rowan_pos = TextUnit::from_usize(pos);
+    let tree = rnix::parse(decoded);
+
+    let mut lambda = None;
+    for node in tree.node().preorder() {
+        match node {
+            WalkEvent::Enter(n) => {
+                if n.text_range().start() >= rowan_pos && n.kind() == NODE_LAMBDA {
+                    lambda = Lambda::cast(n);
+                    break;
+                }
+            }
+            WalkEvent::Leave(_) => (),
+        }
+    }
+    let lambda = lambda?;
+    let res = visit_lambda("func".to_string(), &lambda);
+    Some(res.format(filename, line))
+}
+
+/// Collects the parameter block and the doc comment of `lambda` into a `SearchResult` that is
+/// named `name`.
+fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult {
+    // grab the arguments
+    let param_block = pprint_args(lambda);
+
+    // find the doc comment
+    let comment = find_comment(lambda.node().clone()).unwrap_or_default();
+
+    SearchResult {
+        identifier: name,
+        doc: comment,
+        param_block,
+    }
+}
+
+/// Walks backwards from `node`, collecting the comments in front of it, and returns their
+/// cleaned-up text, or `None` if that text is empty.
+///
+/// The walk climbs to the parent whenever there is no previous sibling. It skips trivia as well
+/// as the key and the `=` of a binding, and stops at anything else or at the root of the tree.
+fn find_comment(node: SyntaxNode) -> Option<String> {
+    let mut node = NodeOrToken::Node(node);
+    let mut comments = Vec::new();
+    'search: loop {
+        loop {
+            if let Some(new) = node.prev_sibling_or_token() {
+                node = new;
+                break;
+            }
+            match node.parent() {
+                Some(parent) => node = NodeOrToken::Node(parent),
+                // ran into the root: whatever has been collected so far is the result
+                None => break 'search,
+            }
+        }
+
+        match node.kind() {
+            TOKEN_COMMENT => match &node {
+                NodeOrToken::Token(token) => comments.push(token.text().clone()),
+                NodeOrToken::Node(_) => unreachable!(),
+            },
+            // This stuff is found as part of `the-fn = f: ...`
+            // here:                           ^^^^^^^^
+            NODE_KEY | TOKEN_ASSIGN => (),
+            t if t.is_trivia() => (),
+            _ => break,
+        }
+    }
+    let doc = cleanup_comments(&mut comments.iter().map(|c| c.as_str()));
+    Some(doc).filter(|it| !it.is_empty())
+}
+
+/// Get the docs for a function in the given file path at the given file position and return it as
+/// a C string pointer
+///
+/// Returns a null pointer if `filename` is null or not valid UTF-8, if there are no docs to
+/// return, if the docs contain a NUL byte, or if looking them up panicked. Every other result
+/// must be released with `nd_free_string`.
+///
+/// A non-null `filename` must point to a NUL-terminated string.
+#[no_mangle]
+pub extern "C" fn nd_get_function_docs(
+    filename: *const c_char,
+    line: usize,
+    col: usize,
+) -> *const c_char {
+    if filename.is_null() {
+        return ptr::null();
+    }
+    // SAFETY: `filename` is not null (checked above) and the caller promises that it points to a
+    // NUL-terminated string.
+    let fname = unsafe { CStr::from_ptr(filename) };
+    fname
+        .to_str()
+        .ok()
+        .and_then(|f| {
+            panic::catch_unwind(|| get_function_docs(f, line, col))
+                .map_err(|e| {
+                    eprintln!("panic!! {:#?}", e);
+                    e
+                })
+                .ok()
+        })
+        .flatten()
+        .and_then(|s| CString::new(s).ok())
+        .map(|s| s.into_raw() as *const c_char)
+        .unwrap_or(ptr::null())
+}
+
+/// Call this to free a string from nd_get_function_docs
+///
+/// A null pointer is accepted and ignored. Any other pointer must have been returned by
+/// `nd_get_function_docs` and must not be used again afterwards.
+#[no_mangle]
+pub extern "C" fn nd_free_string(s: *const c_char) {
+    if s.is_null() {
+        return;
+    }
+    // SAFETY: the non-null pointers returned by `nd_get_function_docs` come from
+    // `CString::into_raw`, which is exactly what `CString::from_raw` takes back.
+    unsafe {
+        // cast note: this cast is turning something that was cast to const
+        // back to mut
+        drop(CString::from_raw(s as *mut c_char));
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_bytepos() {
+        let fakefile = "abc\ndef\nghi";
+        assert_eq!(find_pos(fakefile, 2, 2), Some(5));
+    }
+
+    #[test]
+    fn test_bytepos_cursed() {
+        let fakefile = "abc\rdef\r\nghi";
+        assert_eq!(find_pos(fakefile, 2, 2), Some(5));
+        assert_eq!(find_pos(fakefile, 3, 2), Some(10));
+        // the first column after a `\r\n` is the character behind it, not the `\r` itself
+        assert_eq!(find_pos(fakefile, 3, 1), Some(9));
+    }
+
+    #[test]
+    fn test_bytepos_non_ascii() {
+        // `é` is one character but two bytes long
+        assert_eq!(find_pos("é\nb", 2, 1), Some(3));
+    }
+
+    #[test]
+    fn test_bytepos_missing() {
+        assert_eq!(find_pos("abc", 7, 1), None);
+    }
+
+    #[test]
+    fn test_comment_stripping() {
+        let ex1 = ["/* blah blah blah\n  foooo baaar\n blah */"];
+        assert_eq!(
+            cleanup_comments(&mut ex1.iter()),
+            "blah blah blah\n foooo baaar\nblah"
+        );
+
+        let ex2 = ["# a1", "#  a2", "# aa"];
+        assert_eq!(cleanup_comments(&mut ex2.iter()), "aa\n a2\na1");
+    }
+
+    #[test]
+    fn test_dedent() {
+        let ex1 = "a\n b\n c\n  d";
+        assert_eq!(dedent_comment(ex1), "a\nb\nc\n d");
+        let ex2 = "a\nb\nc";
+        assert_eq!(dedent_comment(ex2), ex2);
+        let ex3 = " a\n b\n\n  c";
+        assert_eq!(dedent_comment(ex3), "a\nb\n\n c");
+    }
+
+    #[test]
+    fn test_dedent_wide_whitespace() {
+        // U+3000 is a single whitespace character that is three bytes long
+        assert_eq!(dedent_comment("a\n\u{3000}b"), "a\nb");
+    }
+
+    #[test]
+    fn test_single_line_comment_stripping() {
+        let ex1 = " * a";
+        let ex2 = " # a";
+        let ex3 = " a";
+        assert_eq!(cleanup_single_line(ex1), " a");
+        assert_eq!(cleanup_single_line(ex2), " a");
+        assert_eq!(cleanup_single_line(ex3), ex3);
+    }
+}
diff --git a/lix-doc/src/pprint.rs b/lix-doc/src/pprint.rs
new file mode 100644
index 000000000..7e73d2d20
--- /dev/null
+++ b/lix-doc/src/pprint.rs
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: 2024 Jade Lovelace
+//
+// SPDX-License-Identifier: BSD-2-Clause OR MIT
+
+use rnix::types::{Lambda, TypedNode};
+use rnix::SyntaxKind::*;
+
+/// Pretty-prints the arguments to a function
+///
+/// Each argument is rendered as `*arg*: `. Curried lambdas (`a: b: ...`) are followed for as long
+/// as their arguments are plain identifiers, and the result always ends in `...`. Rendering stops
+/// early, instead of panicking, at a lambda whose argument is missing or of an unexpected kind.
+pub fn pprint_args(lambda: &Lambda) -> String {
+    // TODO: handle docs directly on NODE_IDENT args (uncommon case)
+    let mut lambda = lambda.clone();
+    let mut out = String::new();
+    while let Some(arg) = lambda.arg() {
+        match arg.kind() {
+            NODE_IDENT => {
+                out.push_str(&format!("*{}*: ", arg));
+                // keep going for as long as the body is a lambda itself
+                match lambda.body().and_then(Lambda::cast) {
+                    Some(inner) => lambda = inner,
+                    None => break,
+                }
+            }
+            NODE_PATTERN => {
+                out.push_str(&format!("*{}*: ", arg));
+                break;
+            }
+            // nothing that could be rendered as an argument
+            _ => break,
+        }
+    }
+    out.push_str("...");
+    out
+}