aboutsummaryrefslogtreecommitdiff
path: root/lix-doc/src
diff options
context:
space:
mode:
authorJade Lovelace <lix@jade.fyi>2024-04-07 16:16:21 -0700
committerjade <lix@jade.fyi>2024-04-08 04:05:13 +0000
commit0145d45806b1f60e9c7221a1f063a1b336a98eb4 (patch)
treecfe9ec77f0323da70fb297667c7df14ab137fcf5 /lix-doc/src
parentb995c17f0eb8d9598f339c080c467101c1f55feb (diff)
nix-doc -> lix-doc, make self-contained in package.nix
package.nix previously needed this callPackage'd externally, which didn't make a lot of sense to us since this is an internal dependency. Thus we changed it to make it more self contained. Change-Id: I4935bc0bc80e1a132bc9b1519e917791da95037c
Diffstat (limited to 'lix-doc/src')
-rw-r--r--lix-doc/src/lib.rs326
-rw-r--r--lix-doc/src/pprint.rs40
2 files changed, 366 insertions, 0 deletions
diff --git a/lix-doc/src/lib.rs b/lix-doc/src/lib.rs
new file mode 100644
index 000000000..9c2e43f2f
--- /dev/null
+++ b/lix-doc/src/lib.rs
@@ -0,0 +1,326 @@
+// SPDX-FileCopyrightText: 2024 Jade Lovelace
+//
+// SPDX-License-Identifier: BSD-2-Clause OR MIT
+
+//! Library components of lix-doc
+pub mod pprint;
+
+use crate::pprint::pprint_args;
+
+use rnix::types::{Lambda, TypedNode};
+use rnix::SyntaxKind::*;
+use rnix::{NodeOrToken, SyntaxNode, TextUnit, WalkEvent};
+
+use std::ffi::{CStr, CString};
+use std::fs;
+use std::iter;
+use std::os::raw::c_char;
+use std::panic;
+
+use std::ptr;
+
+use std::{fmt::Display, str};
+
+/// Crate-wide result alias whose error type is any boxed `std::error::Error`.
+pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
+
+/// Number of spaces by which `SearchResult::format` indents the documentation text.
+const DOC_INDENT: usize = 3;
+
+/// Everything needed to render the documentation of one function.
+struct SearchResult {
+ /// Name of the function, shown in the synopsis line
+ identifier: String,
+
+ /// Dedented documentation comment text (empty when no comment was found)
+ doc: String,
+
+ /// Pretty-printed parameter block for the function
+ param_block: String,
+}
+
+/// Converts a 1-based `line`/`col` position into a byte offset into `file`.
+///
+/// `\n`, `\r` and `\r\n` each count as a single line terminator. Columns are counted in
+/// characters, and a `col` of 0 is treated like column 1 (the start of the line).
+/// NOTE(review): confirm that callers report columns in characters rather than bytes.
+///
+/// The returned offset always lies on a character boundary, so it is safe to use for slicing
+/// `file` or as a byte-based text offset. When the requested position lies past the end of its
+/// line or of the file, `file.len()` is returned instead of panicking.
+fn find_pos(file: &str, line: usize, col: usize) -> usize {
+    let target_col = col.max(1);
+    let mut cur_line = 1;
+    let mut cur_col = 1;
+    let mut it = file.char_indices().peekable();
+    while let Some((offset, ch)) = it.next() {
+        if cur_line == line && cur_col == target_col {
+            return offset;
+        }
+        if cur_line > line {
+            // we walked past the requested line without finding the column
+            break;
+        }
+
+        if ch == '\n' || ch == '\r' {
+            // swallow the `\n` of a `\r\n` pair so the pair only counts as one terminator
+            if ch == '\r' && it.peek().map(|x| x.1) == Some('\n') {
+                it.next();
+            }
+            cur_line += 1;
+            cur_col = 1;
+        } else {
+            cur_col += 1;
+        }
+    }
+    file.len()
+}
+
+impl SearchResult {
+    /// Renders the result as display text.
+    ///
+    /// The output is a `**Synopsis:**` line containing the identifier (in backticks) and the
+    /// parameter block, then the documentation indented by `DOC_INDENT` spaces, then a trailing
+    /// `# filename:line` location line.
+    fn format<P: Display>(&self, filename: P, line: usize) -> String {
+        // A single format string avoids allocating an intermediate `filename:line` string.
+        format!(
+            "**Synopsis:** `{}` = {}\n\n{}\n\n# {}:{}",
+            self.identifier,
+            self.param_block,
+            indented(&self.doc, DOC_INDENT),
+            filename,
+            line,
+        )
+    }
+}
+
+/// Returns `s` with every `\n`-separated line prefixed by `indent` spaces.
+///
+/// Empty lines (including the empty piece after a trailing `\n`) are prefixed as well.
+fn indented(s: &str, indent: usize) -> String {
+    let pad: String = iter::repeat(' ').take(indent).collect();
+    let mut out = String::new();
+    for (idx, line) in s.split('\n').enumerate() {
+        if idx != 0 {
+            out.push('\n');
+        }
+        out.push_str(&pad);
+        out.push_str(line);
+    }
+    out
+}
+
+/// Strips a leading comment marker from one line.
+///
+/// If the first non-whitespace character is `#` or `*`, everything up to and including it is
+/// removed, which keeps the indentation that followed the marker. Any other line is returned
+/// untouched so that the later dedent pass can deal with it.
+fn cleanup_single_line<'a>(s: &'a str) -> &'a str {
+    let rest = s.trim_start();
+    match rest.chars().next() {
+        // both markers are one byte long, so slicing at 1 stays on a character boundary
+        Some('#') | Some('*') => &rest[1..],
+        _ => s,
+    }
+}
+
+/// Erases the common indentation of a comment.
+///
+/// This is *almost* a normal dedent: the indentation to remove is the smallest indentation of
+/// the non-blank lines *after the first one*. The first line is only consulted when no later
+/// non-blank line exists. Blank (whitespace-only) lines are ignored for the computation and are
+/// emitted unchanged. Trailing newlines are removed from the result.
+///
+/// Indentation is measured and removed in whole characters, so multi-byte whitespace can never
+/// cause a slice in the middle of a character.
+fn dedent_comment(s: &str) -> String {
+    /// Leading whitespace of `line` in characters, or `None` for a blank line.
+    fn indent_of(line: &str) -> Option<usize> {
+        if line.trim_start().is_empty() {
+            None
+        } else {
+            Some(line.chars().take_while(|ch| ch.is_whitespace()).count())
+        }
+    }
+
+    let mut lines = s.lines();
+    let first = lines.next();
+    let whitespaces = lines
+        .filter_map(indent_of)
+        .min()
+        // only the first line has content: fall back to its own indentation
+        .or_else(|| first.and_then(indent_of))
+        .unwrap_or(0);
+
+    // delete up to `whitespaces` whitespace characters from each line and reconstitute the string
+    let mut out = String::with_capacity(s.len());
+    for line in s.lines() {
+        let strip = indent_of(line).map_or(0, |ws| ws.min(whitespaces));
+        // translate the character count into a byte offset before slicing
+        let cut = line
+            .char_indices()
+            .nth(strip)
+            .map_or(line.len(), |(idx, _)| idx);
+        out.push_str(&line[cut..]);
+        out.push('\n');
+    }
+
+    out.truncate(out.trim_end_matches('\n').len());
+    out
+}
+
+/// Turns a sequence of raw comment tokens into one dedented documentation string.
+///
+/// The comments are consumed in *reverse* order. Each one has its surrounding whitespace and
+/// its `/*` / `*/` delimiters removed, then every line is passed through
+/// `cleanup_single_line`. The pieces are joined with newlines and dedented as a whole.
+///
+/// Known, accepted oversight: `#` characters at the beginning of lines inside a multiline
+/// comment are deleted too.
+fn cleanup_comments<S: AsRef<str>, I: DoubleEndedIterator<Item = S>>(comment: &mut I) -> String {
+    let mut pieces: Vec<String> = Vec::new();
+    for small_comment in comment.rev() {
+        let inner = small_comment
+            .as_ref()
+            // space before multiline start
+            .trim_start()
+            // multiline starts
+            .trim_start_matches("/*")
+            // trailing space, so the multiline end becomes visible
+            .trim_end()
+            // multiline ends
+            .trim_end_matches("*/")
+            // extra space that was inside the multiline
+            .trim();
+
+        // erase single line comment markers and such
+        let cleaned: Vec<&str> = inner.split('\n').map(cleanup_single_line).collect();
+        pieces.push(cleaned.join("\n"));
+    }
+
+    dedent_comment(&pieces.join("\n"))
+}
+
+/// Get the docs for a specific function.
+///
+/// Reads `filename`, parses it with `rnix` and picks the first lambda node (in preorder) that
+/// starts at or after the given `line`/`col` position. The result is the formatted
+/// documentation, using the fixed identifier `func`.
+///
+/// Returns `None` when the file cannot be read, is not valid UTF-8, or no such lambda exists.
+pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> {
+    let bytes = fs::read(filename).ok()?;
+    let source = str::from_utf8(&bytes).ok()?;
+    let target = TextUnit::from_usize(find_pos(source, line, col));
+    let ast = rnix::parse(source);
+
+    // The first matching node decides the outcome, even if the cast to `Lambda` fails.
+    let lambda = ast
+        .node()
+        .preorder()
+        .find_map(|event| match event {
+            WalkEvent::Enter(n) => {
+                if n.kind() == NODE_LAMBDA && n.text_range().start() >= target {
+                    Some(Lambda::cast(n))
+                } else {
+                    None
+                }
+            }
+            WalkEvent::Leave(_) => None,
+        })
+        .flatten()?;
+
+    Some(visit_lambda("func".to_string(), &lambda).format(filename, line))
+}
+
+/// Collects the parameter block and the doc comment of `lambda` into a `SearchResult` named
+/// `name`. A missing doc comment results in an empty `doc` string.
+fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult {
+    // arguments first, then the doc comment
+    let param_block = pprint_args(lambda);
+    let doc = find_comment(lambda.node().clone()).unwrap_or_default();
+
+    SearchResult {
+        identifier: name,
+        doc,
+        param_block,
+    }
+}
+
+/// Finds the documentation comment preceding `node`.
+///
+/// Walks backwards through the syntax tree from `node`, climbing to the parent whenever an
+/// element has no previous sibling. Comment tokens are collected; keys, `=` tokens and trivia
+/// are skipped; anything else ends the walk. The walk also ends when the root of the tree is
+/// reached, and comments gathered up to that point are kept rather than discarded.
+///
+/// Returns the cleaned-up comment text, or `None` if it would be empty.
+fn find_comment(node: SyntaxNode) -> Option<String> {
+    let mut node = NodeOrToken::Node(node);
+    let mut comments = Vec::new();
+    'walk: loop {
+        // step to the previous element, climbing up as long as there is no previous sibling
+        loop {
+            if let Some(new) = node.prev_sibling_or_token() {
+                node = new;
+                break;
+            }
+            match node.parent() {
+                Some(parent) => node = NodeOrToken::Node(parent),
+                // reached the root: stop walking, but still use what was collected so far
+                None => break 'walk,
+            }
+        }
+
+        match node.kind() {
+            TOKEN_COMMENT => match &node {
+                NodeOrToken::Token(token) => comments.push(token.text().clone()),
+                NodeOrToken::Node(_) => unreachable!(),
+            },
+            // This stuff is found as part of `the-fn = f: ...`
+            // here:                           ^^^^^^^^
+            NODE_KEY | TOKEN_ASSIGN => (),
+            t if t.is_trivia() => (),
+            _ => break,
+        }
+    }
+    // comments were collected walking backwards; `cleanup_comments` reverses them again
+    let doc = cleanup_comments(&mut comments.iter().map(|c| c.as_str()));
+    Some(doc).filter(|it| !it.is_empty())
+}
+
+/// Get the docs for a function in the given file path at the given file position and return it as
+/// a C string pointer.
+///
+/// Returns a null pointer when `filename` is null or not valid UTF-8, when no documentation
+/// could be produced, when the lookup panicked (the panic is reported on stderr), or when the
+/// documentation contains an interior NUL byte.
+///
+/// A non-null result must be released with `nd_free_string`.
+///
+/// # Safety
+///
+/// `filename` must be null or point to a valid NUL-terminated string.
+#[no_mangle]
+pub extern "C" fn nd_get_function_docs(
+    filename: *const c_char,
+    line: usize,
+    col: usize,
+) -> *const c_char {
+    if filename.is_null() {
+        return ptr::null();
+    }
+    // SAFETY: `filename` is non-null (checked above) and the caller guarantees that it points
+    // to a NUL-terminated string which stays alive for the duration of this call.
+    let fname = unsafe { CStr::from_ptr(filename) };
+    let fname = match fname.to_str() {
+        Ok(f) => f,
+        Err(_) => return ptr::null(),
+    };
+
+    // Panics must not unwind across the FFI boundary.
+    let docs = match panic::catch_unwind(|| get_function_docs(fname, line, col)) {
+        Ok(docs) => docs,
+        Err(e) => {
+            eprintln!("panic!! {:#?}", e);
+            None
+        }
+    };
+
+    docs.and_then(|s| CString::new(s).ok())
+        .map_or(ptr::null(), |s| s.into_raw() as *const c_char)
+}
+
+/// Call this to free a string from nd_get_function_docs.
+///
+/// Passing a null pointer is allowed and does nothing.
+///
+/// # Safety
+///
+/// A non-null `s` must have been returned by `nd_get_function_docs` and must not have been
+/// freed before.
+#[no_mangle]
+pub extern "C" fn nd_free_string(s: *const c_char) {
+    if s.is_null() {
+        return;
+    }
+    // SAFETY: `s` is non-null and, per the contract above, came from `CString::into_raw`.
+    unsafe {
+        // cast note: this cast is turning something that was cast to const
+        // back to mut
+        drop(CString::from_raw(s as *mut c_char));
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Positions in a file with plain `\n` line endings.
+    #[test]
+    fn test_bytepos() {
+        let fakefile = "abc\ndef\nghi";
+        assert_eq!(find_pos(fakefile, 2, 2), 5);
+    }
+
+    /// Positions in a file mixing `\r` and `\r\n` line endings.
+    #[test]
+    fn test_bytepos_cursed() {
+        let fakefile = "abc\rdef\r\nghi";
+        assert_eq!(find_pos(fakefile, 2, 2), 5);
+        assert_eq!(find_pos(fakefile, 3, 2), 10);
+    }
+
+    /// Comment delimiters are removed and relative indentation survives the dedent.
+    #[test]
+    fn test_comment_stripping() {
+        // continuation lines are indented by 4 and 3 spaces, so one space must remain
+        let ex1 = ["/* blah blah blah\n    foooo baaar\n   blah */"];
+        assert_eq!(
+            cleanup_comments(&mut ex1.iter()),
+            "blah blah blah\n foooo baaar\nblah"
+        );
+
+        // the comments are emitted in reverse order; the extra space in the middle one is kept
+        let ex2 = ["# a1", "#  a2", "# aa"];
+        assert_eq!(cleanup_comments(&mut ex2.iter()), "aa\n a2\na1");
+    }
+
+    /// Only the common indentation of the lines after the first one is removed.
+    #[test]
+    fn test_dedent() {
+        let ex1 = "a\n b\n c\n  d";
+        assert_eq!(dedent_comment(ex1), "a\nb\nc\n d");
+        let ex2 = "a\nb\nc";
+        assert_eq!(dedent_comment(ex2), ex2);
+        let ex3 = " a\n b\n\n  c";
+        assert_eq!(dedent_comment(ex3), "a\nb\n\n c");
+    }
+
+    /// A leading `*` or `#` is stripped; lines without a marker stay as they are.
+    #[test]
+    fn test_single_line_comment_stripping() {
+        let ex1 = " * a";
+        let ex2 = " # a";
+        let ex3 = " a";
+        assert_eq!(cleanup_single_line(ex1), " a");
+        assert_eq!(cleanup_single_line(ex2), " a");
+        assert_eq!(cleanup_single_line(ex3), ex3);
+    }
+}
diff --git a/lix-doc/src/pprint.rs b/lix-doc/src/pprint.rs
new file mode 100644
index 000000000..7e73d2d20
--- /dev/null
+++ b/lix-doc/src/pprint.rs
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: 2024 Jade Lovelace
+//
+// SPDX-License-Identifier: BSD-2-Clause OR MIT
+
+use rnix::types::{Lambda, TypedNode};
+use rnix::SyntaxKind::*;
+
+/// Pretty-prints the arguments to a function.
+///
+/// Every argument is rendered as `*arg*: `. Curried lambdas (`a: b: ...`) are followed through
+/// their bodies, a pattern argument ends the chain, and the result always ends with `...` in
+/// place of the function body.
+///
+/// Panics if a lambda has no argument or body node, or if the argument is neither an
+/// identifier nor a pattern.
+pub fn pprint_args(lambda: &Lambda) -> String {
+    // TODO: handle docs directly on NODE_IDENT args (uncommon case)
+    let mut current = lambda.clone();
+    let mut out = String::new();
+    loop {
+        let arg = current.arg().unwrap();
+        let is_pattern = match arg.kind() {
+            NODE_IDENT => false,
+            NODE_PATTERN => true,
+            t => {
+                unreachable!("unhandled arg type {:?}", t);
+            }
+        };
+
+        out += &format!("*{}*", &arg.to_string());
+        out.push_str(": ");
+
+        // a pattern argument is always the last one that gets printed
+        if is_pattern {
+            break;
+        }
+
+        let body = current.body().unwrap();
+        if body.kind() != NODE_LAMBDA {
+            break;
+        }
+        current = Lambda::cast(body).unwrap();
+    }
+    out.push_str("...");
+    out
+}