diff options
author | Jade Lovelace <lix@jade.fyi> | 2024-04-07 16:16:21 -0700 |
---|---|---|
committer | jade <lix@jade.fyi> | 2024-04-08 04:05:13 +0000 |
commit | 0145d45806b1f60e9c7221a1f063a1b336a98eb4 (patch) | |
tree | cfe9ec77f0323da70fb297667c7df14ab137fcf5 /lix-doc/src/lib.rs | |
parent | b995c17f0eb8d9598f339c080c467101c1f55feb (diff) |
nix-doc -> lix-doc, make self-contained in package.nix
package.nix previously needed this callPackage'd externally, which
didn't make a lot of sense to us since this is an internal dependency.
Thus we changed it to make it more self contained.
Change-Id: I4935bc0bc80e1a132bc9b1519e917791da95037c
Diffstat (limited to 'lix-doc/src/lib.rs')
-rw-r--r-- | lix-doc/src/lib.rs | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/lix-doc/src/lib.rs b/lix-doc/src/lib.rs new file mode 100644 index 000000000..9c2e43f2f --- /dev/null +++ b/lix-doc/src/lib.rs @@ -0,0 +1,326 @@ +// SPDX-FileCopyrightText: 2024 Jade Lovelace +// +// SPDX-License-Identifier: BSD-2-Clause OR MIT + +//! library components of nix-doc +pub mod pprint; + +use crate::pprint::pprint_args; + +use rnix::types::{Lambda, TypedNode}; +use rnix::SyntaxKind::*; +use rnix::{NodeOrToken, SyntaxNode, TextUnit, WalkEvent}; + +use std::ffi::{CStr, CString}; +use std::fs; +use std::iter; +use std::os::raw::c_char; +use std::panic; + +use std::ptr; + +use std::{fmt::Display, str}; + +pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; + +const DOC_INDENT: usize = 3; + +struct SearchResult { + /// Name of the function + identifier: String, + + /// Dedented documentation comments + doc: String, + + /// Parameter block for the function + param_block: String, +} + +fn find_pos(file: &str, line: usize, col: usize) -> usize { + let mut lines = 1; + let mut line_start = 0; + let mut it = file.chars().enumerate().peekable(); + while let Some((count, ch)) = it.next() { + if ch == '\n' || ch == '\r' { + lines += 1; + let addend = if ch == '\r' && it.peek().map(|x| x.1) == Some('\n') { + it.next(); + 1 + } else { + 0 + }; + line_start = count + addend; + } + + let col_diff = ((count as i32) - (line_start as i32)).abs() as usize; + if lines == line && col_diff == col { + return count; + } + } + unreachable!(); +} + +impl SearchResult { + fn format<P: Display>(&self, filename: P, line: usize) -> String { + format!( + "**Synopsis:** `{}` = {}\n\n{}\n\n# {}", + self.identifier.as_str(), + self.param_block, + indented(&self.doc, DOC_INDENT), + format!("{}:{}", filename, line).as_str(), + ) + } +} + +/// Emits a string `s` indented by `indent` spaces +fn indented(s: &str, indent: usize) -> String { + let indent_s = iter::repeat(' ').take(indent).collect::<String>(); + s.split('\n') + .map(|line| indent_s.clone() + line) + .collect::<Vec<_>>() + .join("\n") +} + +/// Cleans up a single line, erasing prefix single line comments but preserving indentation +fn cleanup_single_line<'a>(s: &'a str) -> &'a str { + let mut cmt_new_start = 0; + for (idx, ch) in s.char_indices() { + // if we find a character, save the byte position after it as our new string start + if ch == '#' || ch == '*' { + cmt_new_start = idx + 1; + break; + } + // if, instead, we are on a line with no starting comment characters, leave it alone as it + // will be handled by dedent later + if !ch.is_whitespace() { + break; + } + } + &s[cmt_new_start..] +} + +/// Erases indents in comments. This is *almost* a normal dedent function, but it starts by looking +/// at the second line if it can. +fn dedent_comment(s: &str) -> String { + let mut whitespaces = 0; + let mut lines = s.lines(); + let first = lines.next(); + + // scan for whitespace + for line in lines.chain(first) { + let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count(); + + if line_whitespace != line.len() { + // a non-whitespace line, perfect for taking whitespace off of + whitespaces = line_whitespace; + break; + } + } + + // maybe the first considered line we found was indented further, so let's look for more lines + // that might have a shorter indent. In the case of one line, do nothing. + for line in s.lines().skip(1) { + let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count(); + + if line_whitespace != line.len() { + whitespaces = line_whitespace.min(whitespaces); + } + } + + // delete up to `whitespaces` whitespace characters from each line and reconstitute the string + let mut out = String::new(); + for line in s.lines() { + let content_begin = line.find(|ch: char| !ch.is_whitespace()).unwrap_or(0); + out.push_str(&line[content_begin.min(whitespaces)..]); + out.push('\n'); + } + + out.truncate(out.trim_end_matches('\n').len()); + out +} + +/// Deletes whitespace and leading comment characters +/// +/// Oversight we are choosing to ignore: if you put # characters at the beginning of lines in a +/// multiline comment, they will be deleted. +fn cleanup_comments<S: AsRef<str>, I: DoubleEndedIterator<Item = S>>(comment: &mut I) -> String { + dedent_comment( + &comment + .rev() + .map(|small_comment| { + small_comment + .as_ref() + // space before multiline start + .trim_start() + // multiline starts + .trim_start_matches("/*") + // trailing so we can grab multiline end + .trim_end() + // multiline ends + .trim_end_matches("*/") + // extra space that was in the multiline + .trim() + .split('\n') + // erase single line comments and such + .map(cleanup_single_line) + .collect::<Vec<_>>() + .join("\n") + }) + .collect::<Vec<_>>() + .join("\n"), + ) +} + +/// Get the docs for a specific function +pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> { + let content = fs::read(filename).ok()?; + let decoded = str::from_utf8(&content).ok()?; + let pos = find_pos(&decoded, line, col); + let rowan_pos = TextUnit::from_usize(pos); + let tree = rnix::parse(decoded); + + let mut lambda = None; + for node in tree.node().preorder() { + match node { + WalkEvent::Enter(n) => { + if n.text_range().start() >= rowan_pos && n.kind() == NODE_LAMBDA { + lambda = Lambda::cast(n); + break; + } + } + WalkEvent::Leave(_) => (), + } + } + let lambda = lambda?; + let res = visit_lambda("func".to_string(), &lambda); + Some(res.format(filename, line)) +} + +fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult { + // grab the arguments + let param_block = pprint_args(&lambda); + + // find the doc comment + let comment = find_comment(lambda.node().clone()).unwrap_or_else(|| "".to_string()); + + SearchResult { + identifier: name, + doc: comment, + param_block + } +} + +fn find_comment(node: SyntaxNode) -> Option<String> { + let mut node = NodeOrToken::Node(node); + let mut comments = Vec::new(); + loop { + loop { + if let Some(new) = node.prev_sibling_or_token() { + node = new; + break; + } else { + node = NodeOrToken::Node(node.parent()?); + } + } + + match node.kind() { + TOKEN_COMMENT => match &node { + NodeOrToken::Token(token) => comments.push(token.text().clone()), + NodeOrToken::Node(_) => unreachable!(), + }, + // This stuff is found as part of `the-fn = f: ...` + // here: ^^^^^^^^ + NODE_KEY | TOKEN_ASSIGN => (), + t if t.is_trivia() => (), + _ => break, + } + } + let doc = cleanup_comments(&mut comments.iter().map(|c| c.as_str())); + Some(doc).filter(|it| !it.is_empty()) +} + +/// Get the docs for a function in the given file path at the given file position and return it as +/// a C string pointer +#[no_mangle] +pub extern "C" fn nd_get_function_docs( + filename: *const c_char, + line: usize, + col: usize, + ) -> *const c_char { + let fname = unsafe { CStr::from_ptr(filename) }; + fname + .to_str() + .ok() + .and_then(|f| { + panic::catch_unwind(|| get_function_docs(f, line, col)) + .map_err(|e| { + eprintln!("panic!! {:#?}", e); + e + }) + .ok() + }) + .flatten() + .and_then(|s| CString::new(s).ok()) + .map(|s| s.into_raw() as *const c_char) + .unwrap_or(ptr::null()) +} + +/// Call this to free a string from nd_get_function_docs +#[no_mangle] +pub extern "C" fn nd_free_string(s: *const c_char) { + unsafe { + // cast note: this cast is turning something that was cast to const + // back to mut + drop(CString::from_raw(s as *mut c_char)); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bytepos() { + let fakefile = "abc\ndef\nghi"; + assert_eq!(find_pos(fakefile, 2, 2), 5); + } + + #[test] + fn test_bytepos_cursed() { + let fakefile = "abc\rdef\r\nghi"; + assert_eq!(find_pos(fakefile, 2, 2), 5); + assert_eq!(find_pos(fakefile, 3, 2), 10); + } + + #[test] + fn test_comment_stripping() { + let ex1 = ["/* blah blah blah\n foooo baaar\n blah */"]; + assert_eq!( + cleanup_comments(&mut ex1.iter()), + "blah blah blah\n foooo baaar\nblah" + ); + + let ex2 = ["# a1", "# a2", "# aa"]; + assert_eq!(cleanup_comments(&mut ex2.iter()), "aa\n a2\na1"); + } + + #[test] + fn test_dedent() { + let ex1 = "a\n b\n c\n d"; + assert_eq!(dedent_comment(ex1), "a\nb\nc\n d"); + let ex2 = "a\nb\nc"; + assert_eq!(dedent_comment(ex2), ex2); + let ex3 = " a\n b\n\n c"; + assert_eq!(dedent_comment(ex3), "a\nb\n\n c"); + } + + #[test] + fn test_single_line_comment_stripping() { + let ex1 = " * a"; + let ex2 = " # a"; + let ex3 = " a"; + assert_eq!(cleanup_single_line(ex1), " a"); + assert_eq!(cleanup_single_line(ex2), " a"); + assert_eq!(cleanup_single_line(ex3), ex3); + } +} |