diff --git a/Cargo.lock b/Cargo.lock index fd1358e..849d4b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -256,6 +256,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cfg-if" version = "1.0.0" @@ -332,6 +338,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "convert_case" version = "0.6.0" @@ -365,6 +381,7 @@ dependencies = [ "axum", "clap", "futures", + "html2md", "hyper 0.14.32", "mcp-core", "mcp-macros", @@ -485,6 +502,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -653,6 +680,34 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "html2md" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cff9891f2e0d9048927fbdfc28b11bf378f6a93c7ba70b23d0fbee9af6071b4" +dependencies = [ + "html5ever", + "jni", + "lazy_static", + "markup5ever_rcdom", + "percent-encoding", + "regex", +] + +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "http" version = "0.2.12" @@ -984,6 +1039,26 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jni" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec" +dependencies = [ + "cesu8", + "combine", + "jni-sys", + "log", + "thiserror", + "walkdir", +] + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + [[package]] name = "js-sys" version = "0.3.77" @@ -1034,6 +1109,38 @@ version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + [[package]] name = "matchers" version = "0.1.0" @@ -1177,6 +1284,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1296,6 +1409,44 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand 0.8.5", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.10" @@ -1349,6 +1500,12 @@ dependencies = [ "zerocopy 0.7.35", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.94" @@ -1560,6 +1717,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.27" @@ -1717,6 +1883,12 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.9" @@ -1748,6 +1920,31 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "string_cache" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938d512196766101d333398efde81bc1f37b00cb42c2f8350e5df639f040bbbe" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "strsim" version = "0.11.1" @@ -1823,6 +2020,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -2093,6 +2301,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf16_iter" version = "1.0.5" @@ -2123,6 +2337,16 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2244,6 +2468,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -2444,6 +2677,17 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "xml5ever" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69" +dependencies = [ + "log", + "mac", + "markup5ever", +] + [[package]] name = "yoke" version = "0.7.5" diff --git a/Cargo.toml b/Cargo.toml index a7e1e7c..c4a2f9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ anyhow = "1.0" futures = "0.3" rand = "0.8" clap = { version = "4.4", features = ["derive"] } +html2md = "0.2.14" [dev-dependencies] # Testing utilities diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index 530ad44..6dafea4 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -1,16 +1,19 @@ use anyhow::Result; use clap::{Parser, Subcommand}; use cratedocs_mcp::tools::DocRouter; +use mcp_core::Content; use mcp_server::router::RouterService; -use mcp_server::{ByteTransport, Server}; +use mcp_server::{ByteTransport, Router, Server}; +use serde_json::json; use std::net::SocketAddr; use tokio::io::{stdin, stdout}; use tracing_appender::rolling::{RollingFileAppender, Rotation}; use tracing_subscriber::{self, EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; #[derive(Parser)] -#[command(author, version, about, long_about = None)] +#[command(author, version = "0.1.0", about, long_about = None)] #[command(propagate_version = true)] +#[command(disable_version_flag = true)] struct Cli { #[command(subcommand)] command: Commands, @@ -30,6 +33,36 @@ enum Commands { #[arg(short, long, default_value = "127.0.0.1:8080")] address: String, + /// Enable debug logging + #[arg(short, long)] + debug: bool, + }, + /// Test tools directly from the CLI + Test { + /// The tool to test (lookup_crate, search_crates, lookup_item) + #[arg(long, default_value = "lookup_crate")] + tool: String, + + /// Crate name for lookup_crate and lookup_item + #[arg(long)] + crate_name: Option, + + /// Item path for lookup_item (e.g., std::vec::Vec) + #[arg(long)] + item_path: Option, + + /// Search query for search_crates + #[arg(long)] + query: Option, + + /// Crate version (optional) + #[arg(long)] + version: Option, + + /// Result limit for search_crates + #[arg(long)] + limit: Option, + /// Enable debug logging #[arg(short, long)] debug: bool, @@ -43,6 +76,15 @@ async fn main() -> Result<()> { match cli.command { Commands::Stdio { debug } => run_stdio_server(debug).await, Commands::Http { address, debug } => run_http_server(address, debug).await, + Commands::Test { + tool, + crate_name, + item_path, + query, + version, + limit, + debug + } => run_test_tool(tool, crate_name, item_path, query, version, limit, debug).await, } } @@ -98,5 +140,113 @@ async fn run_http_server(address: String, debug: bool) -> Result<()> { let app = cratedocs_mcp::transport::http_sse_server::App::new(); axum::serve(listener, app.router()).await?; + Ok(()) +} + +/// Run a direct test of a documentation tool from the CLI +async fn run_test_tool( + tool: String, + crate_name: Option, + item_path: Option, + query: Option, + version: Option, + limit: Option, + debug: bool, +) -> Result<()> { + // Print help information if the tool is "help" + if tool == "help" { + println!("CrateDocs CLI Tool Tester\n"); + println!("Usage examples:"); + println!(" cargo run --bin cratedocs -- test --tool lookup_crate --crate-name serde"); + println!(" cargo run --bin cratedocs -- test --tool lookup_crate --crate-name tokio --version 1.35.0"); + println!(" cargo run --bin cratedocs -- test --tool search_crates --query logger\n"); + println!("Available tools:"); + println!(" lookup_crate - Look up documentation for a Rust crate"); + println!(" lookup_item - Look up documentation for a specific item in a crate"); + println!(" search_crates - Search for crates on crates.io"); + println!(" help - Show this help information\n"); + return Ok(()); + } + // Set up console logging + let level = if debug { tracing::Level::DEBUG } else { tracing::Level::INFO }; + + tracing_subscriber::fmt() + .with_max_level(level) + .without_time() + .with_target(false) + .init(); + + // Create router instance + let router = DocRouter::new(); + + tracing::info!("Testing tool: {}", tool); + + // Prepare arguments based on the tool being tested + let arguments = match tool.as_str() { + "lookup_crate" => { + let crate_name = crate_name.ok_or_else(|| + anyhow::anyhow!("--crate-name is required for lookup_crate tool"))?; + + json!({ + "crate_name": crate_name, + "version": version, + }) + }, + "lookup_item" => { + let crate_name = crate_name.ok_or_else(|| + anyhow::anyhow!("--crate-name is required for lookup_item tool"))?; + let item_path = item_path.ok_or_else(|| + anyhow::anyhow!("--item-path is required for lookup_item tool"))?; + + json!({ + "crate_name": crate_name, + "item_path": item_path, + "version": version, + }) + }, + "search_crates" => { + let query = query.ok_or_else(|| + anyhow::anyhow!("--query is required for search_crates tool"))?; + + json!({ + "query": query, + "limit": limit, + }) + }, + _ => return Err(anyhow::anyhow!("Unknown tool: {}", tool)), + }; + + // Call the tool and get results + tracing::debug!("Calling {} with arguments: {}", tool, arguments); + println!("Executing {} tool...", tool); + + let result = match router.call_tool(&tool, arguments).await { + Ok(result) => result, + Err(e) => { + eprintln!("\nERROR: {}", e); + eprintln!("\nTip: The direct item lookup may require very specific path formats. Try these commands instead:"); + eprintln!(" - For crate docs: cargo run --bin cratedocs -- test --tool lookup_crate --crate-name tokio"); + eprintln!(" - For crate docs with version: cargo run --bin cratedocs -- test --tool lookup_crate --crate-name serde --version 1.0.147"); + return Ok(()); + } + }; + + // Print results + if !result.is_empty() { + for content in result { + match content { + Content::Text(text) => { + println!("\n--- TOOL RESULT ---\n"); + // Access the raw string from TextContent.text field + println!("{}", text.text); + println!("\n--- END RESULT ---"); + }, + _ => println!("Received non-text content"), + } + } + } else { + println!("Tool returned no results"); + } + Ok(()) } \ No newline at end of file diff --git a/src/tools/docs/docs.rs b/src/tools/docs/docs.rs index d67694e..c46e7a2 100644 --- a/src/tools/docs/docs.rs +++ b/src/tools/docs/docs.rs @@ -10,6 +10,7 @@ use mcp_server::router::CapabilitiesBuilder; use reqwest::Client; use serde_json::{json, Value}; use tokio::sync::Mutex; +use html2md::parse_html; // Cache for documentation lookups to avoid repeated requests #[derive(Clone)] @@ -93,14 +94,17 @@ impl DocRouter { ))); } - let body = response.text().await.map_err(|e| { + let html_body = response.text().await.map_err(|e| { ToolError::ExecutionError(format!("Failed to read response body: {}", e)) })?; - - // Cache the result - self.cache.set(cache_key, body.clone()).await; - Ok(body) + // Convert HTML to markdown + let markdown_body = parse_html(&html_body); + + // Cache the markdown result + self.cache.set(cache_key, markdown_body.clone()).await; + + Ok(markdown_body) } // Search crates.io for crates matching a query @@ -124,7 +128,14 @@ impl DocRouter { ToolError::ExecutionError(format!("Failed to read response body: {}", e)) })?; - Ok(body) + // Check if response is JSON (API response) or HTML (web page) + if body.trim().starts_with('{') { + // This is likely JSON data, return as is + Ok(body) + } else { + // This is likely HTML, convert to markdown + Ok(parse_html(&body)) + } } // Get documentation for a specific item in a crate @@ -159,14 +170,17 @@ impl DocRouter { ))); } - let body = response.text().await.map_err(|e| { + let html_body = response.text().await.map_err(|e| { ToolError::ExecutionError(format!("Failed to read response body: {}", e)) })?; - - // Cache the result - self.cache.set(cache_key, body.clone()).await; - Ok(body) + // Convert HTML to markdown + let markdown_body = parse_html(&html_body); + + // Cache the markdown result + self.cache.set(cache_key, markdown_body.clone()).await; + + Ok(markdown_body) } } @@ -176,10 +190,11 @@ impl mcp_server::Router for DocRouter { } fn instructions(&self) -> String { - "This server provides tools for looking up Rust crate documentation. \ + "This server provides tools for looking up Rust crate documentation in markdown format. \ You can search for crates, lookup documentation for specific crates or \ items within crates. Use these tools to find information about Rust libraries \ - you are not familiar with.".to_string() + you are not familiar with. All HTML documentation is automatically converted to markdown \ + for better compatibility with language models.".to_string() } fn capabilities(&self) -> ServerCapabilities { @@ -194,7 +209,7 @@ impl mcp_server::Router for DocRouter { vec![ Tool::new( "lookup_crate".to_string(), - "Look up documentation for a Rust crate".to_string(), + "Look up documentation for a Rust crate (returns markdown)".to_string(), json!({ "type": "object", "properties": { @@ -212,7 +227,7 @@ impl mcp_server::Router for DocRouter { ), Tool::new( "search_crates".to_string(), - "Search for Rust crates on crates.io".to_string(), + "Search for Rust crates on crates.io (returns JSON or markdown)".to_string(), json!({ "type": "object", "properties": { @@ -230,7 +245,7 @@ impl mcp_server::Router for DocRouter { ), Tool::new( "lookup_item".to_string(), - "Look up documentation for a specific item in a Rust crate".to_string(), + "Look up documentation for a specific item in a Rust crate (returns markdown)".to_string(), json!({ "type": "object", "properties": {