init: archlens — architecture diagram generator
Some checks failed
CI / Check / Test (push) Failing after 1m24s
Some checks failed
CI / Check / Test (push) Failing after 1m24s
Hex arch + DDD, tree-sitter parsing, Mermaid/ASCII output. Supports Rust + Python. 92 tests. CI, diff, --check for staleness detection.
This commit is contained in:
337
crates/adapters/tree-sitter/src/python/mod.rs
Normal file
337
crates/adapters/tree-sitter/src/python/mod.rs
Normal file
@@ -0,0 +1,337 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use tree_sitter::{Node, Parser};
|
||||
|
||||
use archlens_domain::{
|
||||
AnalysisResult, AnalysisWarning, CodeElement, CodeElementKind, DomainError, FilePath,
|
||||
Relationship, RelationshipKind,
|
||||
};
|
||||
|
||||
pub fn analyze(source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
|
||||
let mut parser = Parser::new();
|
||||
parser
|
||||
.set_language(&tree_sitter_python::LANGUAGE.into())
|
||||
.map_err(|e| DomainError::AnalysisError(e.to_string()))?;
|
||||
|
||||
let tree = parser
|
||||
.parse(source, None)
|
||||
.ok_or_else(|| DomainError::AnalysisError("failed to parse".to_string()))?;
|
||||
|
||||
let mut elements = Vec::new();
|
||||
let mut relationships = Vec::new();
|
||||
let mut warnings = Vec::new();
|
||||
let mut type_names: HashSet<String> = HashSet::new();
|
||||
|
||||
let root = tree.root_node();
|
||||
collect_classes(
|
||||
&root,
|
||||
source,
|
||||
file_path,
|
||||
&mut elements,
|
||||
&mut type_names,
|
||||
&mut relationships,
|
||||
&mut warnings,
|
||||
);
|
||||
collect_imports(&root, source, file_path, &mut relationships);
|
||||
|
||||
let relationships = relationships
|
||||
.into_iter()
|
||||
.map(|r| r.with_source_file(file_path.clone()))
|
||||
.collect();
|
||||
|
||||
Ok(AnalysisResult::new(elements, relationships, warnings))
|
||||
}
|
||||
|
||||
fn collect_classes(
|
||||
node: &Node,
|
||||
source: &str,
|
||||
file_path: &FilePath,
|
||||
elements: &mut Vec<CodeElement>,
|
||||
type_names: &mut HashSet<String>,
|
||||
relationships: &mut Vec<Relationship>,
|
||||
warnings: &mut Vec<AnalysisWarning>,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if child.kind() != "class_definition" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(name_node) = child.child_by_field_name("name") else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let name = &source[name_node.byte_range()];
|
||||
let line = child.start_position().row + 1;
|
||||
|
||||
match CodeElement::new(name, CodeElementKind::Class, file_path.clone(), line) {
|
||||
Ok(element) => {
|
||||
type_names.insert(name.to_string());
|
||||
elements.push(element);
|
||||
}
|
||||
Err(e) => {
|
||||
if let Ok(w) = AnalysisWarning::new(file_path.clone(), line, &e.to_string()) {
|
||||
warnings.push(w);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(superclasses) = child.child_by_field_name("superclasses") {
|
||||
collect_inheritance(&superclasses, source, name, type_names, relationships);
|
||||
}
|
||||
|
||||
if let Some(body) = child.child_by_field_name("body") {
|
||||
collect_typed_fields(&body, source, name, type_names, relationships);
|
||||
collect_constructor_params(&body, source, name, type_names, relationships);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_inheritance(
|
||||
superclasses: &Node,
|
||||
source: &str,
|
||||
class_name: &str,
|
||||
_type_names: &HashSet<String>,
|
||||
relationships: &mut Vec<Relationship>,
|
||||
) {
|
||||
let mut cursor = superclasses.walk();
|
||||
for child in superclasses.children(&mut cursor) {
|
||||
if child.kind() == "identifier" {
|
||||
let base_name = &source[child.byte_range()];
|
||||
if !is_python_builtin(base_name)
|
||||
&& let Ok(rel) =
|
||||
Relationship::new(class_name, base_name, RelationshipKind::Inheritance)
|
||||
{
|
||||
relationships.push(rel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Names that are never reported as relationship targets: core built-in types
/// and a selection of `typing` constructs.
const PYTHON_BUILTINS: &[&str] = &[
    // Core built-in types and singletons.
    "str",
    "int",
    "float",
    "bool",
    "bytes",
    "list",
    "dict",
    "set",
    "tuple",
    "None",
    "type",
    "object",
    // Root exception classes.
    "Exception",
    "BaseException",
    // `typing` wrappers and aliases.
    "Optional",
    "Any",
    "Union",
    "List",
    "Dict",
    "Set",
    "Tuple",
    "Callable",
    "Sequence",
    "Mapping",
    "Iterable",
    "Iterator",
    "Generator",
    "Coroutine",
    "AsyncGenerator",
    "ClassVar",
    "Final",
    "Literal",
    "TypeVar",
    "Generic",
    "Protocol",
    "runtime_checkable",
    "Self",
];

/// Returns `true` when `name` exactly matches (case-sensitively) an entry of
/// [`PYTHON_BUILTINS`].
fn is_python_builtin(name: &str) -> bool {
    PYTHON_BUILTINS.iter().any(|&known| known == name)
}
|
||||
|
||||
/// Top-level module names treated as Python standard library; imports rooted at
/// any of these are not reported as relationships.
const STDLIB_MODULES: &[&str] = &[
    "os",
    "sys",
    "typing",
    "logging",
    "json",
    "re",
    "io",
    "abc",
    "collections",
    "datetime",
    "enum",
    "functools",
    "hashlib",
    "http",
    "importlib",
    "inspect",
    "itertools",
    "math",
    "pathlib",
    "pickle",
    "random",
    "shutil",
    "signal",
    "socket",
    "string",
    "subprocess",
    "tempfile",
    "threading",
    "time",
    "traceback",
    "unittest",
    "urllib",
    "uuid",
    "warnings",
    "contextlib",
    "dataclasses",
    "copy",
    "struct",
    "base64",
    "csv",
    "glob",
    "operator",
    "textwrap",
    "asyncio",
    "concurrent",
    "multiprocessing",
];

/// Decides whether an imported `module` path should be ignored.
///
/// Only the segment before the first `.` is inspected. The import is external
/// when that segment is listed in [`STDLIB_MODULES`] or begins with an
/// underscore. A path that starts with `.` has an empty first segment and is
/// therefore never external.
fn is_external_import(module: &str) -> bool {
    let top_level = match module.split_once('.') {
        Some((head, _rest)) => head,
        None => module,
    };

    top_level.starts_with('_') || STDLIB_MODULES.iter().any(|&known| known == top_level)
}
|
||||
|
||||
fn collect_imports(
|
||||
node: &Node,
|
||||
source: &str,
|
||||
file_path: &FilePath,
|
||||
relationships: &mut Vec<Relationship>,
|
||||
) {
|
||||
let file_name = std::path::Path::new(file_path.as_str())
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("unknown");
|
||||
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"import_statement" => {
|
||||
let mut name_cursor = child.walk();
|
||||
for name_child in child.children(&mut name_cursor) {
|
||||
if name_child.kind() == "dotted_name" {
|
||||
let module = &source[name_child.byte_range()];
|
||||
if !is_external_import(module)
|
||||
&& let Ok(rel) =
|
||||
Relationship::new(file_name, module, RelationshipKind::Import)
|
||||
{
|
||||
relationships.push(rel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"import_from_statement" => {
|
||||
if let Some(module_node) = child.child_by_field_name("module_name") {
|
||||
let module = &source[module_node.byte_range()];
|
||||
if !is_external_import(module)
|
||||
&& let Ok(rel) =
|
||||
Relationship::new(file_name, module, RelationshipKind::Import)
|
||||
{
|
||||
relationships.push(rel);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_constructor_params(
|
||||
body: &Node,
|
||||
source: &str,
|
||||
class_name: &str,
|
||||
_type_names: &HashSet<String>,
|
||||
relationships: &mut Vec<Relationship>,
|
||||
) {
|
||||
let mut cursor = body.walk();
|
||||
for child in body.children(&mut cursor) {
|
||||
if child.kind() != "function_definition" {
|
||||
continue;
|
||||
}
|
||||
let Some(fn_name) = child.child_by_field_name("name") else {
|
||||
continue;
|
||||
};
|
||||
if &source[fn_name.byte_range()] != "__init__" {
|
||||
continue;
|
||||
}
|
||||
let Some(params) = child.child_by_field_name("parameters") else {
|
||||
continue;
|
||||
};
|
||||
let mut param_cursor = params.walk();
|
||||
for param in params.children(&mut param_cursor) {
|
||||
if param.kind() == "typed_parameter"
|
||||
&& let Some(type_node) = param.child_by_field_name("type")
|
||||
{
|
||||
let type_text = &source[type_node.byte_range()];
|
||||
let base_type = type_text.split('[').next().unwrap_or(type_text).trim();
|
||||
if base_type != class_name
|
||||
&& !base_type.is_empty()
|
||||
&& !is_python_builtin(base_type)
|
||||
&& let Ok(rel) =
|
||||
Relationship::new(class_name, base_type, RelationshipKind::Composition)
|
||||
{
|
||||
relationships.push(rel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_typed_fields(
|
||||
body: &Node,
|
||||
source: &str,
|
||||
class_name: &str,
|
||||
type_names: &HashSet<String>,
|
||||
relationships: &mut Vec<Relationship>,
|
||||
) {
|
||||
collect_typed_fields_recursive(body, source, class_name, type_names, relationships);
|
||||
}
|
||||
|
||||
fn collect_typed_fields_recursive(
|
||||
node: &Node,
|
||||
source: &str,
|
||||
class_name: &str,
|
||||
_type_names: &HashSet<String>,
|
||||
relationships: &mut Vec<Relationship>,
|
||||
) {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if (child.kind() == "assignment" || child.kind() == "typed_assignment")
|
||||
&& let Some(type_node) = child.child_by_field_name("type")
|
||||
{
|
||||
let type_text = &source[type_node.byte_range()];
|
||||
let base_type = type_text.split('[').next().unwrap_or(type_text).trim();
|
||||
if base_type != class_name
|
||||
&& !base_type.is_empty()
|
||||
&& !is_python_builtin(base_type)
|
||||
&& let Ok(rel) =
|
||||
Relationship::new(class_name, base_type, RelationshipKind::Composition)
|
||||
{
|
||||
relationships.push(rel);
|
||||
}
|
||||
}
|
||||
|
||||
collect_typed_fields_recursive(&child, source, class_name, _type_names, relationships);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user