diff --git a/crates/adapters/tree-sitter/src/language_extractor.rs b/crates/adapters/tree-sitter/src/language_extractor.rs
index 4f17008..5808780 100644
--- a/crates/adapters/tree-sitter/src/language_extractor.rs
+++ b/crates/adapters/tree-sitter/src/language_extractor.rs
@@ -1,5 +1,39 @@
+use tree_sitter::{Node, Parser};
+
 use archlens_domain::{AnalysisResult, DomainError, FilePath};
 
+use crate::extraction_context::ExtractionContext;
+
 pub trait LanguageExtractor {
-    fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError>;
+    fn tree_sitter_language(&self) -> tree_sitter::Language;
+
+    fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
+
+    fn extract_relationships(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
+
+    fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
+}
+
+pub fn run_extraction(
+    extractor: &dyn LanguageExtractor,
+    source: &str,
+    file_path: &FilePath,
+) -> Result<AnalysisResult, DomainError> {
+    let mut parser = Parser::new();
+    parser
+        .set_language(&extractor.tree_sitter_language().into())
+        .map_err(|e| DomainError::AnalysisError(e.to_string()))?;
+
+    let tree = parser
+        .parse(source, None)
+        .ok_or_else(|| DomainError::AnalysisError("failed to parse source".into()))?;
+
+    let mut ctx = ExtractionContext::new(file_path.clone());
+    let root = tree.root_node();
+
+    extractor.extract_types(&root, source, &mut ctx);
+    extractor.extract_relationships(&root, source, &mut ctx);
+    extractor.extract_imports(&root, source, &mut ctx);
+
+    ctx.into_result()
 }
diff --git a/crates/adapters/tree-sitter/src/python/mod.rs b/crates/adapters/tree-sitter/src/python/mod.rs
index b9a1806..0a80b3a 100644
--- a/crates/adapters/tree-sitter/src/python/mod.rs
+++ b/crates/adapters/tree-sitter/src/python/mod.rs
@@ -1,9 +1,6 @@
-use tree_sitter::{Node, Parser};
+use tree_sitter::Node;
 
-use archlens_domain::{
-    AnalysisResult, CodeElement, CodeElementKind, DomainError, FilePath, Relationship,
-    RelationshipKind,
-};
+use archlens_domain::{CodeElement, CodeElementKind, Relationship, RelationshipKind};
 
 use crate::extraction_context::ExtractionContext;
 use crate::language_extractor::LanguageExtractor;
@@ -11,28 +8,23 @@ use crate::language_extractor::LanguageExtractor;
 pub struct PythonExtractor;
 
 impl LanguageExtractor for PythonExtractor {
-    fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
-        analyze(source, file_path)
+    fn tree_sitter_language(&self) -> tree_sitter::Language {
+        tree_sitter_python::LANGUAGE.into()
     }
-}
 
-pub fn analyze(source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
-    let mut parser = Parser::new();
-    parser
-        .set_language(&tree_sitter_python::LANGUAGE.into())
-        .map_err(|e| DomainError::AnalysisError(e.to_string()))?;
+    fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
+        // collect_classes handles class elements, inheritance, and field compositions
+        // in a single pass — Python's relationship extraction is interleaved with type extraction
+        collect_classes(root, source, ctx);
+    }
 
-    let tree = parser
-        .parse(source, None)
-        .ok_or_else(|| DomainError::AnalysisError("failed to parse".to_string()))?;
+    fn extract_relationships(&self, _root: &Node, _source: &str, _ctx: &mut ExtractionContext) {
+        // Relationships are collected inside collect_classes for Python
+    }
 
-    let mut ctx = ExtractionContext::new(file_path.clone());
-    let root = tree.root_node();
-
-    collect_classes(&root, source, &mut ctx);
-    collect_imports(&root, source, &mut ctx);
-
-    ctx.into_result()
+    fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
+        collect_imports(root, source, ctx);
+    }
 }
 
 fn collect_classes(node: &Node, source: &str, ctx: &mut ExtractionContext) {
diff --git a/crates/adapters/tree-sitter/src/rust/mod.rs b/crates/adapters/tree-sitter/src/rust/mod.rs
index 276c9c7..b9ca748 100644
--- a/crates/adapters/tree-sitter/src/rust/mod.rs
+++ b/crates/adapters/tree-sitter/src/rust/mod.rs
@@ -1,4 +1,4 @@
-use tree_sitter::{Node, Parser};
+use tree_sitter::Node;
 
 const RUST_PRIMITIVES: &[&str] = &[
     "bool",
@@ -36,8 +36,7 @@ const RUST_PRIMITIVES: &[&str] = &[
 ];
 
 use archlens_domain::{
-    AnalysisResult, CodeElement, CodeElementKind, DomainError, FilePath, Relationship,
-    RelationshipKind, Visibility,
+    CodeElement, CodeElementKind, Relationship, RelationshipKind, Visibility,
 };
 
 use crate::extraction_context::ExtractionContext;
@@ -46,30 +45,22 @@ use crate::language_extractor::LanguageExtractor;
 pub struct RustExtractor;
 
 impl LanguageExtractor for RustExtractor {
-    fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
-        analyze(source, file_path)
+    fn tree_sitter_language(&self) -> tree_sitter::Language {
+        tree_sitter_rust::LANGUAGE.into()
     }
-}
 
-pub fn analyze(source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
-    let mut parser = Parser::new();
-    parser
-        .set_language(&tree_sitter_rust::LANGUAGE.into())
-        .map_err(|e| DomainError::AnalysisError(e.to_string()))?;
+    fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
+        collect_types(root, source, ctx);
+    }
 
-    let tree = parser
-        .parse(source, None)
-        .ok_or_else(|| DomainError::AnalysisError("failed to parse".to_string()))?;
+    fn extract_relationships(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
+        collect_relationships(root, source, ctx);
+    }
 
-    let mut ctx = ExtractionContext::new(file_path.clone());
-    let root = tree.root_node();
-
-    collect_types(&root, source, &mut ctx);
-    collect_relationships(&root, source, &mut ctx);
-    collect_mod_declarations(&root, source, &mut ctx);
-    collect_use_imports(&root, source, &mut ctx);
-
-    ctx.into_result()
+    fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
+        collect_mod_declarations(root, source, ctx);
+        collect_use_imports(root, source, ctx);
+    }
 }
 
 fn collect_types(node: &Node, source: &str, ctx: &mut ExtractionContext) {
diff --git a/crates/adapters/tree-sitter/src/tree_sitter_analyzer.rs b/crates/adapters/tree-sitter/src/tree_sitter_analyzer.rs
index 1f23285..9335680 100644
--- a/crates/adapters/tree-sitter/src/tree_sitter_analyzer.rs
+++ b/crates/adapters/tree-sitter/src/tree_sitter_analyzer.rs
@@ -1,6 +1,6 @@
 use archlens_domain::{AnalysisResult, DomainError, Language, SourceFile, ports::SourceAnalyzer};
 
-use crate::language_extractor::LanguageExtractor;
+use crate::language_extractor::{LanguageExtractor, run_extraction};
 use crate::python::PythonExtractor;
 use crate::rust::RustExtractor;
 
@@ -38,7 +38,7 @@ impl SourceAnalyzer for TreeSitterAnalyzer {
             .map_err(|e| DomainError::IoError(e.to_string()))?;
 
         match self.extractor_for(file.language()) {
-            Some(extractor) => extractor.analyze(&source, file.path()),
+            Some(extractor) => run_extraction(extractor, &source, file.path()),
             None => Ok(AnalysisResult::empty()),
         }
     }