refactor: LanguageExtractor gains explicit 3-phase trait + run_extraction pipeline

This commit is contained in:
2026-06-17 13:30:41 +02:00
parent 8b20bf3874
commit 04da26beba
4 changed files with 66 additions and 49 deletions

View File

@@ -1,5 +1,39 @@
use tree_sitter::{Node, Parser};
use archlens_domain::{AnalysisResult, DomainError, FilePath}; use archlens_domain::{AnalysisResult, DomainError, FilePath};
use crate::extraction_context::ExtractionContext;
// Side-by-side diff rows: where a row carries two variants, the left one is the
// pre-commit text and the right one is the post-commit text. Rows with a single
// variant in this trait are post-commit additions.
//
// Per-language extraction interface. Pre-commit it had a single method,
// `analyze`, returning `Result<AnalysisResult, DomainError>`. Post-commit it
// exposes the tree-sitter grammar plus three hooks that write into a shared
// `ExtractionContext`; `run_extraction` calls them in the fixed order
// types -> relationships -> imports.
pub trait LanguageExtractor { pub trait LanguageExtractor {
// Left: the old all-in-one entry point. Right: the grammar that
// `run_extraction` passes to `Parser::set_language`.
fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError>; fn tree_sitter_language(&self) -> tree_sitter::Language;
// Phase 1: called first, with the root node of the parsed tree, the source
// text, and the context that accumulates results.
fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
// Phase 2: called after `extract_types`, on the same root and the same context.
fn extract_relationships(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
// Phase 3: called last; afterwards the context is turned into the final
// result via `into_result`.
fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
}
/// Shared extraction pipeline: parses `source` with the extractor's tree-sitter
/// grammar, then runs the three extraction hooks against one fresh
/// `ExtractionContext` created for `file_path`.
///
/// The hooks always run in the same order — `extract_types`,
/// `extract_relationships`, `extract_imports` — and all receive the same root
/// node and the same mutable context.
///
/// # Errors
/// Returns `DomainError::AnalysisError` when `set_language` fails (carrying that
/// error's text) or when `parse` yields `None` (message "failed to parse source").
/// Otherwise returns whatever `ctx.into_result()` produces.
pub fn run_extraction(
extractor: &dyn LanguageExtractor,
source: &str,
file_path: &FilePath,
) -> Result<AnalysisResult, DomainError> {
// A new parser is built on every call; nothing is cached between files.
let mut parser = Parser::new();
parser
// NOTE(review): `tree_sitter_language()` is declared to return
// `tree_sitter::Language`, so this `.into()` looks like an identity
// conversion — confirm against `Parser::set_language`'s signature and drop it.
.set_language(&extractor.tree_sitter_language().into())
.map_err(|e| DomainError::AnalysisError(e.to_string()))?;
let tree = parser
// `None` is passed as the second argument (no previous tree is supplied).
.parse(source, None)
.ok_or_else(|| DomainError::AnalysisError("failed to parse source".into()))?;
// The context takes its own copy of the path, so the caller keeps `file_path`.
let mut ctx = ExtractionContext::new(file_path.clone());
let root = tree.root_node();
// Fixed phase order; every hook sees the same root node and context.
extractor.extract_types(&root, source, &mut ctx);
extractor.extract_relationships(&root, source, &mut ctx);
extractor.extract_imports(&root, source, &mut ctx);
ctx.into_result()
// Fused diff row: presumably the left `}` closes the pre-commit trait and the
// right `}` closes `run_extraction`.
} }

View File

@@ -1,9 +1,6 @@
use tree_sitter::{Node, Parser}; use tree_sitter::Node;
use archlens_domain::{ use archlens_domain::{CodeElement, CodeElementKind, Relationship, RelationshipKind};
AnalysisResult, CodeElement, CodeElementKind, DomainError, FilePath, Relationship,
RelationshipKind,
};
use crate::extraction_context::ExtractionContext; use crate::extraction_context::ExtractionContext;
use crate::language_extractor::LanguageExtractor; use crate::language_extractor::LanguageExtractor;
@@ -11,28 +8,23 @@ use crate::language_extractor::LanguageExtractor;
// Fused diff rows: the left column is the pre-commit code, the right column is
// the post-commit code.
//
// Python implementation of `LanguageExtractor`.
// Pre-commit: the trait method `analyze` delegated to a free `analyze` function
// that built its own parser and context, then called `collect_classes` followed
// by `collect_imports`.
// Post-commit: the impl only names the grammar and forwards each phase to those
// same helpers, so the helper call order (classes, then imports) is unchanged.
pub struct PythonExtractor; pub struct PythonExtractor;
impl LanguageExtractor for PythonExtractor { impl LanguageExtractor for PythonExtractor {
// Left: old trait method. Right: grammar accessor, converting
// `tree_sitter_python::LANGUAGE` into the declared `tree_sitter::Language`.
fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> { fn tree_sitter_language(&self) -> tree_sitter::Language {
analyze(source, file_path) tree_sitter_python::LANGUAGE.into()
} }
}
// Left: the removed free function `analyze` (parser setup, parse, collection).
// Right: the phase hooks. Note the removed body reported "failed to parse" on a
// `None` parse, while the shared `run_extraction` reports "failed to parse source".
pub fn analyze(source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> { fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
let mut parser = Parser::new(); // collect_classes handles class elements, inheritance, and field compositions
parser // in a single pass — Python's relationship extraction is interleaved with type extraction
.set_language(&tree_sitter_python::LANGUAGE.into()) collect_classes(root, source, ctx);
.map_err(|e| DomainError::AnalysisError(e.to_string()))?; }
// Right: deliberately empty hook; its parameters are underscore-prefixed
// because nothing in the body uses them.
let tree = parser fn extract_relationships(&self, _root: &Node, _source: &str, _ctx: &mut ExtractionContext) {
.parse(source, None) // Relationships are collected inside collect_classes for Python
.ok_or_else(|| DomainError::AnalysisError("failed to parse".to_string()))?; }
let mut ctx = ExtractionContext::new(file_path.clone()); fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
let root = tree.root_node(); collect_imports(root, source, ctx);
}
// The helper calls and `into_result` below take `&root` / `&mut ctx`, i.e. they
// belong to the removed free `analyze`, which owned `root` and `ctx` locally.
collect_classes(&root, source, &mut ctx);
collect_imports(&root, source, &mut ctx);
ctx.into_result()
} }
fn collect_classes(node: &Node, source: &str, ctx: &mut ExtractionContext) { fn collect_classes(node: &Node, source: &str, ctx: &mut ExtractionContext) {

View File

@@ -1,4 +1,4 @@
use tree_sitter::{Node, Parser}; use tree_sitter::Node;
const RUST_PRIMITIVES: &[&str] = &[ const RUST_PRIMITIVES: &[&str] = &[
"bool", "bool",
@@ -36,8 +36,7 @@ const RUST_PRIMITIVES: &[&str] = &[
]; ];
use archlens_domain::{ use archlens_domain::{
AnalysisResult, CodeElement, CodeElementKind, DomainError, FilePath, Relationship, CodeElement, CodeElementKind, Relationship, RelationshipKind, Visibility,
RelationshipKind, Visibility,
}; };
use crate::extraction_context::ExtractionContext; use crate::extraction_context::ExtractionContext;
@@ -46,30 +45,22 @@ use crate::language_extractor::LanguageExtractor;
// Fused diff rows: the left column is the pre-commit code, the right column is
// the post-commit code.
//
// Rust implementation of `LanguageExtractor`.
// Pre-commit: the trait method `analyze` delegated to a free `analyze` function
// that built its own parser and context and then ran `collect_types`,
// `collect_relationships`, `collect_mod_declarations`, `collect_use_imports`.
// Post-commit: the same four helpers are spread over the three phase hooks, so
// when the hooks are driven by `run_extraction` the helper order is unchanged.
pub struct RustExtractor; pub struct RustExtractor;
impl LanguageExtractor for RustExtractor { impl LanguageExtractor for RustExtractor {
// Left: old trait method. Right: grammar accessor, converting
// `tree_sitter_rust::LANGUAGE` into the declared `tree_sitter::Language`.
fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> { fn tree_sitter_language(&self) -> tree_sitter::Language {
analyze(source, file_path) tree_sitter_rust::LANGUAGE.into()
} }
}
// Left: the removed free function `analyze`. Right: phase 1 forwards to
// `collect_types`.
pub fn analyze(source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> { fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
let mut parser = Parser::new(); collect_types(root, source, ctx);
parser }
.set_language(&tree_sitter_rust::LANGUAGE.into())
.map_err(|e| DomainError::AnalysisError(e.to_string()))?;
// Right: phase 2 forwards to `collect_relationships`.
let tree = parser fn extract_relationships(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
.parse(source, None) collect_relationships(root, source, ctx);
// The removed body reported "failed to parse" on a `None` parse, while the
// shared `run_extraction` reports "failed to parse source".
.ok_or_else(|| DomainError::AnalysisError("failed to parse".to_string()))?; }
// Right: phase 3 covers both import sources — `mod` declarations first, then
// `use` imports.
let mut ctx = ExtractionContext::new(file_path.clone()); fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
let root = tree.root_node(); collect_mod_declarations(root, source, ctx);
collect_use_imports(root, source, ctx);
// The calls below that take `&root` / `&mut ctx` belong to the removed free
// `analyze`, which owned `root` and `ctx` locally.
collect_types(&root, source, &mut ctx); }
collect_relationships(&root, source, &mut ctx);
collect_mod_declarations(&root, source, &mut ctx);
collect_use_imports(&root, source, &mut ctx);
ctx.into_result()
} }
fn collect_types(node: &Node, source: &str, ctx: &mut ExtractionContext) { fn collect_types(node: &Node, source: &str, ctx: &mut ExtractionContext) {

View File

@@ -1,6 +1,6 @@
use archlens_domain::{AnalysisResult, DomainError, Language, SourceFile, ports::SourceAnalyzer}; use archlens_domain::{AnalysisResult, DomainError, Language, SourceFile, ports::SourceAnalyzer};
use crate::language_extractor::LanguageExtractor; use crate::language_extractor::{LanguageExtractor, run_extraction};
use crate::python::PythonExtractor; use crate::python::PythonExtractor;
use crate::rust::RustExtractor; use crate::rust::RustExtractor;
@@ -38,7 +38,7 @@ impl SourceAnalyzer for TreeSitterAnalyzer {
.map_err(|e| DomainError::IoError(e.to_string()))?; .map_err(|e| DomainError::IoError(e.to_string()))?;
match self.extractor_for(file.language()) { match self.extractor_for(file.language()) {
Some(extractor) => extractor.analyze(&source, file.path()), Some(extractor) => run_extraction(extractor, &source, file.path()),
None => Ok(AnalysisResult::empty()), None => Ok(AnalysisResult::empty()),
} }
} }