refactor: LanguageExtractor gains explicit 3-phase trait + run_extraction pipeline

This commit is contained in:
2026-06-17 13:30:41 +02:00
parent 8b20bf3874
commit 04da26beba
4 changed files with 66 additions and 49 deletions

View File

@@ -1,5 +1,39 @@
use tree_sitter::{Node, Parser};
use archlens_domain::{AnalysisResult, DomainError, FilePath};
use crate::extraction_context::ExtractionContext;
/// Per-language hooks used by `run_extraction`.
///
/// `run_extraction` asks the implementor for its grammar, parses the source, and then calls
/// `extract_types`, `extract_relationships` and `extract_imports` in that order, handing each
/// one the root node of the parsed tree, the source text, and the same `ExtractionContext`.
pub trait LanguageExtractor {
/// Analyzes `source` for `file_path` in a single call.
///
/// NOTE(review): this listing looks like a diff rendered without +/- markers, and the
/// implementors shown further down replace `analyze` with the phase methods below —
/// confirm whether this method is still part of the trait.
fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError>;
/// Returns the tree-sitter grammar that `run_extraction` installs on its parser.
fn tree_sitter_language(&self) -> tree_sitter::Language;
/// First phase: invoked with the tree's root node before the other two phases.
fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
/// Second phase: invoked after `extract_types` with the same root node and context.
/// An implementor may leave this empty when an earlier phase already recorded its
/// relationships (the Python extractor in this change does so).
fn extract_relationships(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
/// Third phase: invoked last, after `extract_relationships`.
fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext);
}
/// Parses `source` with the extractor's grammar and runs the three extraction phases.
///
/// The phases run in a fixed order — types, relationships, imports — against a single
/// `ExtractionContext` created for `file_path`. The context is then converted into the
/// returned result.
///
/// # Errors
///
/// Returns `DomainError::AnalysisError` when the parser rejects the grammar or when parsing
/// produces no tree. Whatever `ExtractionContext::into_result` returns is passed through.
pub fn run_extraction(
    extractor: &dyn LanguageExtractor,
    source: &str,
    file_path: &FilePath,
) -> Result<AnalysisResult, DomainError> {
    let mut parser = Parser::new();
    // `tree_sitter_language` already yields a `tree_sitter::Language`, so it is borrowed
    // directly instead of going through an identity `.into()` conversion.
    parser
        .set_language(&extractor.tree_sitter_language())
        .map_err(|e| DomainError::AnalysisError(e.to_string()))?;

    let tree = parser
        .parse(source, None)
        .ok_or_else(|| DomainError::AnalysisError("failed to parse source".into()))?;

    let mut ctx = ExtractionContext::new(file_path.clone());
    let root = tree.root_node();

    // Phase order is part of the contract: all three phases write into the same context.
    extractor.extract_types(&root, source, &mut ctx);
    extractor.extract_relationships(&root, source, &mut ctx);
    extractor.extract_imports(&root, source, &mut ctx);

    ctx.into_result()
}

View File

@@ -1,9 +1,6 @@
use tree_sitter::{Node, Parser};
use tree_sitter::Node;
use archlens_domain::{
AnalysisResult, CodeElement, CodeElementKind, DomainError, FilePath, Relationship,
RelationshipKind,
};
use archlens_domain::{CodeElement, CodeElementKind, Relationship, RelationshipKind};
use crate::extraction_context::ExtractionContext;
use crate::language_extractor::LanguageExtractor;
@@ -11,28 +8,23 @@ use crate::language_extractor::LanguageExtractor;
pub struct PythonExtractor;
impl LanguageExtractor for PythonExtractor {
fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
analyze(source, file_path)
fn tree_sitter_language(&self) -> tree_sitter::Language {
tree_sitter_python::LANGUAGE.into()
}
}
pub fn analyze(source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
let mut parser = Parser::new();
parser
.set_language(&tree_sitter_python::LANGUAGE.into())
.map_err(|e| DomainError::AnalysisError(e.to_string()))?;
fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
// collect_classes handles class elements, inheritance, and field compositions
// in a single pass — Python's relationship extraction is interleaved with type extraction
collect_classes(root, source, ctx);
}
let tree = parser
.parse(source, None)
.ok_or_else(|| DomainError::AnalysisError("failed to parse".to_string()))?;
fn extract_relationships(&self, _root: &Node, _source: &str, _ctx: &mut ExtractionContext) {
// Relationships are collected inside collect_classes for Python
}
let mut ctx = ExtractionContext::new(file_path.clone());
let root = tree.root_node();
collect_classes(&root, source, &mut ctx);
collect_imports(&root, source, &mut ctx);
ctx.into_result()
fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
collect_imports(root, source, ctx);
}
}
fn collect_classes(node: &Node, source: &str, ctx: &mut ExtractionContext) {

View File

@@ -1,4 +1,4 @@
use tree_sitter::{Node, Parser};
use tree_sitter::Node;
const RUST_PRIMITIVES: &[&str] = &[
"bool",
@@ -36,8 +36,7 @@ const RUST_PRIMITIVES: &[&str] = &[
];
use archlens_domain::{
AnalysisResult, CodeElement, CodeElementKind, DomainError, FilePath, Relationship,
RelationshipKind, Visibility,
CodeElement, CodeElementKind, Relationship, RelationshipKind, Visibility,
};
use crate::extraction_context::ExtractionContext;
@@ -46,30 +45,22 @@ use crate::language_extractor::LanguageExtractor;
pub struct RustExtractor;
impl LanguageExtractor for RustExtractor {
fn analyze(&self, source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
analyze(source, file_path)
fn tree_sitter_language(&self) -> tree_sitter::Language {
tree_sitter_rust::LANGUAGE.into()
}
}
pub fn analyze(source: &str, file_path: &FilePath) -> Result<AnalysisResult, DomainError> {
let mut parser = Parser::new();
parser
.set_language(&tree_sitter_rust::LANGUAGE.into())
.map_err(|e| DomainError::AnalysisError(e.to_string()))?;
fn extract_types(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
collect_types(root, source, ctx);
}
let tree = parser
.parse(source, None)
.ok_or_else(|| DomainError::AnalysisError("failed to parse".to_string()))?;
fn extract_relationships(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
collect_relationships(root, source, ctx);
}
let mut ctx = ExtractionContext::new(file_path.clone());
let root = tree.root_node();
collect_types(&root, source, &mut ctx);
collect_relationships(&root, source, &mut ctx);
collect_mod_declarations(&root, source, &mut ctx);
collect_use_imports(&root, source, &mut ctx);
ctx.into_result()
fn extract_imports(&self, root: &Node, source: &str, ctx: &mut ExtractionContext) {
collect_mod_declarations(root, source, ctx);
collect_use_imports(root, source, ctx);
}
}
fn collect_types(node: &Node, source: &str, ctx: &mut ExtractionContext) {

View File

@@ -1,6 +1,6 @@
use archlens_domain::{AnalysisResult, DomainError, Language, SourceFile, ports::SourceAnalyzer};
use crate::language_extractor::LanguageExtractor;
use crate::language_extractor::{LanguageExtractor, run_extraction};
use crate::python::PythonExtractor;
use crate::rust::RustExtractor;
@@ -38,7 +38,7 @@ impl SourceAnalyzer for TreeSitterAnalyzer {
.map_err(|e| DomainError::IoError(e.to_string()))?;
match self.extractor_for(file.language()) {
Some(extractor) => extractor.analyze(&source, file.path()),
Some(extractor) => run_extraction(extractor, &source, file.path()),
None => Ok(AnalysisResult::empty()),
}
}