importer feature
This commit is contained in:
13
crates/adapters/importer/src/error.rs
Normal file
13
crates/adapters/importer/src/error.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ImportError {
|
||||
#[error("CSV parse error: {0}")]
|
||||
Csv(String),
|
||||
#[error("JSON parse error: {0}")]
|
||||
Json(String),
|
||||
#[error("XLSX parse error: {0}")]
|
||||
Xlsx(String),
|
||||
#[error("Empty file")]
|
||||
Empty,
|
||||
#[error("Missing header row")]
|
||||
NoHeader,
|
||||
}
|
||||
12
crates/adapters/importer/src/lib.rs
Normal file
12
crates/adapters/importer/src/lib.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
pub mod error;
|
||||
pub mod mapper;
|
||||
pub mod parsers;
|
||||
pub mod types;
|
||||
|
||||
pub use error::ImportError;
|
||||
pub use mapper::apply_mapping;
|
||||
pub use parsers::{parse_csv, parse_json};
|
||||
pub use types::{AnnotatedRow, DomainField, FieldMapping, ImportRow, ParsedFile, RowResult, Transform};
|
||||
|
||||
#[cfg(feature = "xlsx")]
|
||||
pub use parsers::parse_xlsx;
|
||||
192
crates/adapters/importer/src/mapper.rs
Normal file
192
crates/adapters/importer/src/mapper.rs
Normal file
@@ -0,0 +1,192 @@
|
||||
use crate::types::{AnnotatedRow, DomainField, FieldMapping, ImportRow, ParsedFile, RowResult, Transform};
|
||||
|
||||
pub fn apply_mapping(file: &ParsedFile, mappings: &[FieldMapping]) -> Vec<AnnotatedRow> {
|
||||
file.rows.iter().map(|row| {
|
||||
let result = map_row(row, &file.columns, mappings);
|
||||
AnnotatedRow { result, is_duplicate: false }
|
||||
}).collect()
|
||||
}
|
||||
|
||||
fn map_row(row: &[String], columns: &[String], mappings: &[FieldMapping]) -> RowResult {
|
||||
let mut import_row = ImportRow::default();
|
||||
let mut errors = Vec::new();
|
||||
|
||||
for mapping in mappings {
|
||||
let Some(col_idx) = columns.iter().position(|c| c == &mapping.source_column) else {
|
||||
continue;
|
||||
};
|
||||
let raw_value = row.get(col_idx).map(|s| s.as_str()).unwrap_or("").trim();
|
||||
if raw_value.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if let Some(value) = apply_transform(raw_value, &mapping.transform, &mut errors) {
|
||||
set_field(&mut import_row, &mapping.domain_field, value);
|
||||
}
|
||||
}
|
||||
|
||||
if import_row.title.is_none() && import_row.external_metadata_id.is_none() {
|
||||
errors.push("missing required field: title or external_metadata_id".into());
|
||||
}
|
||||
if import_row.rating.is_none() {
|
||||
errors.push("missing required field: rating".into());
|
||||
}
|
||||
if import_row.watched_at.is_none() {
|
||||
errors.push("missing required field: watched_at".into());
|
||||
}
|
||||
|
||||
if errors.is_empty() {
|
||||
RowResult::Valid(import_row)
|
||||
} else {
|
||||
let raw = columns.iter()
|
||||
.zip(row.iter())
|
||||
.map(|(c, v)| (c.clone(), v.clone()))
|
||||
.collect();
|
||||
RowResult::Invalid { errors, raw }
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_transform(value: &str, transform: &Transform, errors: &mut Vec<String>) -> Option<String> {
|
||||
match transform {
|
||||
Transform::Identity => Some(value.to_string()),
|
||||
Transform::DateFormat(_) => Some(value.to_string()),
|
||||
Transform::RatingScale(factor) => {
|
||||
match value.parse::<f64>() {
|
||||
Ok(n) => Some((n * factor).round().to_string()),
|
||||
Err(_) => {
|
||||
errors.push(format!("rating '{}' is not a number", value));
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn set_field(row: &mut ImportRow, field: &DomainField, value: String) {
|
||||
match field {
|
||||
DomainField::Title => row.title = Some(value),
|
||||
DomainField::ReleaseYear => row.release_year = Some(value),
|
||||
DomainField::Director => row.director = Some(value),
|
||||
DomainField::Rating => row.rating = Some(value),
|
||||
DomainField::WatchedAt => row.watched_at = Some(value),
|
||||
DomainField::Comment => row.comment = Some(value),
|
||||
DomainField::ExternalMetadataId => row.external_metadata_id = Some(value),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DomainField, FieldMapping, ParsedFile, RowResult, Transform};

    /// Builds a [`FieldMapping`] without spelling out the struct literal each time.
    fn mapping(source: &str, field: DomainField, transform: Transform) -> FieldMapping {
        FieldMapping {
            source_column: source.to_string(),
            domain_field: field,
            transform,
        }
    }

    /// Builds a [`ParsedFile`] from string literals.
    fn table(columns: &[&str], rows: Vec<Vec<&str>>) -> ParsedFile {
        ParsedFile {
            columns: columns.iter().map(|c| String::from(*c)).collect(),
            rows: rows
                .into_iter()
                .map(|cells| cells.into_iter().map(String::from).collect())
                .collect(),
        }
    }

    /// Two complete rows followed by one whose title cell is blank.
    fn sample_file() -> ParsedFile {
        table(
            &["Name", "Stars", "Date"],
            vec![
                vec!["Inception", "10", "2024-01-15"],
                vec!["Dune", "8", "2024-02-20"],
                vec!["", "3", "2024-03-01"], // missing title → invalid
            ],
        )
    }

    /// Mappings that cover every required field of `sample_file`.
    fn full_mappings() -> Vec<FieldMapping> {
        vec![
            mapping("Name", DomainField::Title, Transform::Identity),
            mapping("Stars", DomainField::Rating, Transform::RatingScale(0.5)),
            mapping("Date", DomainField::WatchedAt, Transform::Identity),
        ]
    }

    #[test]
    fn maps_valid_rows() {
        let annotated = apply_mapping(&sample_file(), &full_mappings());
        assert_eq!(annotated.len(), 3);
        // The first two rows carry every required field.
        for row in &annotated[..2] {
            assert!(matches!(row.result, RowResult::Valid(_)));
        }
        // Mapping never flags duplicates itself.
        assert!(!annotated[0].is_duplicate);
    }

    #[test]
    fn applies_rating_scale_transform() {
        let annotated = apply_mapping(&sample_file(), &full_mappings());
        let RowResult::Valid(row) = &annotated[0].result else {
            panic!("expected Valid");
        };
        // 10 * 0.5 = 5
        assert_eq!(row.rating.as_deref(), Some("5"));
    }

    #[test]
    fn marks_missing_required_fields_invalid() {
        let annotated = apply_mapping(&sample_file(), &full_mappings());
        // The third row (index 2) has a blank title.
        assert!(matches!(annotated[2].result, RowResult::Invalid { .. }));
    }

    #[test]
    fn ignores_unmapped_columns() {
        let file = table(&["Name", "Extra"], vec![vec!["Inception", "ignored"]]);
        let mappings = vec![mapping("Name", DomainField::Title, Transform::Identity)];

        let annotated = apply_mapping(&file, &mappings);

        assert_eq!(annotated.len(), 1);
        // Rating and watched_at are never mapped, so the row cannot be valid.
        assert!(matches!(annotated[0].result, RowResult::Invalid { .. }));
    }

    #[test]
    fn nonexistent_source_column_skipped() {
        let file = table(&["Name"], vec![vec!["Inception"]]);
        let mappings = vec![mapping("DoesNotExist", DomainField::Title, Transform::Identity)];

        let annotated = apply_mapping(&file, &mappings);

        // Unknown column → nothing is set → title, rating and watched_at are all absent.
        assert!(matches!(annotated[0].result, RowResult::Invalid { .. }));
    }

    #[test]
    fn collects_all_errors_not_just_first() {
        let file = table(&["Name", "Stars"], vec![vec!["Inception", "notanumber"]]);
        // Deliberately no watched_at mapping.
        let mappings = vec![
            mapping("Name", DomainField::Title, Transform::Identity),
            mapping("Stars", DomainField::Rating, Transform::RatingScale(0.5)),
        ];

        let annotated = apply_mapping(&file, &mappings);

        let RowResult::Invalid { errors, .. } = &annotated[0].result else {
            panic!("expected Invalid");
        };
        assert!(errors.iter().any(|e| e.contains("not a number")), "expected rating error, got: {:?}", errors);
        assert!(errors.iter().any(|e| e.contains("watched_at")), "expected watched_at error, got: {:?}", errors);
    }

    #[test]
    fn non_numeric_rating_produces_error_in_row() {
        let file = table(
            &["Name", "Stars", "Date"],
            vec![vec!["Inception", "five", "2024-01-15"]],
        );

        let annotated = apply_mapping(&file, &full_mappings());

        assert!(matches!(annotated[0].result, RowResult::Invalid { .. }));
    }
}
|
||||
49
crates/adapters/importer/src/parsers/csv.rs
Normal file
49
crates/adapters/importer/src/parsers/csv.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use crate::{ImportError, types::ParsedFile};
|
||||
|
||||
pub fn parse_csv(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
|
||||
if bytes.is_empty() {
|
||||
return Err(ImportError::Empty);
|
||||
}
|
||||
|
||||
let delimiter = detect_delimiter(bytes);
|
||||
let mut rdr = csv::ReaderBuilder::new()
|
||||
.delimiter(delimiter)
|
||||
.from_reader(bytes);
|
||||
|
||||
let columns: Vec<String> = rdr
|
||||
.headers()
|
||||
.map_err(|e| ImportError::Csv(e.to_string()))?
|
||||
.iter()
|
||||
.map(|s| s.trim().to_string())
|
||||
.collect();
|
||||
|
||||
if columns.is_empty() {
|
||||
return Err(ImportError::NoHeader);
|
||||
}
|
||||
|
||||
let rows: Vec<Vec<String>> = rdr
|
||||
.records()
|
||||
.map(|r| {
|
||||
r.map_err(|e| ImportError::Csv(e.to_string()))
|
||||
.map(|rec| {
|
||||
let mut cells: Vec<String> = rec.iter().map(|f| f.trim().to_string()).collect();
|
||||
cells.resize(columns.len(), String::new());
|
||||
cells.truncate(columns.len());
|
||||
cells
|
||||
})
|
||||
})
|
||||
.collect::<Result<_, _>>()?;
|
||||
|
||||
if rows.is_empty() {
|
||||
return Err(ImportError::Empty);
|
||||
}
|
||||
|
||||
Ok(ParsedFile { columns, rows })
|
||||
}
|
||||
|
||||
/// Guesses the field delimiter from the first record of `bytes`.
///
/// Counts tabs and commas that appear outside double-quoted fields, up to the
/// first line break that is itself outside quotes. Returns `b'\t'` only when
/// tabs strictly outnumber commas; otherwise (including ties and empty input)
/// returns `b','`.
///
/// A `"` opens a quoted field only at the start of a field; inside a quoted
/// field `""` is an escaped quote. A quote in the middle of an unquoted field
/// is treated as ordinary text.
fn detect_delimiter(bytes: &[u8]) -> u8 {
    let mut tabs = 0usize;
    let mut commas = 0usize;
    let mut in_quotes = false;
    let mut at_field_start = true;

    let mut iter = bytes.iter().copied().peekable();
    while let Some(b) = iter.next() {
        if in_quotes {
            if b == b'"' {
                // A doubled quote is an escaped quote, not the end of the field.
                if iter.peek() == Some(&b'"') {
                    iter.next();
                } else {
                    in_quotes = false;
                }
            }
            continue;
        }
        match b {
            b'\n' => break,
            b'\t' => tabs += 1,
            b',' => commas += 1,
            b'"' if at_field_start => in_quotes = true,
            _ => {}
        }
        // Only a delimiter candidate puts us back at the start of a field.
        at_field_start = matches!(b, b'\t' | b',');
    }

    if tabs > commas { b'\t' } else { b',' }
}
|
||||
43
crates/adapters/importer/src/parsers/json.rs
Normal file
43
crates/adapters/importer/src/parsers/json.rs
Normal file
@@ -0,0 +1,43 @@
|
||||
use serde_json::Value;
|
||||
use crate::{ImportError, types::ParsedFile};
|
||||
|
||||
pub fn parse_json(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
|
||||
let value: Value = serde_json::from_slice(bytes)
|
||||
.map_err(|e| ImportError::Json(e.to_string()))?;
|
||||
|
||||
let arr = value.as_array()
|
||||
.ok_or_else(|| ImportError::Json("expected a JSON array".into()))?;
|
||||
|
||||
if arr.is_empty() {
|
||||
return Err(ImportError::Empty);
|
||||
}
|
||||
|
||||
let first = arr[0].as_object()
|
||||
.ok_or_else(|| ImportError::Json("array elements must be objects".into()))?;
|
||||
let columns: Vec<String> = first.keys().cloned().collect();
|
||||
|
||||
if columns.is_empty() {
|
||||
return Err(ImportError::NoHeader);
|
||||
}
|
||||
|
||||
let rows: Vec<Vec<String>> = arr.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, item)| {
|
||||
let obj = item.as_object()
|
||||
.ok_or_else(|| ImportError::Json(format!("element at index {} is not an object", idx)))?;
|
||||
Ok(columns.iter()
|
||||
.map(|col| obj.get(col).map(value_to_string).unwrap_or_default())
|
||||
.collect())
|
||||
})
|
||||
.collect::<Result<_, ImportError>>()?;
|
||||
|
||||
Ok(ParsedFile { columns, rows })
|
||||
}
|
||||
|
||||
fn value_to_string(v: &Value) -> String {
|
||||
match v {
|
||||
Value::String(s) => s.clone(),
|
||||
Value::Null => String::new(),
|
||||
other => other.to_string(),
|
||||
}
|
||||
}
|
||||
50
crates/adapters/importer/src/parsers/mod.rs
Normal file
50
crates/adapters/importer/src/parsers/mod.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
mod csv;
|
||||
mod json;
|
||||
#[cfg(feature = "xlsx")]
|
||||
mod xlsx;
|
||||
|
||||
pub use csv::parse_csv;
|
||||
pub use json::parse_json;
|
||||
#[cfg(feature = "xlsx")]
|
||||
pub use xlsx::parse_xlsx;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Comma-separated input: the header becomes the columns, each line a row.
    #[test]
    fn csv_parses_headers_and_rows() {
        let input = b"title,rating,watched_at\nInception,5,2024-01-01\nDune,4,2024-02-15\n";
        let parsed = parse_csv(input).unwrap();

        assert_eq!(parsed.columns, ["title", "rating", "watched_at"]);
        assert_eq!(parsed.rows.len(), 2);
        assert_eq!(parsed.rows[0], ["Inception", "5", "2024-01-01"]);
    }

    /// Zero bytes is an error rather than an empty table.
    #[test]
    fn csv_rejects_empty() {
        let outcome = parse_csv(b"");
        assert!(outcome.is_err());
    }

    /// Tab-separated input goes through the same entry point.
    #[test]
    fn tsv_parses_correctly() {
        let input = b"title\trating\nInception\t5\n";
        let parsed = parse_csv(input).unwrap();

        assert_eq!(parsed.columns, ["title", "rating"]);
        assert_eq!(parsed.rows[0], ["Inception", "5"]);
    }

    /// An array of flat objects yields one column per key and one row per object.
    #[test]
    fn json_array_of_objects() {
        let input = br#"[{"title":"Inception","rating":"5"},{"title":"Dune","rating":"4"}]"#;
        let parsed = parse_json(input).unwrap();

        assert_eq!(parsed.columns.len(), 2);
        assert!(parsed.columns.iter().any(|c| c == "title"));
        assert_eq!(parsed.rows.len(), 2);
    }

    /// An empty array carries nothing to import.
    #[test]
    fn json_empty_array_errors() {
        let outcome = parse_json(b"[]");
        assert!(outcome.is_err());
    }
}
|
||||
64
crates/adapters/importer/src/parsers/xlsx.rs
Normal file
64
crates/adapters/importer/src/parsers/xlsx.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
use calamine::{Reader, open_workbook_from_rs, Xlsx, Data};
|
||||
use std::io::Cursor;
|
||||
use crate::{ImportError, types::ParsedFile};
|
||||
|
||||
pub fn parse_xlsx(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
|
||||
let cursor = Cursor::new(bytes);
|
||||
let mut workbook: Xlsx<_> = open_workbook_from_rs(cursor)
|
||||
.map_err(|e: calamine::XlsxError| ImportError::Xlsx(e.to_string()))?;
|
||||
|
||||
let sheet_name = workbook.sheet_names()
|
||||
.first()
|
||||
.cloned()
|
||||
.ok_or(ImportError::Empty)?;
|
||||
|
||||
let range = workbook.worksheet_range(&sheet_name)
|
||||
.map_err(|e| ImportError::Xlsx(e.to_string()))?;
|
||||
|
||||
let mut iter = range.rows();
|
||||
|
||||
let header = iter.next().ok_or(ImportError::NoHeader)?;
|
||||
let columns: Vec<String> = header.iter()
|
||||
.map(|c| cell_to_string(c).trim().to_string())
|
||||
.collect();
|
||||
|
||||
if columns.is_empty() {
|
||||
return Err(ImportError::NoHeader);
|
||||
}
|
||||
|
||||
let rows: Vec<Vec<String>> = iter
|
||||
.map(|row| {
|
||||
let mut cells: Vec<String> = row.iter().map(cell_to_string).collect();
|
||||
cells.resize(columns.len(), String::new());
|
||||
cells.truncate(columns.len());
|
||||
cells
|
||||
})
|
||||
.collect();
|
||||
|
||||
if rows.is_empty() {
|
||||
return Err(ImportError::Empty);
|
||||
}
|
||||
|
||||
Ok(ParsedFile { columns, rows })
|
||||
}
|
||||
|
||||
fn cell_to_string(cell: &Data) -> String {
|
||||
match cell {
|
||||
Data::String(s) => s.clone(),
|
||||
Data::Float(f) => {
|
||||
if f.fract() == 0.0 { format!("{}", *f as i64) } else { format!("{}", f) }
|
||||
}
|
||||
Data::Int(i) => i.to_string(),
|
||||
Data::Bool(b) => b.to_string(),
|
||||
Data::DateTime(dt) => {
|
||||
// ExcelDateTime::to_ymd_hms_milli() works without the chrono feature.
|
||||
let (year, month, day, _, _, _, _) = dt.to_ymd_hms_milli();
|
||||
format!("{:04}-{:02}-{:02}", year, month, day)
|
||||
}
|
||||
Data::DateTimeIso(s) => s.clone(),
|
||||
Data::DurationIso(s) => s.clone(),
|
||||
Data::Empty | Data::Error(_) => String::new(),
|
||||
// Fallback for unexpected calamine Data variants; renders as debug string
|
||||
other => format!("{other:?}"),
|
||||
}
|
||||
}
|
||||
57
crates/adapters/importer/src/types.rs
Normal file
57
crates/adapters/importer/src/types.rs
Normal file
@@ -0,0 +1,57 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct ParsedFile {
|
||||
pub columns: Vec<String>,
|
||||
pub rows: Vec<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum DomainField {
|
||||
Title,
|
||||
ReleaseYear,
|
||||
Director,
|
||||
Rating,
|
||||
WatchedAt,
|
||||
Comment,
|
||||
ExternalMetadataId,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum Transform {
|
||||
RatingScale(f64),
|
||||
DateFormat(String),
|
||||
Identity,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FieldMapping {
|
||||
pub source_column: String,
|
||||
pub domain_field: DomainField,
|
||||
pub transform: Transform,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct ImportRow {
|
||||
pub title: Option<String>,
|
||||
pub release_year: Option<String>,
|
||||
pub director: Option<String>,
|
||||
pub rating: Option<String>,
|
||||
pub watched_at: Option<String>,
|
||||
pub comment: Option<String>,
|
||||
pub external_metadata_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum RowResult {
|
||||
Valid(ImportRow),
|
||||
Invalid { errors: Vec<String>, raw: Vec<(String, String)> },
|
||||
}
|
||||
|
||||
/// Wraps a RowResult with a duplicate flag so this information persists when
|
||||
/// serialised as JSON into the import_sessions.row_results DB column.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AnnotatedRow {
|
||||
pub result: RowResult,
|
||||
pub is_duplicate: bool,
|
||||
}
|
||||
Reference in New Issue
Block a user