importer feature

This commit is contained in:
2026-05-10 21:23:56 +02:00
parent a47e3ae4e6
commit f2f1317660
77 changed files with 4884 additions and 1810 deletions

View File

@@ -0,0 +1,13 @@
/// Errors produced while turning an uploaded file into a `ParsedFile`.
///
/// The `String` payloads carry a human-readable description of the failure.
#[derive(Debug, thiserror::Error)]
pub enum ImportError {
/// The CSV/TSV reader failed on the header or on a record.
#[error("CSV parse error: {0}")]
Csv(String),
/// The input was not valid JSON, or was not an array of objects.
#[error("JSON parse error: {0}")]
Json(String),
/// The workbook could not be opened or its first worksheet could not be read.
#[error("XLSX parse error: {0}")]
Xlsx(String),
/// The input contains nothing to import (e.g. no bytes, no sheets, or no data rows).
#[error("Empty file")]
Empty,
/// No column names could be determined from the input.
#[error("Missing header row")]
NoHeader,
}

View File

@@ -0,0 +1,12 @@
pub mod error;
pub mod mapper;
pub mod parsers;
pub mod types;
pub use error::ImportError;
pub use mapper::apply_mapping;
pub use parsers::{parse_csv, parse_json};
pub use types::{AnnotatedRow, DomainField, FieldMapping, ImportRow, ParsedFile, RowResult, Transform};
#[cfg(feature = "xlsx")]
pub use parsers::parse_xlsx;

View File

@@ -0,0 +1,192 @@
use crate::types::{AnnotatedRow, DomainField, FieldMapping, ImportRow, ParsedFile, RowResult, Transform};
/// Maps every row of `file` through `mappings`, returning one [`AnnotatedRow`] per input
/// row in the original order.
///
/// Rows are validated independently of each other; `is_duplicate` is always initialised
/// to `false` by this function.
pub fn apply_mapping(file: &ParsedFile, mappings: &[FieldMapping]) -> Vec<AnnotatedRow> {
    let mut annotated = Vec::with_capacity(file.rows.len());
    for cells in &file.rows {
        annotated.push(AnnotatedRow {
            result: map_row(cells, &file.columns, mappings),
            is_duplicate: false,
        });
    }
    annotated
}
fn map_row(row: &[String], columns: &[String], mappings: &[FieldMapping]) -> RowResult {
let mut import_row = ImportRow::default();
let mut errors = Vec::new();
for mapping in mappings {
let Some(col_idx) = columns.iter().position(|c| c == &mapping.source_column) else {
continue;
};
let raw_value = row.get(col_idx).map(|s| s.as_str()).unwrap_or("").trim();
if raw_value.is_empty() {
continue;
}
if let Some(value) = apply_transform(raw_value, &mapping.transform, &mut errors) {
set_field(&mut import_row, &mapping.domain_field, value);
}
}
if import_row.title.is_none() && import_row.external_metadata_id.is_none() {
errors.push("missing required field: title or external_metadata_id".into());
}
if import_row.rating.is_none() {
errors.push("missing required field: rating".into());
}
if import_row.watched_at.is_none() {
errors.push("missing required field: watched_at".into());
}
if errors.is_empty() {
RowResult::Valid(import_row)
} else {
let raw = columns.iter()
.zip(row.iter())
.map(|(c, v)| (c.clone(), v.clone()))
.collect();
RowResult::Invalid { errors, raw }
}
}
/// Applies `transform` to a single, already trimmed cell value.
///
/// * `Identity` and `DateFormat` return the value unchanged (the date format pattern is
///   not interpreted here).
/// * `RatingScale(factor)` parses the value as a number, multiplies it by `factor` and
///   rounds to the nearest whole number.
///
/// Returns `Some(transformed)` on success. On failure an explanatory message is appended
/// to `errors` and `None` is returned.
fn apply_transform(value: &str, transform: &Transform, errors: &mut Vec<String>) -> Option<String> {
    match transform {
        Transform::Identity | Transform::DateFormat(_) => Some(value.to_string()),
        Transform::RatingScale(factor) => {
            // `str::parse::<f64>` accepts "NaN", "inf" and "infinity"; those (and any
            // overflow caused by the scaling) are not usable ratings, so treat every
            // non-finite result like unparsable input.
            let scaled = value
                .parse::<f64>()
                .ok()
                .map(|n| (n * factor).round())
                .filter(|s| s.is_finite());
            match scaled {
                // Adding 0.0 turns -0.0 into 0.0 so the result never renders as "-0".
                Some(s) => Some((s + 0.0).to_string()),
                None => {
                    errors.push(format!("rating '{}' is not a number", value));
                    None
                }
            }
        }
    }
}
/// Stores `value` in the member of `row` selected by `field`, replacing any value that
/// was there before.
fn set_field(row: &mut ImportRow, field: &DomainField, value: String) {
    // Resolve the destination first, then perform a single assignment.
    let slot = match field {
        DomainField::Title => &mut row.title,
        DomainField::ReleaseYear => &mut row.release_year,
        DomainField::Director => &mut row.director,
        DomainField::Rating => &mut row.rating,
        DomainField::WatchedAt => &mut row.watched_at,
        DomainField::Comment => &mut row.comment,
        DomainField::ExternalMetadataId => &mut row.external_metadata_id,
    };
    *slot = Some(value);
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DomainField, FieldMapping, ParsedFile, RowResult, Transform};

    /// Shorthand for constructing a [`FieldMapping`].
    fn mapping(source: &str, field: DomainField, transform: Transform) -> FieldMapping {
        FieldMapping {
            source_column: source.to_owned(),
            domain_field: field,
            transform,
        }
    }

    /// Builds a [`ParsedFile`] from string literals.
    fn file_of(columns: &[&str], rows: Vec<Vec<&str>>) -> ParsedFile {
        ParsedFile {
            columns: columns.iter().map(|c| (*c).to_owned()).collect(),
            rows: rows
                .into_iter()
                .map(|cells| cells.into_iter().map(str::to_owned).collect())
                .collect(),
        }
    }

    /// Three rows: two complete ones and one without a title.
    fn sample_file() -> ParsedFile {
        file_of(
            &["Name", "Stars", "Date"],
            vec![
                vec!["Inception", "10", "2024-01-15"],
                vec!["Dune", "8", "2024-02-20"],
                // Empty title makes this row invalid.
                vec!["", "3", "2024-03-01"],
            ],
        )
    }

    /// Mappings covering title, rating (halved) and watched-at.
    fn full_mappings() -> Vec<FieldMapping> {
        vec![
            mapping("Name", DomainField::Title, Transform::Identity),
            mapping("Stars", DomainField::Rating, Transform::RatingScale(0.5)),
            mapping("Date", DomainField::WatchedAt, Transform::Identity),
        ]
    }

    #[test]
    fn maps_valid_rows() {
        let annotated = apply_mapping(&sample_file(), &full_mappings());
        assert_eq!(annotated.len(), 3);
        // The first two rows carry every required field.
        for row in &annotated[..2] {
            assert!(matches!(row.result, RowResult::Valid(_)));
        }
        // The duplicate flag starts out cleared.
        assert!(!annotated[0].is_duplicate);
    }

    #[test]
    fn applies_rating_scale_transform() {
        let annotated = apply_mapping(&sample_file(), &full_mappings());
        let RowResult::Valid(row) = &annotated[0].result else {
            panic!("expected Valid");
        };
        // 10 scaled by 0.5 is 5.
        assert_eq!(row.rating.as_deref(), Some("5"));
    }

    #[test]
    fn marks_missing_required_fields_invalid() {
        let annotated = apply_mapping(&sample_file(), &full_mappings());
        // The third row has an empty title.
        assert!(matches!(annotated[2].result, RowResult::Invalid { .. }));
    }

    #[test]
    fn ignores_unmapped_columns() {
        let mappings = vec![mapping("Name", DomainField::Title, Transform::Identity)];
        let file = file_of(&["Name", "Extra"], vec![vec!["Inception", "ignored"]]);
        let annotated = apply_mapping(&file, &mappings);
        assert_eq!(annotated.len(), 1);
        // Rating and watched_at are never mapped, so the row cannot be valid.
        assert!(matches!(annotated[0].result, RowResult::Invalid { .. }));
    }

    #[test]
    fn nonexistent_source_column_skipped() {
        let mappings = vec![mapping("DoesNotExist", DomainField::Title, Transform::Identity)];
        let file = file_of(&["Name"], vec![vec!["Inception"]]);
        let annotated = apply_mapping(&file, &mappings);
        // Unknown column: nothing is set, so title, rating and watched_at are all missing.
        assert!(matches!(annotated[0].result, RowResult::Invalid { .. }));
    }

    #[test]
    fn collects_all_errors_not_just_first() {
        // Deliberately no watched_at mapping.
        let mappings = vec![
            mapping("Name", DomainField::Title, Transform::Identity),
            mapping("Stars", DomainField::Rating, Transform::RatingScale(0.5)),
        ];
        let file = file_of(&["Name", "Stars"], vec![vec!["Inception", "notanumber"]]);
        let annotated = apply_mapping(&file, &mappings);
        let RowResult::Invalid { errors, .. } = &annotated[0].result else {
            panic!("expected Invalid");
        };
        assert!(
            errors.iter().any(|e| e.contains("not a number")),
            "expected rating error, got: {:?}",
            errors
        );
        assert!(
            errors.iter().any(|e| e.contains("watched_at")),
            "expected watched_at error, got: {:?}",
            errors
        );
    }

    #[test]
    fn non_numeric_rating_produces_error_in_row() {
        let file = file_of(
            &["Name", "Stars", "Date"],
            vec![vec!["Inception", "five", "2024-01-15"]],
        );
        let annotated = apply_mapping(&file, &full_mappings());
        assert!(matches!(annotated[0].result, RowResult::Invalid { .. }));
    }
}

View File

@@ -0,0 +1,49 @@
use crate::{ImportError, types::ParsedFile};
/// Parses comma- or tab-separated text into a [`ParsedFile`].
///
/// The delimiter is guessed from the first line. The first record is used as the header;
/// header names and all cell values are trimmed. Every data row is padded with empty
/// strings or cut so that it has exactly one cell per column.
///
/// # Errors
///
/// * [`ImportError::Empty`] if `bytes` is empty or there are no data rows.
/// * [`ImportError::NoHeader`] if no header columns were found.
/// * [`ImportError::Csv`] if the underlying reader fails.
pub fn parse_csv(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
    if bytes.is_empty() {
        return Err(ImportError::Empty);
    }
    let mut rdr = csv::ReaderBuilder::new()
        .delimiter(detect_delimiter(bytes))
        // By default the reader rejects any record whose field count differs from the
        // previous one, which would make the padding below unreachable. Accept ragged
        // rows and normalise their width ourselves.
        .flexible(true)
        .from_reader(bytes);

    let columns: Vec<String> = rdr
        .headers()
        .map_err(|e| ImportError::Csv(e.to_string()))?
        .iter()
        .map(|s| s.trim().to_string())
        .collect();
    if columns.is_empty() {
        return Err(ImportError::NoHeader);
    }

    let rows = rdr
        .records()
        .map(|record| -> Result<Vec<String>, ImportError> {
            let record = record.map_err(|e| ImportError::Csv(e.to_string()))?;
            let mut cells: Vec<String> = record.iter().map(|f| f.trim().to_string()).collect();
            // `resize` both pads short rows and truncates long ones.
            cells.resize(columns.len(), String::new());
            Ok(cells)
        })
        .collect::<Result<Vec<_>, _>>()?;
    if rows.is_empty() {
        return Err(ImportError::Empty);
    }
    Ok(ParsedFile { columns, rows })
}
/// Guesses the field delimiter by comparing tab and comma counts on the first line.
///
/// Returns `b'\t'` only when tabs strictly outnumber commas; otherwise `b','`.
fn detect_delimiter(bytes: &[u8]) -> u8 {
    // Everything up to (not including) the first newline, or the whole input if none.
    let header_end = bytes
        .iter()
        .position(|&b| b == b'\n')
        .unwrap_or(bytes.len());

    let mut tab_count = 0usize;
    let mut comma_count = 0usize;
    for &byte in &bytes[..header_end] {
        match byte {
            b'\t' => tab_count += 1,
            b',' => comma_count += 1,
            _ => {}
        }
    }

    if tab_count > comma_count {
        b'\t'
    } else {
        b','
    }
}

View File

@@ -0,0 +1,43 @@
use serde_json::Value;
use crate::{ImportError, types::ParsedFile};
/// Parses a JSON array of objects into a [`ParsedFile`].
///
/// The columns are the union of the keys of all objects, in the order in which each key
/// is first encountered, so a key that only appears in a later object is not lost. Each
/// object becomes one row; keys an object does not have yield an empty cell.
///
/// # Errors
///
/// * [`ImportError::Empty`] if `bytes` is empty or the array has no elements.
/// * [`ImportError::Json`] if the input is not valid JSON, is not an array, or contains
///   an element that is not an object.
/// * [`ImportError::NoHeader`] if none of the objects has any key.
pub fn parse_json(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
    // Report zero-byte input the same way `parse_csv` does instead of as a syntax error.
    if bytes.is_empty() {
        return Err(ImportError::Empty);
    }
    let value: Value = serde_json::from_slice(bytes)
        .map_err(|e| ImportError::Json(e.to_string()))?;
    let arr = value
        .as_array()
        .ok_or_else(|| ImportError::Json("expected a JSON array".into()))?;
    if arr.is_empty() {
        return Err(ImportError::Empty);
    }

    let objects = arr
        .iter()
        .enumerate()
        .map(|(idx, item)| {
            item.as_object().ok_or_else(|| {
                ImportError::Json(if idx == 0 {
                    "array elements must be objects".to_string()
                } else {
                    format!("element at index {} is not an object", idx)
                })
            })
        })
        .collect::<Result<Vec<_>, ImportError>>()?;

    // Collect every distinct key, keeping first-seen order.
    let mut seen = std::collections::HashSet::new();
    let mut columns: Vec<String> = Vec::new();
    for obj in &objects {
        for key in obj.keys() {
            if seen.insert(key.as_str()) {
                columns.push(key.clone());
            }
        }
    }
    if columns.is_empty() {
        return Err(ImportError::NoHeader);
    }

    let rows: Vec<Vec<String>> = objects
        .iter()
        .map(|obj| {
            columns
                .iter()
                .map(|col| obj.get(col).map(value_to_string).unwrap_or_default())
                .collect()
        })
        .collect();
    Ok(ParsedFile { columns, rows })
}
fn value_to_string(v: &Value) -> String {
match v {
Value::String(s) => s.clone(),
Value::Null => String::new(),
other => other.to_string(),
}
}

View File

@@ -0,0 +1,50 @@
mod csv;
mod json;
#[cfg(feature = "xlsx")]
mod xlsx;
pub use csv::parse_csv;
pub use json::parse_json;
#[cfg(feature = "xlsx")]
pub use xlsx::parse_xlsx;
#[cfg(test)]
mod tests {
    use super::*;

    /// The header row becomes the columns and each record becomes one row.
    #[test]
    fn csv_parses_headers_and_rows() {
        let input = b"title,rating,watched_at\nInception,5,2024-01-01\nDune,4,2024-02-15\n";
        let parsed = parse_csv(input).expect("well-formed CSV should parse");
        assert_eq!(parsed.columns, ["title", "rating", "watched_at"]);
        assert_eq!(parsed.rows.len(), 2);
        assert_eq!(parsed.rows[0], ["Inception", "5", "2024-01-01"]);
    }

    /// Zero-byte input is rejected.
    #[test]
    fn csv_rejects_empty() {
        assert!(matches!(parse_csv(b""), Err(_)));
    }

    /// Tab-separated input goes through the same entry point.
    #[test]
    fn tsv_parses_correctly() {
        let input = b"title\trating\nInception\t5\n";
        let parsed = parse_csv(input).expect("well-formed TSV should parse");
        assert_eq!(parsed.columns, ["title", "rating"]);
        assert_eq!(parsed.rows[0], ["Inception", "5"]);
    }

    /// Object keys become columns; each object becomes a row.
    #[test]
    fn json_array_of_objects() {
        let input = br#"[{"title":"Inception","rating":"5"},{"title":"Dune","rating":"4"}]"#;
        let parsed = parse_json(input).expect("array of objects should parse");
        assert_eq!(parsed.columns.len(), 2);
        assert!(parsed.columns.iter().any(|c| c == "title"));
        assert_eq!(parsed.rows.len(), 2);
    }

    /// An array without elements is rejected.
    #[test]
    fn json_empty_array_errors() {
        assert!(matches!(parse_json(b"[]"), Err(_)));
    }
}

View File

@@ -0,0 +1,64 @@
use calamine::{Reader, open_workbook_from_rs, Xlsx, Data};
use std::io::Cursor;
use crate::{ImportError, types::ParsedFile};
/// Parses the first worksheet of an XLSX workbook into a [`ParsedFile`].
///
/// The first row of the sheet's range is used as the header. Header names and cell values
/// are trimmed, matching what `parse_csv` does, and every data row is padded with empty
/// strings or cut so that it has exactly one cell per column.
///
/// # Errors
///
/// * [`ImportError::Xlsx`] if the workbook cannot be opened or the sheet cannot be read.
/// * [`ImportError::Empty`] if the workbook has no sheets or the sheet has no data rows.
/// * [`ImportError::NoHeader`] if the sheet has no rows or the header row has no cells.
pub fn parse_xlsx(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
    let cursor = Cursor::new(bytes);
    let mut workbook: Xlsx<_> = open_workbook_from_rs(cursor)
        .map_err(|e: calamine::XlsxError| ImportError::Xlsx(e.to_string()))?;
    let sheet_name = workbook
        .sheet_names()
        .first()
        .cloned()
        .ok_or(ImportError::Empty)?;
    let range = workbook
        .worksheet_range(&sheet_name)
        .map_err(|e| ImportError::Xlsx(e.to_string()))?;

    let mut iter = range.rows();
    let header = iter.next().ok_or(ImportError::NoHeader)?;
    let columns: Vec<String> = header
        .iter()
        .map(|c| cell_to_string(c).trim().to_string())
        .collect();
    if columns.is_empty() {
        return Err(ImportError::NoHeader);
    }

    let rows: Vec<Vec<String>> = iter
        .map(|row| {
            let mut cells: Vec<String> = row
                .iter()
                .map(|c| cell_to_string(c).trim().to_string())
                .collect();
            // `resize` both pads short rows and truncates long ones.
            cells.resize(columns.len(), String::new());
            cells
        })
        .collect();
    if rows.is_empty() {
        return Err(ImportError::Empty);
    }
    Ok(ParsedFile { columns, rows })
}
/// Renders a spreadsheet cell as text.
///
/// * Strings, ISO date-times and ISO durations are returned as stored.
/// * Numbers and booleans use their plain decimal / `true`/`false` form; whole-valued
///   floats are printed without a fractional part.
/// * Date-time cells are formatted as `YYYY-MM-DD`; the time of day is discarded.
/// * Empty and error cells become the empty string.
fn cell_to_string(cell: &Data) -> String {
    match cell {
        Data::String(s) => s.clone(),
        Data::Float(f) => {
            // 2^63: the first magnitude an `i64` cannot hold. `as i64` saturates beyond
            // it, which would silently turn e.g. 1e19 into i64::MAX, so only take the
            // integer path when the cast is exact.
            const I64_BOUND: f64 = 9_223_372_036_854_775_808.0;
            if f.fract() == 0.0 && f.abs() < I64_BOUND {
                (*f as i64).to_string()
            } else {
                f.to_string()
            }
        }
        Data::Int(i) => i.to_string(),
        Data::Bool(b) => b.to_string(),
        Data::DateTime(dt) => {
            // ExcelDateTime::to_ymd_hms_milli() works without the chrono feature.
            let (year, month, day, _, _, _, _) = dt.to_ymd_hms_milli();
            format!("{:04}-{:02}-{:02}", year, month, day)
        }
        Data::DateTimeIso(s) => s.clone(),
        Data::DurationIso(s) => s.clone(),
        Data::Empty | Data::Error(_) => String::new(),
        // Fallback for unexpected calamine Data variants; renders as debug string
        other => format!("{other:?}"),
    }
}

View File

@@ -0,0 +1,57 @@
use serde::{Deserialize, Serialize};
/// Tabular content extracted from an uploaded file, independent of the source format.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ParsedFile {
/// Column names, in source order.
pub columns: Vec<String>,
/// Data rows as cell text; the parsers in this crate emit one cell per entry in `columns`.
pub rows: Vec<Vec<String>>,
}
/// Identifies which member of `ImportRow` a source column is mapped to.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum DomainField {
/// Targets `ImportRow::title`.
Title,
/// Targets `ImportRow::release_year`.
ReleaseYear,
/// Targets `ImportRow::director`.
Director,
/// Targets `ImportRow::rating`.
Rating,
/// Targets `ImportRow::watched_at`.
WatchedAt,
/// Targets `ImportRow::comment`.
Comment,
/// Targets `ImportRow::external_metadata_id`.
ExternalMetadataId,
}
/// Conversion applied to a raw cell value before it is stored on an `ImportRow`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Transform {
/// Parse the value as a number, multiply it by this factor and round to a whole number.
RatingScale(f64),
/// The mapper currently passes the value through unchanged for this variant.
/// NOTE(review): the payload is presumably a date format pattern — confirm intended use.
DateFormat(String),
/// Use the value exactly as it appears in the source.
Identity,
}
/// Describes how one source column feeds one domain field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldMapping {
/// Name of the column in `ParsedFile::columns`; the mapper matches it exactly.
pub source_column: String,
/// Field of `ImportRow` that receives the value.
pub domain_field: DomainField,
/// Conversion applied to the cell text before it is stored.
pub transform: Transform,
}
/// One mapped row; every field holds transformed cell text or `None` when nothing was mapped.
///
/// The mapper treats a row as valid only if `rating` and `watched_at` are set and at
/// least one of `title` / `external_metadata_id` is set.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ImportRow {
/// Title text; required unless `external_metadata_id` is present.
pub title: Option<String>,
/// Release year as text (not parsed into a number here).
pub release_year: Option<String>,
/// Director name.
pub director: Option<String>,
/// Rating as text; required.
pub rating: Option<String>,
/// Watched-at value as text (not parsed into a date here); required.
pub watched_at: Option<String>,
/// Free-form comment.
pub comment: Option<String>,
/// Identifier in an external metadata source; can stand in for `title`.
pub external_metadata_id: Option<String>,
}
/// Outcome of mapping and validating a single source row.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RowResult {
/// The row passed validation; holds the mapped fields.
Valid(ImportRow),
/// The row failed validation: `errors` lists every problem found and `raw` holds the
/// original `(column name, cell value)` pairs of the source row.
Invalid { errors: Vec<String>, raw: Vec<(String, String)> },
}
/// Wraps a RowResult with a duplicate flag so this information persists when
/// serialised as JSON into the import_sessions.row_results DB column.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnnotatedRow {
/// Mapping/validation outcome for the row.
pub result: RowResult,
/// Whether the row is a duplicate. The mapper always initialises this to `false`;
/// NOTE(review): presumably set by a later duplicate-detection step — confirm.
pub is_duplicate: bool,
}