importer feature

This commit is contained in:
2026-05-10 21:23:56 +02:00
parent a47e3ae4e6
commit f2f1317660
77 changed files with 4884 additions and 1810 deletions

View File

@@ -0,0 +1,49 @@
use crate::{ImportError, types::ParsedFile};
/// Parses delimited text (comma- or tab-separated) into a [`ParsedFile`].
///
/// The delimiter is chosen by `detect_delimiter`. The first record is used as
/// the header row; every header and cell value is whitespace-trimmed. Data rows
/// are normalised to the header width: short rows are padded with empty
/// strings and surplus trailing fields are dropped.
///
/// # Errors
///
/// * `ImportError::Empty` – `bytes` is empty, or no data rows follow the header.
/// * `ImportError::NoHeader` – the header record contains no fields.
/// * `ImportError::Csv` – the underlying reader reported an error (carried as
///   its string rendering).
pub fn parse_csv(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
    if bytes.is_empty() {
        return Err(ImportError::Empty);
    }

    let delimiter = detect_delimiter(bytes);
    let mut rdr = csv::ReaderBuilder::new()
        .delimiter(delimiter)
        // Ragged rows must be accepted by the reader for the width
        // normalisation below to have any effect; the default (non-flexible)
        // reader rejects them with an "unequal lengths" error instead.
        .flexible(true)
        .from_reader(bytes);

    let columns: Vec<String> = rdr
        .headers()
        .map_err(|e| ImportError::Csv(e.to_string()))?
        .iter()
        .map(|s| s.trim().to_string())
        .collect();
    if columns.is_empty() {
        return Err(ImportError::NoHeader);
    }

    let width = columns.len();
    let rows: Vec<Vec<String>> = rdr
        .records()
        .map(|record| -> Result<Vec<String>, ImportError> {
            let record = record.map_err(|e| ImportError::Csv(e.to_string()))?;
            let mut cells: Vec<String> = record.iter().map(|f| f.trim().to_string()).collect();
            // `resize` both pads short rows and truncates long ones.
            cells.resize(width, String::new());
            Ok(cells)
        })
        .collect::<Result<_, ImportError>>()?;
    if rows.is_empty() {
        return Err(ImportError::Empty);
    }

    Ok(ParsedFile { columns, rows })
}
/// Guesses the field delimiter from the first line of `bytes`.
///
/// Returns `b'\t'` when the first line contains strictly more tabs than
/// commas, otherwise `b','` (also the answer for empty input or a tie).
///
/// Tabs and commas that sit inside a double-quoted section are not counted,
/// because they are field content rather than separators. If the quotes on the
/// first line are unbalanced (a stray `"` in an unquoted field, or a quoted
/// field that continues onto the next line), the quote tracking cannot be
/// trusted and the function falls back to counting every tab and comma.
fn detect_delimiter(bytes: &[u8]) -> u8 {
    let first_line = bytes.split(|&b| b == b'\n').next().unwrap_or(bytes);

    let mut tabs = 0usize;
    let mut commas = 0usize;
    let mut in_quotes = false;
    for &b in first_line {
        match b {
            // An escaped quote (`""`) toggles twice and leaves the state unchanged.
            b'"' => in_quotes = !in_quotes,
            b'\t' if !in_quotes => tabs += 1,
            b',' if !in_quotes => commas += 1,
            _ => {}
        }
    }

    if in_quotes {
        // Unbalanced quotes: fall back to the plain, quote-agnostic count.
        let raw_count = |needle: u8| first_line.iter().filter(|&&b| b == needle).count();
        tabs = raw_count(b'\t');
        commas = raw_count(b',');
    }

    if tabs > commas { b'\t' } else { b',' }
}

View File

@@ -0,0 +1,43 @@
use serde_json::Value;
use crate::{ImportError, types::ParsedFile};
/// Parses a JSON array of objects into a [`ParsedFile`].
///
/// Columns are the union of the keys of *all* objects, in the order in which
/// each key is first encountered while walking the array. Deriving them from
/// the first object only would silently drop any field that the first element
/// happens to omit. Each row holds one string per column: string values are
/// used as-is, `null` and missing keys become the empty string, and any other
/// value is rendered as its JSON text (see `value_to_string`).
///
/// # Errors
///
/// * `ImportError::Json` – `bytes` is not valid JSON, the top-level value is
///   not an array, or an element of the array is not an object.
/// * `ImportError::Empty` – the array has no elements.
/// * `ImportError::NoHeader` – no object in the array has any key.
pub fn parse_json(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
    let value: Value = serde_json::from_slice(bytes)
        .map_err(|e| ImportError::Json(e.to_string()))?;
    let arr = value
        .as_array()
        .ok_or_else(|| ImportError::Json("expected a JSON array".into()))?;

    let first = arr.first().ok_or(ImportError::Empty)?;
    if !first.is_object() {
        return Err(ImportError::Json("array elements must be objects".into()));
    }

    // Validate every element up front so column discovery and row building
    // can both work on plain object references.
    let objects = arr
        .iter()
        .enumerate()
        .map(|(idx, item)| {
            item.as_object().ok_or_else(|| {
                ImportError::Json(format!("element at index {} is not an object", idx))
            })
        })
        .collect::<Result<Vec<_>, ImportError>>()?;

    // Collect the union of keys, keeping first-seen order.
    let mut seen = std::collections::HashSet::new();
    let mut columns: Vec<String> = Vec::new();
    for obj in &objects {
        for key in obj.keys() {
            if seen.insert(key.as_str()) {
                columns.push(key.clone());
            }
        }
    }
    if columns.is_empty() {
        return Err(ImportError::NoHeader);
    }

    let rows: Vec<Vec<String>> = objects
        .iter()
        .map(|obj| {
            columns
                .iter()
                .map(|col| obj.get(col).map(value_to_string).unwrap_or_default())
                .collect()
        })
        .collect();

    Ok(ParsedFile { columns, rows })
}
fn value_to_string(v: &Value) -> String {
match v {
Value::String(s) => s.clone(),
Value::Null => String::new(),
other => other.to_string(),
}
}

View File

@@ -0,0 +1,50 @@
mod csv;
mod json;
#[cfg(feature = "xlsx")]
mod xlsx;
pub use csv::parse_csv;
pub use json::parse_json;
#[cfg(feature = "xlsx")]
pub use xlsx::parse_xlsx;
#[cfg(test)]
mod tests {
    use super::*;

    /// A comma-separated file yields its header as columns and one row per record.
    #[test]
    fn csv_parses_headers_and_rows() {
        let input = b"title,rating,watched_at\nInception,5,2024-01-01\nDune,4,2024-02-15\n";
        let parsed = parse_csv(input).unwrap();

        assert_eq!(parsed.columns, ["title", "rating", "watched_at"]);
        assert_eq!(parsed.rows.len(), 2);
        assert_eq!(parsed.rows[0], ["Inception", "5", "2024-01-01"]);
    }

    /// Zero-length input is an error rather than an empty table.
    #[test]
    fn csv_rejects_empty() {
        let outcome = parse_csv(b"");
        assert!(outcome.is_err());
    }

    /// Tab-separated input goes through the same entry point as CSV.
    #[test]
    fn tsv_parses_correctly() {
        let input = b"title\trating\nInception\t5\n";
        let parsed = parse_csv(input).unwrap();

        assert_eq!(parsed.columns, ["title", "rating"]);
        assert_eq!(parsed.rows[0], ["Inception", "5"]);
    }

    /// An array of flat objects produces one column per key and one row per object.
    #[test]
    fn json_array_of_objects() {
        let input = br#"[{"title":"Inception","rating":"5"},{"title":"Dune","rating":"4"}]"#;
        let parsed = parse_json(input).unwrap();

        assert_eq!(parsed.columns.len(), 2);
        assert!(parsed.columns.iter().any(|name| name == "title"));
        assert_eq!(parsed.rows.len(), 2);
    }

    /// An empty array carries no data and is rejected.
    #[test]
    fn json_empty_array_errors() {
        let outcome = parse_json(b"[]");
        assert!(outcome.is_err());
    }
}

View File

@@ -0,0 +1,64 @@
use calamine::{Reader, open_workbook_from_rs, Xlsx, Data};
use std::io::Cursor;
use crate::{ImportError, types::ParsedFile};
/// Parses the first worksheet of an XLSX workbook into a [`ParsedFile`].
///
/// The first row of the sheet's range is the header; the remaining rows are
/// data. Header and data cells are converted with `cell_to_string` and
/// whitespace-trimmed, matching what `parse_csv` does for CSV cells. Each data
/// row is normalised to the header width.
///
/// # Errors
///
/// * `ImportError::Xlsx` – the workbook cannot be opened or the worksheet
///   cannot be read (carried as the error's string rendering).
/// * `ImportError::Empty` – the workbook has no sheets, or the sheet has no
///   data rows after the header.
/// * `ImportError::NoHeader` – the sheet has no rows, or the header row has no
///   cells.
pub fn parse_xlsx(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
    let cursor = Cursor::new(bytes);
    let mut workbook: Xlsx<_> = open_workbook_from_rs(cursor)
        .map_err(|e: calamine::XlsxError| ImportError::Xlsx(e.to_string()))?;

    let sheet_name = workbook
        .sheet_names()
        .first()
        .cloned()
        .ok_or(ImportError::Empty)?;
    let range = workbook
        .worksheet_range(&sheet_name)
        .map_err(|e| ImportError::Xlsx(e.to_string()))?;

    let mut iter = range.rows();
    let header = iter.next().ok_or(ImportError::NoHeader)?;
    let columns: Vec<String> = header
        .iter()
        .map(|c| cell_to_string(c).trim().to_string())
        .collect();
    if columns.is_empty() {
        return Err(ImportError::NoHeader);
    }

    let width = columns.len();
    let rows: Vec<Vec<String>> = iter
        .map(|row| {
            // Trim data cells as well, so " 5 " in a spreadsheet imports the
            // same way as " 5 " in a CSV file.
            let mut cells: Vec<String> = row
                .iter()
                .map(|c| cell_to_string(c).trim().to_string())
                .collect();
            // `resize` both pads short rows and truncates long ones.
            cells.resize(width, String::new());
            cells
        })
        .collect();
    if rows.is_empty() {
        return Err(ImportError::Empty);
    }

    Ok(ParsedFile { columns, rows })
}
/// Converts a spreadsheet cell into the text stored in a row.
///
/// * Strings, ISO date-times and ISO durations are returned unchanged.
/// * Whole-number floats are printed without a fractional part; other floats
///   use their default `Display` rendering.
/// * Integers and booleans use `to_string()`.
/// * Date-times are formatted as `YYYY-MM-DD`; the time-of-day components are
///   discarded.
/// * Empty and error cells become the empty string.
fn cell_to_string(cell: &Data) -> String {
    // 2^63. Whole floats strictly below this magnitude convert to `i64`
    // exactly; at or above it an `as i64` cast saturates and would print
    // `i64::MAX` / `i64::MIN` instead of the real value.
    const I64_EXACT_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    match cell {
        Data::String(s) => s.clone(),
        Data::Float(f) => {
            if f.fract() == 0.0 && f.abs() < I64_EXACT_LIMIT {
                (*f as i64).to_string()
            } else {
                // Covers fractional values, NaN/infinity, and whole numbers
                // too large for `i64`.
                f.to_string()
            }
        }
        Data::Int(i) => i.to_string(),
        Data::Bool(b) => b.to_string(),
        Data::DateTime(dt) => {
            // ExcelDateTime::to_ymd_hms_milli() works without the chrono feature.
            let (year, month, day, _, _, _, _) = dt.to_ymd_hms_milli();
            format!("{:04}-{:02}-{:02}", year, month, day)
        }
        Data::DateTimeIso(s) => s.clone(),
        Data::DurationIso(s) => s.clone(),
        Data::Empty | Data::Error(_) => String::new(),
        // Fallback for unexpected calamine Data variants; renders as debug string
        other => format!("{other:?}"),
    }
}