importer feature
This commit is contained in:
49
crates/adapters/importer/src/parsers/csv.rs
Normal file
49
crates/adapters/importer/src/parsers/csv.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use crate::{ImportError, types::ParsedFile};
|
||||
|
||||
pub fn parse_csv(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
|
||||
if bytes.is_empty() {
|
||||
return Err(ImportError::Empty);
|
||||
}
|
||||
|
||||
let delimiter = detect_delimiter(bytes);
|
||||
let mut rdr = csv::ReaderBuilder::new()
|
||||
.delimiter(delimiter)
|
||||
.from_reader(bytes);
|
||||
|
||||
let columns: Vec<String> = rdr
|
||||
.headers()
|
||||
.map_err(|e| ImportError::Csv(e.to_string()))?
|
||||
.iter()
|
||||
.map(|s| s.trim().to_string())
|
||||
.collect();
|
||||
|
||||
if columns.is_empty() {
|
||||
return Err(ImportError::NoHeader);
|
||||
}
|
||||
|
||||
let rows: Vec<Vec<String>> = rdr
|
||||
.records()
|
||||
.map(|r| {
|
||||
r.map_err(|e| ImportError::Csv(e.to_string()))
|
||||
.map(|rec| {
|
||||
let mut cells: Vec<String> = rec.iter().map(|f| f.trim().to_string()).collect();
|
||||
cells.resize(columns.len(), String::new());
|
||||
cells.truncate(columns.len());
|
||||
cells
|
||||
})
|
||||
})
|
||||
.collect::<Result<_, _>>()?;
|
||||
|
||||
if rows.is_empty() {
|
||||
return Err(ImportError::Empty);
|
||||
}
|
||||
|
||||
Ok(ParsedFile { columns, rows })
|
||||
}
|
||||
|
||||
/// Guesses the field delimiter from the first line of `bytes`.
///
/// Only the bytes before the first `\n` (or the whole input when there is no
/// newline) are inspected. Returns `b'\t'` when that line contains strictly
/// more tabs than commas; otherwise returns `b','`, including on a tie or when
/// neither byte occurs.
fn detect_delimiter(bytes: &[u8]) -> u8 {
    // End of the header line: position of the first newline, or the full length.
    let header_end = bytes
        .iter()
        .position(|&b| b == b'\n')
        .unwrap_or(bytes.len());

    let mut tab_count = 0usize;
    let mut comma_count = 0usize;
    for &byte in &bytes[..header_end] {
        match byte {
            b'\t' => tab_count += 1,
            b',' => comma_count += 1,
            _ => {}
        }
    }

    if tab_count > comma_count {
        b'\t'
    } else {
        b','
    }
}
|
||||
43
crates/adapters/importer/src/parsers/json.rs
Normal file
43
crates/adapters/importer/src/parsers/json.rs
Normal file
@@ -0,0 +1,43 @@
|
||||
use serde_json::Value;
|
||||
use crate::{ImportError, types::ParsedFile};
|
||||
|
||||
pub fn parse_json(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
|
||||
let value: Value = serde_json::from_slice(bytes)
|
||||
.map_err(|e| ImportError::Json(e.to_string()))?;
|
||||
|
||||
let arr = value.as_array()
|
||||
.ok_or_else(|| ImportError::Json("expected a JSON array".into()))?;
|
||||
|
||||
if arr.is_empty() {
|
||||
return Err(ImportError::Empty);
|
||||
}
|
||||
|
||||
let first = arr[0].as_object()
|
||||
.ok_or_else(|| ImportError::Json("array elements must be objects".into()))?;
|
||||
let columns: Vec<String> = first.keys().cloned().collect();
|
||||
|
||||
if columns.is_empty() {
|
||||
return Err(ImportError::NoHeader);
|
||||
}
|
||||
|
||||
let rows: Vec<Vec<String>> = arr.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, item)| {
|
||||
let obj = item.as_object()
|
||||
.ok_or_else(|| ImportError::Json(format!("element at index {} is not an object", idx)))?;
|
||||
Ok(columns.iter()
|
||||
.map(|col| obj.get(col).map(value_to_string).unwrap_or_default())
|
||||
.collect())
|
||||
})
|
||||
.collect::<Result<_, ImportError>>()?;
|
||||
|
||||
Ok(ParsedFile { columns, rows })
|
||||
}
|
||||
|
||||
fn value_to_string(v: &Value) -> String {
|
||||
match v {
|
||||
Value::String(s) => s.clone(),
|
||||
Value::Null => String::new(),
|
||||
other => other.to_string(),
|
||||
}
|
||||
}
|
||||
50
crates/adapters/importer/src/parsers/mod.rs
Normal file
50
crates/adapters/importer/src/parsers/mod.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
mod csv;
|
||||
mod json;
|
||||
#[cfg(feature = "xlsx")]
|
||||
mod xlsx;
|
||||
|
||||
pub use csv::parse_csv;
|
||||
pub use json::parse_json;
|
||||
#[cfg(feature = "xlsx")]
|
||||
pub use xlsx::parse_xlsx;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Comma-separated input: the first line becomes the columns, the rest the rows.
    #[test]
    fn csv_parses_headers_and_rows() {
        let input = b"title,rating,watched_at\nInception,5,2024-01-01\nDune,4,2024-02-15\n";
        let parsed = parse_csv(input).unwrap();

        assert_eq!(parsed.columns, vec!["title", "rating", "watched_at"]);
        assert_eq!(parsed.rows.len(), 2);
        assert_eq!(parsed.rows[0], vec!["Inception", "5", "2024-01-01"]);
    }

    /// Zero-length input is an error rather than an empty file.
    #[test]
    fn csv_rejects_empty() {
        let outcome = parse_csv(b"");
        assert!(outcome.is_err());
    }

    /// Tab-separated input goes through the same entry point as CSV.
    #[test]
    fn tsv_parses_correctly() {
        let input = b"title\trating\nInception\t5\n";
        let parsed = parse_csv(input).unwrap();

        assert_eq!(parsed.columns, vec!["title", "rating"]);
        assert_eq!(parsed.rows[0], vec!["Inception", "5"]);
    }

    /// An array of objects yields one column per key and one row per element.
    #[test]
    fn json_array_of_objects() {
        let input = br#"[{"title":"Inception","rating":"5"},{"title":"Dune","rating":"4"}]"#;
        let parsed = parse_json(input).unwrap();

        assert_eq!(parsed.columns.len(), 2);
        assert!(parsed.columns.iter().any(|c| c == "title"));
        assert_eq!(parsed.rows.len(), 2);
    }

    /// An empty JSON array is an error.
    #[test]
    fn json_empty_array_errors() {
        let outcome = parse_json(b"[]");
        assert!(outcome.is_err());
    }
}
|
||||
64
crates/adapters/importer/src/parsers/xlsx.rs
Normal file
64
crates/adapters/importer/src/parsers/xlsx.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
use calamine::{Reader, open_workbook_from_rs, Xlsx, Data};
|
||||
use std::io::Cursor;
|
||||
use crate::{ImportError, types::ParsedFile};
|
||||
|
||||
pub fn parse_xlsx(bytes: &[u8]) -> Result<ParsedFile, ImportError> {
|
||||
let cursor = Cursor::new(bytes);
|
||||
let mut workbook: Xlsx<_> = open_workbook_from_rs(cursor)
|
||||
.map_err(|e: calamine::XlsxError| ImportError::Xlsx(e.to_string()))?;
|
||||
|
||||
let sheet_name = workbook.sheet_names()
|
||||
.first()
|
||||
.cloned()
|
||||
.ok_or(ImportError::Empty)?;
|
||||
|
||||
let range = workbook.worksheet_range(&sheet_name)
|
||||
.map_err(|e| ImportError::Xlsx(e.to_string()))?;
|
||||
|
||||
let mut iter = range.rows();
|
||||
|
||||
let header = iter.next().ok_or(ImportError::NoHeader)?;
|
||||
let columns: Vec<String> = header.iter()
|
||||
.map(|c| cell_to_string(c).trim().to_string())
|
||||
.collect();
|
||||
|
||||
if columns.is_empty() {
|
||||
return Err(ImportError::NoHeader);
|
||||
}
|
||||
|
||||
let rows: Vec<Vec<String>> = iter
|
||||
.map(|row| {
|
||||
let mut cells: Vec<String> = row.iter().map(cell_to_string).collect();
|
||||
cells.resize(columns.len(), String::new());
|
||||
cells.truncate(columns.len());
|
||||
cells
|
||||
})
|
||||
.collect();
|
||||
|
||||
if rows.is_empty() {
|
||||
return Err(ImportError::Empty);
|
||||
}
|
||||
|
||||
Ok(ParsedFile { columns, rows })
|
||||
}
|
||||
|
||||
fn cell_to_string(cell: &Data) -> String {
|
||||
match cell {
|
||||
Data::String(s) => s.clone(),
|
||||
Data::Float(f) => {
|
||||
if f.fract() == 0.0 { format!("{}", *f as i64) } else { format!("{}", f) }
|
||||
}
|
||||
Data::Int(i) => i.to_string(),
|
||||
Data::Bool(b) => b.to_string(),
|
||||
Data::DateTime(dt) => {
|
||||
// ExcelDateTime::to_ymd_hms_milli() works without the chrono feature.
|
||||
let (year, month, day, _, _, _, _) = dt.to_ymd_hms_milli();
|
||||
format!("{:04}-{:02}-{:02}", year, month, day)
|
||||
}
|
||||
Data::DateTimeIso(s) => s.clone(),
|
||||
Data::DurationIso(s) => s.clone(),
|
||||
Data::Empty | Data::Error(_) => String::new(),
|
||||
// Fallback for unexpected calamine Data variants; renders as debug string
|
||||
other => format!("{other:?}"),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user