refactor: 5 architectural improvements (Tasks 2-5 + Task 6 fix)
Some checks failed
lint / lint (push) Has been cancelled
test / unit (push) Has been cancelled
test / integration (push) Has been cancelled
lint / lint (pull_request) Failing after 5m2s
test / unit (pull_request) Successful in 16m19s
test / integration (pull_request) Failing after 17m15s

- feat(domain): Hashtag value object with canonical extract() — unifies two
  divergent private implementations; fields pre-compute raw/normalized/url_slug/ap_name

- feat(presentation): Deps<S: FromAppState> extractor — each handler now
  declares its exact dependency surface; AppState unchanged; handlers
  become unit-testable without mocking all 20 deps

- refactor(feed): replace 5 flat FeedRepository methods with FeedQuery/FeedScope
  — a single query() method; the shared SQL logic lives in one place; adding a
  feed type no longer requires 5 edits

- refactor(activitypub): ActivityPubRepository + OutboundFederationPort moved
  out of domain::ports into activitypub-base::ap_ports — domain crate no
  longer knows about AP IDs, inboxes, or actor URLs

- fix(outbox): OutboxRelay now opens a per-row transaction so FOR UPDATE
  SKIP LOCKED actually holds the lock during publish + mark_delivered
This commit is contained in:
2026-05-15 18:54:20 +02:00
parent 6024a65060
commit 0592861edd
37 changed files with 1401 additions and 865 deletions

View File

@@ -0,0 +1,139 @@
use std::collections::HashSet;
/// A hashtag extracted from content, with its derived forms pre-computed.
///
/// Equality and hashing are derived over all four fields, so two values that
/// differ only in the casing of `raw` (e.g. "Rust" vs "rust") compare unequal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Hashtag {
    /// Original casing without the leading `#`, e.g. "Rust"
    pub raw: String,
    /// Lowercased, e.g. "rust" — used for DB lookups
    pub normalized: String,
    /// "tags/rust" — callers prepend base_url
    pub url_slug: String,
    /// "#rust" — used directly in AP tag array
    pub ap_name: String,
}

/// Extract hashtags from content using a char-by-char scan.
///
/// Rules:
/// - Tag starts after a bare `#` followed immediately by an alphanumeric char.
///   The `#` does not need to be preceded by whitespace (`text#tag` yields `tag`).
/// - Tag chars: Unicode alphanumerics (as defined by `char::is_alphanumeric`)
///   plus `_`. Any other character ends the tag and is not part of it.
/// - A `#` that directly follows a tag starts a new tag (`#rust#go` yields
///   `rust` and `go`).
/// - Deduplicated case-insensitively; first occurrence wins.
/// - Returned in order of first appearance.
///
/// Returns an empty vector when `content` contains no hashtags.
pub fn extract(content: &str) -> Vec<Hashtag> {
    let mut seen: HashSet<String> = HashSet::new();
    let mut tags: Vec<Hashtag> = Vec::new();
    let mut chars = content.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '#' {
            continue;
        }
        // Only peek here: a non-alphanumeric follower (including another `#`)
        // must stay in the stream so the outer loop can examine it.
        if !matches!(chars.peek(), Some(nc) if nc.is_alphanumeric()) {
            continue;
        }

        // `next_if` stops *before* the first non-tag character. The previous
        // `by_ref().take_while(..)` consumed that terminator, which silently
        // dropped the second tag in inputs such as "#rust#go".
        let mut raw = String::new();
        while let Some(nc) = chars.next_if(|nc| nc.is_alphanumeric() || *nc == '_') {
            raw.push(nc);
        }

        let normalized = raw.to_lowercase();
        // `insert` returns false for an already-seen tag, so the first
        // occurrence (and its original casing) is the one that is kept.
        if seen.insert(normalized.clone()) {
            tags.push(Hashtag {
                url_slug: format!("tags/{}", normalized),
                ap_name: format!("#{}", normalized),
                raw,
                normalized,
            });
        }
    }
    tags
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the `normalized` form of every tag, preserving order.
    fn names(tags: &[Hashtag]) -> Vec<&str> {
        let mut out = Vec::with_capacity(tags.len());
        for tag in tags {
            out.push(tag.normalized.as_str());
        }
        out
    }

    #[test]
    fn basic() {
        let found = extract("Hello #world and #Rust!");
        assert_eq!(names(&found), ["world", "rust"]);
    }

    #[test]
    fn fields() {
        let found = extract("#Rust");
        assert_eq!(found.len(), 1);

        let Hashtag {
            raw,
            normalized,
            url_slug,
            ap_name,
        } = &found[0];
        assert_eq!(raw, "Rust");
        assert_eq!(normalized, "rust");
        assert_eq!(url_slug, "tags/rust");
        assert_eq!(ap_name, "#rust");
    }

    #[test]
    fn dedup_case_insensitive() {
        let found = extract("#rust #Rust #RUST");
        assert_eq!(names(&found), ["rust"]);
        // The casing of the first occurrence is the one that survives.
        assert_eq!(found[0].raw, "rust");
    }

    #[test]
    fn deduplicates_non_adjacent() {
        // Regression guard: the previous implementation relied on Vec::dedup(),
        // which only collapses *adjacent* duplicates. Tracking seen tags in a
        // HashSet removes repeats wherever they occur.
        let found = extract("#a #b #a");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].normalized, "a");
        assert_eq!(found[1].normalized, "b");
    }

    #[test]
    fn mid_word_extracted() {
        // A `#` glued to preceding text (`text#tag`) still opens a tag: the
        // scan never looks at what comes before the `#`, and the previous
        // implementation did not require leading whitespace either. This pins
        // that behaviour as the authoritative one.
        let found = extract("text#tag");
        assert_eq!(names(&found), ["tag"]);
    }

    #[test]
    fn hash_only_ignored() {
        let found = extract("# lone hash");
        assert!(found.is_empty());
    }

    #[test]
    fn trailing_punctuation_excluded() {
        // Punctuation ends the tag and is not part of it.
        let found = extract("#rust.");
        assert_eq!(names(&found), ["rust"]);
    }

    #[test]
    fn underscore_allowed() {
        let found = extract("#hello_world");
        assert_eq!(names(&found), ["hello_world"]);
    }

    #[test]
    fn empty_content() {
        let found = extract("");
        assert!(found.is_empty());
    }

    #[test]
    fn order_of_appearance() {
        let found = extract("#b #a #c");
        assert_eq!(names(&found), ["b", "a", "c"]);
    }
}

View File

@@ -1,5 +1,6 @@
pub mod errors;
pub mod events;
pub mod hashtag;
pub mod models;
pub mod ports;
pub mod value_objects;

View File

@@ -317,37 +317,43 @@ impl<
{
}
/// Scope of a feed query; carried in `FeedQuery::scope`.
///
/// Each variant holds only the data specific to that scope. Paging and the
/// optional viewer live on `FeedQuery` itself.
#[derive(Debug, Clone)]
pub enum FeedScope {
/// Scoped by a list of user ids (`following_ids`).
// NOTE(review): presumably the accounts the viewer follows — confirm against the repository impl.
Home { following_ids: Vec<UserId> },
/// No scope-specific data.
Public,
/// Scoped by a tag name.
// NOTE(review): whether `tag_name` must already be normalized (lowercased) is not visible here — confirm.
Tag { tag_name: String },
/// Scoped by a single user id.
User { user_id: UserId },
/// Scoped by a free-form query string.
Search { query: String },
}
/// Single argument object accepted by `FeedRepository::query`.
///
/// Bundles the scope with the paging parameters and the optional viewer so
/// that every feed kind goes through one entry point.
#[derive(Debug, Clone)]
pub struct FeedQuery {
/// Which feed is being requested, plus its scope-specific data.
pub scope: FeedScope,
/// Paging parameters for the request.
pub page: PageParams,
/// The user on whose behalf the query runs, if any.
// NOTE(review): `None` presumably means an anonymous request — confirm.
pub viewer_id: Option<UserId>,
}
impl FeedQuery {
    /// Builds a query with `FeedScope::Home`.
    ///
    /// Unlike the other constructors the viewer is mandatory here; it is
    /// stored as `Some(viewer_id)`.
    pub fn home(viewer_id: UserId, following_ids: Vec<UserId>, page: PageParams) -> Self {
        let scope = FeedScope::Home { following_ids };
        Self {
            scope,
            page,
            viewer_id: Some(viewer_id),
        }
    }

    /// Builds a query with `FeedScope::Public`.
    pub fn public(page: PageParams, viewer_id: Option<UserId>) -> Self {
        let scope = FeedScope::Public;
        Self {
            scope,
            page,
            viewer_id,
        }
    }

    /// Builds a query with `FeedScope::Tag`, converting `tag_name` into an
    /// owned `String`.
    pub fn tag(tag_name: impl Into<String>, page: PageParams, viewer_id: Option<UserId>) -> Self {
        let scope = FeedScope::Tag {
            tag_name: tag_name.into(),
        };
        Self {
            scope,
            page,
            viewer_id,
        }
    }

    /// Builds a query with `FeedScope::User` for `user_id`.
    pub fn user(user_id: UserId, page: PageParams, viewer_id: Option<UserId>) -> Self {
        let scope = FeedScope::User { user_id };
        Self {
            scope,
            page,
            viewer_id,
        }
    }

    /// Builds a query with `FeedScope::Search`, converting `query` into an
    /// owned `String`.
    pub fn search(query: impl Into<String>, page: PageParams, viewer_id: Option<UserId>) -> Self {
        let scope = FeedScope::Search {
            query: query.into(),
        };
        Self {
            scope,
            page,
            viewer_id,
        }
    }
}
/// Read-side port for feeds.
///
/// Every method returns one page of `FeedEntry` values or a `DomainError`.
// NOTE(review): the five scope-specific methods below take the same data as the
// `FeedScope` variants (`Home`, `Public`, `Search`, `Tag`, `User`) and so overlap
// with `query`. They look like the pre-refactor API that `query` supersedes —
// confirm which of them are meant to remain on the trait.
#[async_trait]
pub trait FeedRepository: Send + Sync {
/// Feed restricted by `following_ids`; same inputs as `FeedScope::Home`.
async fn home_feed(
&self,
following_ids: &[UserId],
page: &PageParams,
viewer_id: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError>;
/// Feed with no scope-specific input; same inputs as `FeedScope::Public`.
async fn public_feed(
&self,
page: &PageParams,
viewer_id: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError>;
/// Feed selected by a free-form `query` string; same inputs as `FeedScope::Search`.
async fn search(
&self,
query: &str,
page: &PageParams,
viewer_id: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError>;
/// Feed selected by `tag_name`; same inputs as `FeedScope::Tag`.
async fn tag_feed(
&self,
tag_name: &str,
page: &PageParams,
viewer_id: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError>;
/// Feed selected by `user_id`; same inputs as `FeedScope::User`.
async fn user_feed(
&self,
user_id: &UserId,
page: &PageParams,
viewer_id: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError>;
/// Unified entry point: scope, paging and viewer all come from `q`.
async fn query(&self, q: &FeedQuery) -> Result<Paginated<FeedEntry>, DomainError>;
}
#[async_trait]
@@ -368,166 +374,6 @@ pub trait SearchPort: Send + Sync {
) -> Result<Paginated<User>, DomainError>;
}
/// AP-protocol endpoints for a locally-stored user (local or interned remote).
///
/// Returned by `ActivityPubRepository::get_actor_ap_urls`.
#[derive(Debug, Clone)]
pub struct ActorApUrls {
/// The actor's AP URL.
pub ap_id: String,
/// URL of the actor's inbox.
pub inbox_url: String,
}
/// A local thought ready for AP serialization, with the author's username
/// pre-joined so the handler can build AP URLs without a second query.
#[derive(Debug, Clone)]
pub struct OutboxEntry {
/// The thought to serialize.
pub thought: crate::models::thought::Thought,
/// Username of the thought's author, fetched together with the thought.
pub author_username: Username,
}
/// Storage port for ActivityPub-related data.
///
/// The methods fall into four groups, marked by the section comments below:
/// outbox reads (local → remote), remote actor resolution, inbox processing
/// (remote → local), and node-level stats / AP URL lookups. Every method
/// reports failure as a `DomainError`.
#[async_trait]
pub trait ActivityPubRepository: Send + Sync {
// ── Outbox (local → remote) ──────────────────────────────────────
/// All public local thoughts for this actor. Used for outbox totals
/// and full-collection delivery.
async fn outbox_entries_for_actor(
&self,
user_id: &UserId,
) -> Result<Vec<OutboxEntry>, DomainError>;
/// Cursor page of public local thoughts, newest-first, before `before`.
/// Used for OrderedCollectionPage responses.
///
/// `limit` is the maximum number of entries requested.
// NOTE(review): `before == None` presumably means "start from the newest" — confirm.
async fn outbox_page_for_actor(
&self,
user_id: &UserId,
before: Option<chrono::DateTime<chrono::Utc>>,
limit: usize,
) -> Result<Vec<OutboxEntry>, DomainError>;
// ── Remote actor resolution ──────────────────────────────────────
/// Find the local UserId for a remote actor by its AP URL.
/// Returns `Ok(None)` when no such actor is known.
async fn find_remote_actor_id(&self, actor_ap_url: &str)
-> Result<Option<UserId>, DomainError>;
/// Ensure a remote actor placeholder exists; create one if absent.
/// Idempotent — safe to call multiple times with the same URL.
async fn intern_remote_actor(&self, actor_ap_url: &str) -> Result<UserId, DomainError>;
/// Update display_name and avatar_url for an already-interned remote actor.
async fn update_remote_actor_display(
&self,
user_id: &UserId,
display_name: Option<&str>,
avatar_url: Option<&str>,
) -> Result<(), DomainError>;
// ── Inbox processing (remote → local) ───────────────────────────
/// Persist an incoming remote Note. Idempotent on ap_id.
// The argument list mirrors the fields of the incoming Note, hence the lint allowance.
#[allow(clippy::too_many_arguments)]
async fn accept_note(
&self,
ap_id: &str,
author_id: &UserId,
content: &str,
published: chrono::DateTime<chrono::Utc>,
sensitive: bool,
content_warning: Option<String>,
visibility: &str,
in_reply_to: Option<&str>,
) -> Result<(), DomainError>;
/// Apply an Update to a previously accepted remote Note.
async fn apply_note_update(&self, ap_id: &str, new_content: &str) -> Result<(), DomainError>;
/// Remove a specific remote Note (Delete activity). Only touches
/// remotely-originated thoughts.
async fn retract_note(&self, ap_id: &str) -> Result<(), DomainError>;
/// Remove all Notes from a remote actor (actor-level Delete/Tombstone).
async fn retract_actor_notes(&self, actor_ap_url: &str) -> Result<(), DomainError>;
// ── Node-level stats ─────────────────────────────────────────────
/// Total locally-authored thought count for NodeInfo responses.
async fn count_local_notes(&self) -> Result<u64, DomainError>;
/// Return the ActivityPub object URL for a thought, if one is stored.
/// Returns None for local thoughts (caller constructs URL from base_url + thought_id).
async fn get_thought_ap_id(
&self,
thought_id: &ThoughtId,
) -> Result<Option<String>, DomainError>;
/// Return the AP actor URL and inbox URL for a user, if stored.
/// Returns None for users that have not been federated.
async fn get_actor_ap_urls(&self, user_id: &UserId)
-> Result<Option<ActorApUrls>, DomainError>;
}
/// Port for sending ActivityPub activities that originate from local actions.
///
/// Two delivery shapes appear below: methods that fan out to followers
/// (create, delete, update, announce, undo-announce, actor update) and methods
/// that target one explicit inbox URL (like, undo-like). Every method reports
/// failure as a `DomainError`.
#[async_trait]
pub trait OutboundFederationPort: Send + Sync {
/// Fan out a new local Note to all accepted followers.
async fn broadcast_create(
&self,
author_user_id: &UserId,
thought: &Thought,
author_username: &str,
in_reply_to_url: Option<&str>,
) -> Result<(), DomainError>;
/// Fan out a Delete tombstone for a now-deleted local Note.
/// `thought_ap_id` is pre-constructed by the caller because the thought
/// has already been deleted from the DB when this fires.
async fn broadcast_delete(
&self,
author_user_id: &UserId,
thought_ap_id: &str,
) -> Result<(), DomainError>;
/// Fan out an Update(Note) for an edited local thought.
async fn broadcast_update(
&self,
author_user_id: &UserId,
thought: &Thought,
author_username: &str,
in_reply_to_url: Option<&str>,
) -> Result<(), DomainError>;
/// Fan out an Announce(object_ap_id) for a boost.
async fn broadcast_announce(
&self,
booster_user_id: &UserId,
object_ap_id: &str,
) -> Result<(), DomainError>;
/// Fan out an Undo(Announce) to followers when a boost is removed.
async fn broadcast_undo_announce(
&self,
booster_user_id: &UserId,
object_ap_id: &str,
) -> Result<(), DomainError>;
/// Send a Like activity to a remote thought author's inbox.
/// Only called when a LOCAL user likes a REMOTE thought (one with an ap_id).
async fn broadcast_like(
&self,
liker_user_id: &UserId,
object_ap_id: &str,
author_inbox_url: &str,
) -> Result<(), DomainError>;
/// Send Undo(Like) to a remote thought author's inbox.
async fn broadcast_undo_like(
&self,
liker_user_id: &UserId,
object_ap_id: &str,
author_inbox_url: &str,
) -> Result<(), DomainError>;
/// Fan out an Update(Actor) to all accepted followers after a profile change.
async fn broadcast_actor_update(&self, user_id: &UserId) -> Result<(), DomainError>;
}
#[async_trait]
pub trait FederationSchedulerPort: Send + Sync {

View File

@@ -39,8 +39,6 @@ pub struct TestStore {
pub actor_ap_ids: Arc<Mutex<HashMap<String, UserId>>>,
/// ThoughtId → AP object URL (used by get_thought_ap_id)
pub thought_ap_ids: Arc<Mutex<HashMap<ThoughtId, String>>>,
/// UserId → ActorApUrls (used by get_actor_ap_urls)
pub actor_ap_urls: Arc<Mutex<HashMap<UserId, ActorApUrls>>>,
}
#[async_trait]
@@ -706,63 +704,7 @@ impl RemoteActorConnectionRepository for TestStore {
#[async_trait]
impl FeedRepository for TestStore {
async fn home_feed(
&self,
_ids: &[UserId],
_p: &PageParams,
_v: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError> {
Ok(Paginated {
items: vec![],
total: 0,
page: 1,
per_page: 20,
})
}
async fn public_feed(
&self,
_p: &PageParams,
_v: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError> {
Ok(Paginated {
items: vec![],
total: 0,
page: 1,
per_page: 20,
})
}
async fn search(
&self,
_q: &str,
_p: &PageParams,
_v: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError> {
Ok(Paginated {
items: vec![],
total: 0,
page: 1,
per_page: 20,
})
}
async fn tag_feed(
&self,
_tag_name: &str,
_page: &PageParams,
_viewer_id: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError> {
Ok(Paginated {
items: vec![],
total: 0,
page: 1,
per_page: 20,
})
}
async fn user_feed(
&self,
_user_id: &UserId,
_page: &PageParams,
_viewer_id: Option<&UserId>,
) -> Result<Paginated<FeedEntry>, DomainError> {
async fn query(&self, _q: &crate::ports::FeedQuery) -> Result<Paginated<FeedEntry>, DomainError> {
Ok(Paginated {
items: vec![],
total: 0,
@@ -801,109 +743,6 @@ impl SearchPort for TestStore {
}
}
#[async_trait]
impl ActivityPubRepository for TestStore {
    /// Stub: always reports an empty outbox.
    async fn outbox_entries_for_actor(
        &self,
        _uid: &UserId,
    ) -> Result<Vec<crate::ports::OutboxEntry>, DomainError> {
        Ok(Vec::new())
    }

    /// Stub: always reports an empty page, whatever the cursor or limit.
    async fn outbox_page_for_actor(
        &self,
        _uid: &UserId,
        _before: Option<chrono::DateTime<chrono::Utc>>,
        _limit: usize,
    ) -> Result<Vec<crate::ports::OutboxEntry>, DomainError> {
        Ok(Vec::new())
    }

    /// Looks the URL up in the in-memory `actor_ap_ids` map.
    async fn find_remote_actor_id(
        &self,
        actor_ap_url: &str,
    ) -> Result<Option<UserId>, DomainError> {
        let found = self.actor_ap_ids.lock().unwrap().get(actor_ap_url).cloned();
        Ok(found)
    }

    /// Returns the id already mapped to `actor_ap_url`, or creates a
    /// placeholder non-local user, records the mapping and returns the new id.
    async fn intern_remote_actor(&self, actor_ap_url: &str) -> Result<UserId, DomainError> {
        // An existing mapping is returned as-is, which makes repeated calls
        // with the same URL yield the same id.
        let existing = self.find_remote_actor_id(actor_ap_url).await?;
        if let Some(known) = existing {
            return Ok(known);
        }

        let uid = UserId::new();
        // Derive a handle from the URL path ("/users/alice" → "users_alice");
        // if the URL does not parse, fall back to a prefix of the new id.
        let handle = match url::Url::parse(actor_ap_url) {
            Ok(parsed) => parsed.path().trim_start_matches('/').replace('/', "_"),
            Err(_) => format!("remote_{}", &uid.to_string()[..8]),
        };

        let placeholder = crate::models::user::User {
            id: uid.clone(),
            username: Username::from_trusted(handle),
            email: Email::from_trusted(format!("{}@remote", uid)),
            password_hash: PasswordHash("".into()),
            display_name: None,
            bio: None,
            avatar_url: None,
            header_url: None,
            custom_css: None,
            local: false,
            created_at: chrono::Utc::now(),
            updated_at: chrono::Utc::now(),
        };

        // Store the user first, then the URL → id mapping.
        self.users.lock().unwrap().push(placeholder);
        let key = actor_ap_url.to_string();
        self.actor_ap_ids.lock().unwrap().insert(key, uid.clone());
        Ok(uid)
    }

    /// Stub: accepts the update and stores nothing.
    async fn update_remote_actor_display(
        &self,
        _user_id: &UserId,
        _display_name: Option<&str>,
        _avatar_url: Option<&str>,
    ) -> Result<(), DomainError> {
        Ok(())
    }

    /// Stub: accepts the note and stores nothing.
    async fn accept_note(
        &self,
        _ap_id: &str,
        _author_id: &UserId,
        _content: &str,
        _published: chrono::DateTime<chrono::Utc>,
        _sensitive: bool,
        _content_warning: Option<String>,
        _visibility: &str,
        _in_reply_to: Option<&str>,
    ) -> Result<(), DomainError> {
        Ok(())
    }

    /// Stub: no-op.
    async fn apply_note_update(&self, _ap_id: &str, _new_content: &str) -> Result<(), DomainError> {
        Ok(())
    }

    /// Stub: no-op.
    async fn retract_note(&self, _ap_id: &str) -> Result<(), DomainError> {
        Ok(())
    }

    /// Stub: no-op.
    async fn retract_actor_notes(&self, _actor_ap_url: &str) -> Result<(), DomainError> {
        Ok(())
    }

    /// Counts the stored thoughts whose `local` flag is set.
    async fn count_local_notes(&self) -> Result<u64, DomainError> {
        let local_count = self
            .thoughts
            .lock()
            .unwrap()
            .iter()
            .filter(|t| t.local)
            .count();
        Ok(local_count as u64)
    }

    /// Looks the thought up in the in-memory `thought_ap_ids` map.
    async fn get_thought_ap_id(
        &self,
        thought_id: &ThoughtId,
    ) -> Result<Option<String>, DomainError> {
        let found = self.thought_ap_ids.lock().unwrap().get(thought_id).cloned();
        Ok(found)
    }

    /// Looks the user up in the in-memory `actor_ap_urls` map.
    async fn get_actor_ap_urls(
        &self,
        user_id: &UserId,
    ) -> Result<Option<ActorApUrls>, DomainError> {
        let found = self.actor_ap_urls.lock().unwrap().get(user_id).cloned();
        Ok(found)
    }
}
#[async_trait]
impl FederationSchedulerPort for TestStore {
@@ -964,31 +803,6 @@ impl OutboxWriter for NoOpOutboxWriter {
}
}
#[cfg(test)]
mod ap_repo_tests {
    use super::*;
    use crate::value_objects::UserId;

    /// A fresh store has no outbox entries for any actor.
    #[tokio::test]
    async fn test_store_outbox_returns_empty() {
        let store = TestStore::default();
        let actor = UserId::new();
        let entries = store.outbox_entries_for_actor(&actor).await.unwrap();
        assert!(entries.is_empty());
    }

    /// Interning the same actor URL twice yields the same user id.
    #[tokio::test]
    async fn test_store_intern_creates_placeholder() {
        let store = TestStore::default();
        let url = "https://example.com/users/alice";
        let first = store.intern_remote_actor(url).await.unwrap();
        let second = store.intern_remote_actor(url).await.unwrap();
        assert_eq!(first, second, "intern must be idempotent");
    }
}
#[cfg(test)]
mod federation_port_tests {
use super::*;