From 1d35cedf3f8a9df7eeb742c7e03d54272b78afce Mon Sep 17 00:00:00 2001 From: Gabriel Kaszewski Date: Sat, 16 May 2026 02:43:39 +0200 Subject: [PATCH] docs: federated hashtag indexing implementation plan --- .../2026-05-16-federated-hashtag-indexing.md | 402 ++++++++++++++++++ 1 file changed, 402 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-16-federated-hashtag-indexing.md diff --git a/docs/superpowers/plans/2026-05-16-federated-hashtag-indexing.md b/docs/superpowers/plans/2026-05-16-federated-hashtag-indexing.md new file mode 100644 index 0000000..6fb64b2 --- /dev/null +++ b/docs/superpowers/plans/2026-05-16-federated-hashtag-indexing.md @@ -0,0 +1,402 @@ +# Federated Hashtag Indexing Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** When a federated Note arrives via the ActivityPub inbox, extract its hashtags from the AP `tag` array and attach them to the stored thought so they appear in tag feeds. + +**Architecture:** `accept_note` is changed to return `ThoughtId` (instead of `()`) by doing a SELECT after the INSERT. The `ThoughtsObjectHandler` gains a `TagRepository` dependency; after `accept_note` succeeds, it walks the Note's `tag` array, finds entries with `type == "Hashtag"`, strips and lowercases the `name`, and calls `tag_repo.find_or_create` + `tag_repo.attach_to_thought`. Bootstrap wires the new dependency. + +**Tech Stack:** Rust, sqlx (Postgres), async-trait, `serde_json::Value` (AP tag array), `domain::ports::TagRepository` + +--- + +## Key Facts + +- `ActivityPubRepository::accept_note` is defined in `crates/adapters/activitypub-base/src/ap_ports.rs` line 65 — currently returns `Result<(), DomainError>` +- `PgActivityPubRepository::accept_note` is in `crates/adapters/postgres/src/activitypub.rs` line 213 — does `INSERT ... ON CONFLICT(ap_id) DO NOTHING` and maps result to `()` +- `ThoughtsObjectHandler` is in `crates/adapters/activitypub/src/handler.rs` — has `repo: Arc` and calls `accept_note` at line 141 +- `note.tag` is `Vec` (see `crates/adapters/activitypub/src/note.rs` line 28) +- `TagRepository::find_or_create(&str) -> Result` and `attach_to_thought(&ThoughtId, i32) -> Result<(), DomainError>` are in `crates/domain/src/ports.rs` lines 158–163 +- `ThoughtId` is already imported in `postgres/src/activitypub.rs` (line 13) +- Bootstrap wires `ThoughtsObjectHandler::new(...)` at `crates/bootstrap/src/factory.rs` line 76 +- Baseline: `cargo test` passes 149 tests + +--- + +## File Map + +| File | Change | +|---|---| +| `crates/adapters/activitypub-base/src/ap_ports.rs` | `accept_note` return: `() → ThoughtId` | +| `crates/adapters/postgres/src/activitypub.rs` | Return `ThoughtId` from `accept_note` — INSERT then SELECT | +| `crates/adapters/activitypub/src/handler.rs` | Add `tag_repo` field; extract hashtags + attach after `accept_note` | +| `crates/bootstrap/src/factory.rs` | Pass `PgTagRepository` to `ThoughtsObjectHandler::new` | + +--- + +## Task 1: Change `accept_note` trait signature to return `ThoughtId` + +**Files:** +- Modify: `crates/adapters/activitypub-base/src/ap_ports.rs:65-75` + +- [ ] **Step 1: Add `ThoughtId` import** + +At the top of `crates/adapters/activitypub-base/src/ap_ports.rs`, `ThoughtId` must be in scope. Check existing imports. If not present, add: + +```rust +use domain::value_objects::ThoughtId; +``` + +- [ ] **Step 2: Change the return type** + +Find `accept_note` (line 65). Change `-> Result<(), DomainError>` to `-> Result`: + +```rust +/// Persist an incoming remote Note. Idempotent on ap_id. +#[allow(clippy::too_many_arguments)] +async fn accept_note( + &self, + ap_id: &str, + author_id: &UserId, + content: &str, + published: chrono::DateTime, + sensitive: bool, + content_warning: Option, + visibility: &str, + in_reply_to: Option<&str>, +) -> Result; +``` + +- [ ] **Step 3: Verify it compiles (expect errors in impl)** + +```bash +cargo build -p activitypub-base 2>&1 | tail -5 +``` + +Expected: the `activitypub-base` crate itself compiles. Errors in `postgres` and `activitypub` crates are expected — fixed in next tasks. + +- [ ] **Step 4: Commit** + +```bash +git add crates/adapters/activitypub-base/src/ap_ports.rs +git commit -m "refactor(ap-ports): accept_note returns ThoughtId instead of ()" +``` + +--- + +## Task 2: Update `PgActivityPubRepository::accept_note` to return `ThoughtId` + +**Files:** +- Modify: `crates/adapters/postgres/src/activitypub.rs:213-256` + +- [ ] **Step 1: Write a failing test for the new return type** + +In `crates/adapters/postgres/src/activitypub.rs`, find the `#[cfg(test)]` block (around line 330). Add: + +```rust +#[sqlx::test(migrations = "./migrations")] +async fn accept_note_returns_thought_id(pool: sqlx::PgPool) { + let repo = PgActivityPubRepository::new(pool.clone()); + // Create a remote actor first + let actor_user_id = repo + .intern_remote_actor("https://remote.example/users/alice") + .await + .unwrap(); + + let thought_id = repo + .accept_note( + "https://remote.example/notes/1", + &actor_user_id, + "Hello #rust world", + chrono::Utc::now(), + false, + None, + "public", + None, + ) + .await + .unwrap(); + + // Verify the returned ThoughtId is a real UUID in the DB + let row: (uuid::Uuid,) = sqlx::query_as("SELECT id FROM thoughts WHERE ap_id=$1") + .bind("https://remote.example/notes/1") + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(thought_id.as_uuid(), row.0); +} +``` + +- [ ] **Step 2: Run to confirm it fails** + +```bash +cargo test -p postgres accept_note_returns_thought_id 2>&1 | tail -10 +``` + +Expected: compile error — `accept_note` still returns `()`. + +- [ ] **Step 3: Update the implementation** + +Replace the `accept_note` body (lines 213–256). The change: after the INSERT, SELECT the `id` by `ap_id` and wrap it in `ThoughtId`. + +```rust +async fn accept_note( + &self, + ap_id: &str, + author_id: &UserId, + content: &str, + published: DateTime, + sensitive: bool, + content_warning: Option, + visibility: &str, + in_reply_to: Option<&str>, +) -> Result { + let capped: String = content.chars().take(MAX_REMOTE_CONTENT_CHARS).collect(); + let (in_reply_to_id, in_reply_to_url) = match in_reply_to { + Some(url) => { + let local_uuid = url::Url::parse(url).ok().and_then(|u| { + u.path() + .strip_prefix(THOUGHTS_PATH_PREFIX) + .and_then(|s| s.split('/').next()) + .and_then(|s| uuid::Uuid::parse_str(s).ok()) + }); + (local_uuid, Some(url.to_string())) + } + None => (None, None), + }; + sqlx::query( + "INSERT INTO thoughts(id,user_id,content,ap_id,visibility,sensitive,local,content_warning,created_at,in_reply_to_id,in_reply_to_url) + VALUES($1,$2,$3,$4,$8,$5,false,$6,$7,$9,$10) ON CONFLICT(ap_id) DO NOTHING", + ) + .bind(uuid::Uuid::new_v4()) + .bind(author_id.as_uuid()) + .bind(&capped) + .bind(ap_id) + .bind(sensitive) + .bind(content_warning) + .bind(published) + .bind(visibility) + .bind(in_reply_to_id) + .bind(&in_reply_to_url) + .execute(&self.pool) + .await + .into_domain()?; + + // SELECT the id regardless of whether the INSERT was a no-op (idempotent). + let row: (uuid::Uuid,) = + sqlx::query_as("SELECT id FROM thoughts WHERE ap_id=$1") + .bind(ap_id) + .fetch_one(&self.pool) + .await + .into_domain()?; + Ok(ThoughtId::from_uuid(row.0)) +} +``` + +`ThoughtId::from_uuid(Uuid) -> ThoughtId` is generated by the `uuid_id!` macro in `crates/domain/src/value_objects.rs` — the call above is correct. + +- [ ] **Step 4: Run the test** + +```bash +cargo test -p postgres accept_note_returns_thought_id 2>&1 | tail -5 +``` + +Expected: PASS. + +- [ ] **Step 5: Run all backend tests** + +```bash +cargo test 2>&1 | tail -5 +``` + +Expected: 149+ tests pass (the new test is extra). + +- [ ] **Step 6: Commit** + +```bash +git add crates/adapters/postgres/src/activitypub.rs +git commit -m "fix(postgres): accept_note returns ThoughtId via SELECT after INSERT" +``` + +--- + +## Task 3: Add hashtag indexing to `ThoughtsObjectHandler` + +**Files:** +- Modify: `crates/adapters/activitypub/src/handler.rs` + +- [ ] **Step 1: Add `TagRepository` import and field** + +At the top of `handler.rs`, add: +```rust +use domain::ports::TagRepository; +``` + +Add `tag_repo` to the struct and constructor: + +```rust +pub struct ThoughtsObjectHandler { + repo: Arc, + urls: ThoughtsUrls, + event_publisher: Option>, + tag_repo: Arc, +} + +impl ThoughtsObjectHandler { + pub fn new( + repo: Arc, + base_url: &str, + event_publisher: Option>, + tag_repo: Arc, + ) -> Self { + Self { + repo, + urls: ThoughtsUrls::new(base_url), + event_publisher, + tag_repo, + } + } +} +``` + +- [ ] **Step 2: Use the returned `ThoughtId` and attach hashtags** + +In the `handle_note` (or equivalent) method, find where `accept_note` is called (around line 141). Change: + +```rust +// Before: +self.repo + .accept_note( + ap_id.as_str(), + &author_id, + ¬e.content, + note.published, + note.sensitive, + note.summary, + visibility, + note.in_reply_to.as_ref().map(|u| u.as_str()), + ) + .await + .map_err(|e| anyhow!("{e}"))?; +``` + +To: + +```rust +// After: +let thought_id = self.repo + .accept_note( + ap_id.as_str(), + &author_id, + ¬e.content, + note.published, + note.sensitive, + note.summary, + visibility, + note.in_reply_to.as_ref().map(|u| u.as_str()), + ) + .await + .map_err(|e| anyhow!("{e}"))?; + +// Extract hashtags from the AP tag array and index them. +let hashtag_names: Vec = note + .tag + .iter() + .filter(|t| t.get("type").and_then(|v| v.as_str()) == Some("Hashtag")) + .filter_map(|t| t.get("name").and_then(|v| v.as_str())) + .map(|name| name.trim_start_matches('#').to_lowercase()) + .filter(|name| !name.is_empty()) + .collect(); + +for name in hashtag_names { + if let Ok(tag) = self.tag_repo.find_or_create(&name).await { + let _ = self.tag_repo.attach_to_thought(&thought_id, tag.id).await; + } +} +``` + +- [ ] **Step 3: Build to catch errors** + +```bash +cargo build -p activitypub 2>&1 | tail -10 +``` + +Expected: compiles. If `TagRepository` or `ThoughtId` imports are wrong, fix them now. + +- [ ] **Step 4: Run all tests** + +```bash +cargo test 2>&1 | tail -5 +``` + +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add crates/adapters/activitypub/src/handler.rs +git commit -m "feat(activitypub): index hashtags from incoming federated notes" +``` + +--- + +## Task 4: Update bootstrap to inject `TagRepository` + +**Files:** +- Modify: `crates/bootstrap/src/factory.rs:76-80` + +- [ ] **Step 1: Find the `ThoughtsObjectHandler::new(...)` call** + +Around line 76: +```rust +Arc::new(ThoughtsObjectHandler::new( + Arc::new(PgActivityPubRepository::new(pool.clone())), + &cfg.base_url, + Some(event_publisher.clone()), +)) +``` + +- [ ] **Step 2: Add the `tag_repo` argument** + +```rust +Arc::new(ThoughtsObjectHandler::new( + Arc::new(PgActivityPubRepository::new(pool.clone())), + &cfg.base_url, + Some(event_publisher.clone()), + Arc::new(postgres::tag::PgTagRepository::new(pool.clone())), +)) +``` + +`PgTagRepository` is already imported/used in the same file (line 99 constructs one for `AppState`). No new import needed. + +- [ ] **Step 3: Build the full workspace** + +```bash +cargo build 2>&1 | tail -5 +``` + +Expected: clean build, no errors. + +- [ ] **Step 4: Run all tests** + +```bash +cargo test 2>&1 | tail -5 +``` + +Expected: 150+ tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add crates/bootstrap/src/factory.rs +git commit -m "feat(bootstrap): inject TagRepository into ThoughtsObjectHandler" +``` + +--- + +## Final Verification + +- [ ] `cargo test` passes all tests +- [ ] `grep -n "tag_repo" crates/adapters/activitypub/src/handler.rs` shows both the field and the usage after `accept_note` +- [ ] `grep -n "ThoughtId" crates/adapters/activitypub-base/src/ap_ports.rs` shows the return type on `accept_note` +- [ ] `grep -n "PgTagRepository" crates/bootstrap/src/factory.rs` shows two usages (existing AppState one + new handler one)