diff --git a/CHANGES.md b/CHANGES.md index 9398d5286..9dd38cb0c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -221,6 +221,20 @@ To be released. [#771]: https://github.com/fedify-dev/fedify/pull/771 [#772]: https://github.com/fedify-dev/fedify/pull/772 +### @fedify/backfill + + - Added *@fedify/backfill* for reconstructing ActivityPub conversations. + It supports FEP-f228 context collections containing post-like objects or + `Create` activities, optional reply-tree traversal, ordered hybrid + strategies, shared safety budgets, deduplication, and traversal-local + document caching. [[#275], [#779], [#801], [#807], [#816] by Jiwon Kwon] + +[#275]: https://github.com/fedify-dev/fedify/issues/275 +[#779]: https://github.com/fedify-dev/fedify/pull/779 +[#801]: https://github.com/fedify-dev/fedify/pull/801 +[#807]: https://github.com/fedify-dev/fedify/pull/807 +[#816]: https://github.com/fedify-dev/fedify/pull/816 + ### @fedify/fixture - Added `createTestMeterProvider()` and `TestMetricRecorder` helpers for diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9c421d48f..21327fa4e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -407,6 +407,8 @@ The repository is organized as a monorepo with the following packages: creating new Fedify projects. Wraps @fedify/init. - *packages/amqp/*: AMQP/RabbitMQ driver (@fedify/amqp) for Fedify. - *packages/astro/*: Astro integration (@fedify/astro) for Fedify. + - *packages/backfill/*: ActivityPub conversation backfill support + (@fedify/backfill) for Fedify. - *packages/cfworkers/*: Cloudflare Workers integration (@fedify/cfworkers) for Fedify. - *packages/debugger/*: Embedded ActivityPub debug dashboard diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 5c2a39a32..d58897b1f 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -137,6 +137,7 @@ const MANUAL = { { text: "Outbox listeners", link: "/manual/outbox.md" }, { text: "Sending activities", link: "/manual/send.md" }, { text: "Collections", link: "/manual/collections.md" }, + { text: "Conversation backfill", link: "/manual/backfill.md" }, { text: "Object dispatcher", link: "/manual/object.md" }, { text: "Access control", link: "/manual/access-control.md" }, { text: "WebFinger", link: "/manual/webfinger.md" }, diff --git a/docs/manual/backfill.md b/docs/manual/backfill.md new file mode 100644 index 000000000..74f52280f --- /dev/null +++ b/docs/manual/backfill.md @@ -0,0 +1,203 @@ +--- +description: >- + Reconstruct ActivityPub conversations from FEP-f228 context collections or + reply relationships using the @fedify/backfill package. +--- + +Conversation backfill +===================== + +*This API is available since Fedify 2.3.0.* + +Fedify provides the *@fedify/backfill* package for reconstructing ActivityPub +conversations that may be incomplete on the local server. It can retrieve +post-like objects from [FEP-f228] context collections and optionally crawl +`inReplyTo` ancestors and `replies` descendants. + +[FEP-f228]: https://w3id.org/fep/f228 + + +Installation +------------ + +::: code-group + +~~~~ sh [Deno] +deno add jsr:@fedify/backfill +~~~~ + +~~~~ sh [npm] +npm add @fedify/backfill +~~~~ + +~~~~ sh [pnpm] +pnpm add @fedify/backfill +~~~~ + +~~~~ sh [Yarn] +yarn add @fedify/backfill +~~~~ + +~~~~ sh [Bun] +bun add @fedify/backfill +~~~~ + +::: + + +Backfilling a conversation +-------------------------- + +The `backfill()` function accepts a backfill context, a seed object, and +traversal options. The context supplies a `documentLoader` for dereferencing +context collections, collection items, reply targets, and replies collections: + +~~~~ typescript twoslash +import { backfill, type BackfillDocumentLoader } from "@fedify/backfill"; +import { lookupObject, Note } from "@fedify/vocab"; + +declare const note: Note; +// ---cut-before--- +const documentLoader: BackfillDocumentLoader = (iri, options) => + lookupObject(iri, { signal: options?.signal }); + +for await ( + const item of backfill({ documentLoader }, note, { + maxItems: 20, + maxRequests: 50, + }) +) { + console.log(item.id?.href); +} +~~~~ + +The seed object itself is not yielded. If the same object appears in a +discovered collection, it is skipped by ID. + +By default, `backfill()` uses the `"context-auto"` strategy. It expects the +seed's `context` to dereference to a `Collection`, `OrderedCollection`, +`CollectionPage`, or `OrderedCollectionPage`. Ordinary post-like items are +yielded directly, while supported `Create` activities are unwrapped and their +objects are yielded. + +If the seed has no context, or its context resolves to a non-collection, +context strategies yield nothing. + + +Strategies +---------- + +Strategies run in the configured order. They share request and item budgets, +abort state, document caching, and object ID deduplication. If multiple +strategies discover the same object, the first one keeps its `BackfillItem` +metadata. + +`"context-auto"` +: Handles both direct post-like objects and supported `Create` activities + from a context collection. This is the default strategy. + +`"context-objects"` +: Accepts only post-like objects contained directly in a context collection: + + ~~~~ typescript twoslash + import { backfill, type BackfillContext } from "@fedify/backfill"; + import { Note } from "@fedify/vocab"; + + declare const context: BackfillContext; + declare const note: Note; + // ---cut-before--- + for await ( + const item of backfill(context, note, { + strategies: ["context-objects"], + }) + ) { + console.log(item.object); + } + ~~~~ + +`"context-activities"` +: Accepts supported activities from a context collection. It currently + supports `Create` and yields the activity's object rather than the activity + itself: + + ~~~~ typescript twoslash + import { backfill, type BackfillContext } from "@fedify/backfill"; + import { Note } from "@fedify/vocab"; + + declare const context: BackfillContext; + declare const note: Note; + // ---cut-before--- + for await ( + const item of backfill(context, note, { + strategies: ["context-activities"], + }) + ) { + console.log(item.object); + } + ~~~~ + +`"reply-tree"` +: Walks `inReplyTo` ancestors and `replies` descendants. It yields + post-like objects only and does not unwrap Activity objects. This strategy + is opt-in because it can require substantially more network requests than + a context collection. + +For hybrid coverage, run the FEP-f228 path first and use reply-tree traversal +after it: + +~~~~ typescript twoslash +import { backfill, type BackfillContext } from "@fedify/backfill"; +import { Note } from "@fedify/vocab"; + +declare const context: BackfillContext; +declare const note: Note; +// ---cut-before--- +for await ( + const item of backfill(context, note, { + strategies: ["context-auto", "reply-tree"], + maxDepth: 4, + }) +) { + console.log(item.origin, item.depth, item.object); +} +~~~~ + + +Traversal controls +------------------ + +`maxItems` +: Limits the number of yielded objects. Skipped duplicates do not count. + +`maxRequests` +: Limits calls to `documentLoader`. Embedded objects and collections do not + count as requests. + +`maxDepth` +: Limits reply-tree traversal and defaults to 10. Immediate parents and + direct replies have depth 1; their next-level parents or replies have depth + 2, and so on. Context collection items have depth 0 and are not limited by + this option. + +`interval` +: Adds a delay between `documentLoader` requests. A callback receives the + zero-based request index. String durations require the global `Temporal` + API or a polyfill; `Temporal.DurationLike` objects work without the global + API. + +`signal` +: Cancels traversal before requests and yields. The signal is also passed to + `documentLoader`. + + +Caching and failures +-------------------- + +Dereferenced documents are cached in memory for one `backfill()` traversal. +Applications that need persistent or shared caching can implement it in the +provided `documentLoader`. + +Failed external dereferences are skipped so other conversation items can still +be discovered. Failed loads are not retained in the traversal cache, allowing +the same IRI to be retried if another traversal path reaches it. Aborting the +provided signal stops traversal instead of skipping the request. diff --git a/docs/package.json b/docs/package.json index 9d59d59a7..6d6548d20 100644 --- a/docs/package.json +++ b/docs/package.json @@ -5,6 +5,7 @@ "@deno/kv": "^0.8.4", "@fedify/amqp": "workspace:^", "@fedify/astro": "workspace:^", + "@fedify/backfill": "workspace:^", "@fedify/cfworkers": "workspace:^", "@fedify/debugger": "workspace:^", "@fedify/express": "workspace:^", diff --git a/packages/backfill/README.md b/packages/backfill/README.md index cd1e5f33b..a9e4e06c5 100644 --- a/packages/backfill/README.md +++ b/packages/backfill/README.md @@ -11,7 +11,7 @@ This package provides ActivityPub conversation backfill support for the [Fedify] ecosystem. It can retrieve post-like objects from a seed object's -context collection, following the direct FEP-f228-style path where the +context collection, following the direct [FEP-f228] path where the context dereferences to a `Collection`, `OrderedCollection`, `CollectionPage`, or `OrderedCollectionPage`. It can also use an opt-in reply-tree strategy to walk `inReplyTo` ancestors and `replies` descendants when context collections @@ -24,6 +24,7 @@ are unavailable or incomplete. [@fedify@hollo.social badge]: https://fedi-badge.deno.dev/@fedify@hollo.social/followers.svg [@fedify@hollo.social]: https://hollo.social/@fedify [Fedify]: https://fedify.dev/ +[FEP-f228]: https://w3id.org/fep/f228 Installation @@ -73,6 +74,19 @@ collection items are treated as backfillable objects by default. If an item is recognized as a supported `Create` activity, `backfill()` extracts the activity's object instead. +To accept only post-like objects directly contained in the context collection, +use the `context-objects` strategy: + +~~~~ typescript +for await ( + const item of backfill({ documentLoader }, note, { + strategies: ["context-objects"], + }) +) { + console.log(item.object); +} +~~~~ + To read only FEP-f228 activity collections, enable the `context-activities` strategy: @@ -109,3 +123,30 @@ objects from Activity wrappers. Immediate parents and direct replies have depth 1, their next-level parents or replies have depth 2, and so on. Reply-tree traversal defaults to a maximum depth of 10; set `maxDepth` to use a different limit. + + +Traversal controls +------------------ + +All configured strategies share the same traversal controls: + + - `maxItems` limits the number of yielded objects. Skipped duplicates do + not count. + - `maxRequests` limits calls to `documentLoader`. Embedded objects and + collections do not count. + - `maxDepth` limits reply-tree traversal and defaults to 10. It does not + limit context collection items. + - `interval` adds a delay between loader requests. Its callback receives + the zero-based request index. + - `signal` cancels traversal and is forwarded to `documentLoader`. + +An `interval` string requires the global `Temporal` API or a polyfill. +`Temporal.DurationLike` objects work without the global API. + +If the seed has no context, or its context resolves to a non-collection, +context strategies yield nothing. Loader failures are skipped unless +traversal is aborted. + +Dereferenced documents are cached in memory for one `backfill()` traversal. +Applications that need persistent or shared caching can provide it through +the `documentLoader`. diff --git a/packages/backfill/src/backfill.ts b/packages/backfill/src/backfill.ts index 02cb32319..a836f4c4a 100644 --- a/packages/backfill/src/backfill.ts +++ b/packages/backfill/src/backfill.ts @@ -26,7 +26,7 @@ const DEFAULT_MAX_DEPTH = 10; /** * Thrown when backfill traversal exceeds the configured request budget. * - * @since 2.x.0 + * @since 2.3.0 */ export class MaxRequestsExceeded extends Error {} @@ -57,7 +57,7 @@ type ReplyTreeTraversal = { * The seed object is not yielded by default, but its ID is treated as already * seen so it will not be yielded again if the collection contains it. * - * @since 2.x.0 + * @since 2.3.0 */ export async function* backfill< TObject extends APObject = APObject, diff --git a/packages/backfill/src/types.ts b/packages/backfill/src/types.ts index c7d15f80d..58090e698 100644 --- a/packages/backfill/src/types.ts +++ b/packages/backfill/src/types.ts @@ -13,7 +13,7 @@ import type { Object as APObject } from "@fedify/vocab"; * - `"reply-tree"` walks the reply graph through `inReplyTo` ancestors and * `replies` descendants, yielding discovered post-like objects. * - * @since 2.x.0 + * @since 2.3.0 */ export type BackfillStrategy = | "context-objects" @@ -24,10 +24,9 @@ export type BackfillStrategy = /** * Source relation that produced a backfilled object. * - * @since 2.x.0 + * @since 2.3.0 */ export type BackfillOrigin = - | "context" | "collection" | "in-reply-to" | "replies"; @@ -35,7 +34,7 @@ export type BackfillOrigin = /** * Options passed to {@link BackfillDocumentLoader}. * - * @since 2.x.0 + * @since 2.3.0 */ export interface BackfillDocumentLoaderOptions { /** @@ -47,7 +46,7 @@ export interface BackfillDocumentLoaderOptions { /** * Dereferences an ActivityPub object or collection IRI. * - * @since 2.x.0 + * @since 2.3.0 */ export type BackfillDocumentLoader = ( iri: URL, @@ -57,7 +56,7 @@ export type BackfillDocumentLoader = ( /** * Dependencies used by backfill traversal. * - * @since 2.x.0 + * @since 2.3.0 */ export interface BackfillContext { /** @@ -70,7 +69,7 @@ export interface BackfillContext { /** * Controls backfill traversal. * - * @since 2.x.0 + * @since 2.3.0 */ export interface BackfillOptions< TObject extends APObject = APObject, @@ -86,7 +85,7 @@ export interface BackfillOptions< * If `"context-auto"` is included, it absorbs other context collection * strategies. * - * @since 2.x.0 + * @since 2.3.0 */ readonly strategies?: readonly BackfillStrategy[]; @@ -134,7 +133,7 @@ export interface BackfillOptions< /** * A single object discovered by backfill traversal. * - * @since 2.x.0 + * @since 2.3.0 */ export interface BackfillItem< TObject extends APObject = APObject, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index df3689951..b694adbe4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -199,6 +199,9 @@ importers: '@fedify/astro': specifier: workspace:^ version: link:../packages/astro + '@fedify/backfill': + specifier: workspace:^ + version: link:../packages/backfill '@fedify/cfworkers': specifier: workspace:^ version: link:../packages/cfworkers