Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion packages/backend/native/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ version = "1.0.0"
crate-type = ["cdylib"]

[dependencies]
affine_common = { workspace = true, features = ["doc-loader"] }
affine_common = { workspace = true, features = ["doc-loader", "hashcash"] }
chrono = { workspace = true }
file-format = { workspace = true }
infer = { workspace = true }
Expand Down
4 changes: 2 additions & 2 deletions packages/common/native/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ name = "affine_common"
version = "0.1.0"

[features]
default = ["hashcash"]
default = []
doc-loader = [
"docx-parser",
"infer",
Expand Down Expand Up @@ -35,7 +35,7 @@ tree-sitter = [
"dep:tree-sitter-scala",
"dep:tree-sitter-typescript",
]
ydoc-loader = ["assert-json-diff", "y-octo"]
ydoc-loader = ["assert-json-diff", "serde", "serde_json", "thiserror", "y-octo"]

[dependencies]
chrono = { workspace = true }
Expand Down
2 changes: 2 additions & 0 deletions packages/common/nbstore/src/impls/sqlite/db.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { AutoReconnectConnection } from '../../connection';
import type {
BlobRecord,
CrawlResult,
DocClock,
DocRecord,
ListedBlobRecord,
Expand Down Expand Up @@ -81,6 +82,7 @@ export interface NativeDBApis {
peer: string,
blobId: string
) => Promise<Date | null>;
crawlDocData: (id: string, docId: string) => Promise<CrawlResult>;
}

type NativeDBApisWrapper = NativeDBApis extends infer APIs
Expand Down
125 changes: 125 additions & 0 deletions packages/common/nbstore/src/impls/sqlite/doc.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { share } from '../../connection';
import {
type BlockInfo,
type CrawlResult,
type DocClocks,
type DocRecord,
DocStorageBase,
Expand Down Expand Up @@ -79,4 +81,127 @@ export class SqliteDocStorage extends DocStorageBase<SqliteNativeDBOptions> {
updates.map(update => update.timestamp)
);
}

/**
 * Crawls `docId` through the native database binding and validates the
 * returned payload before handing it to callers.
 *
 * @param docId - id of the document to crawl.
 * @returns the normalized crawl result, or `null` when
 * `normalizeNativeCrawlResult` rejects the native payload.
 */
override async crawlDocData(docId: string): Promise<CrawlResult | null> {
  return normalizeNativeCrawlResult(await this.db.crawlDocData(docId));
}
}

/**
 * Validates the untyped value produced by the native `crawlDocData` call and
 * converts it into a `CrawlResult`.
 *
 * A warning is logged and `null` is returned when:
 * - the value is not an object,
 * - `title` / `summary` are not strings or `blocks` is not an array,
 * - no entry of `blocks` survives `normalizeBlock`.
 *
 * @param result - raw value returned by the native binding.
 * @returns the validated result, or `null` if the payload is unusable.
 */
function normalizeNativeCrawlResult(result: unknown): CrawlResult | null {
  if (!isRecord(result)) {
    console.warn('[nbstore] crawlDocData returned non-object result');
    return null;
  }

  const hasBasicFields =
    typeof result.title === 'string' &&
    typeof result.summary === 'string' &&
    Array.isArray(result.blocks);
  if (!hasBasicFields) {
    console.warn('[nbstore] crawlDocData result missing basic fields');
    return null;
  }

  // The shape was verified above; the assertions only inform the compiler.
  const { title, summary } = result as { title: string; summary: string };
  const candidates = result.blocks as unknown[];

  // Malformed entries are dropped individually instead of failing the whole
  // document.
  const blocks = candidates
    .map(candidate => normalizeBlock(candidate))
    .filter((entry): entry is BlockInfo => entry !== null);

  if (blocks.length === 0) {
    console.warn('[nbstore] crawlDocData has no valid blocks');
    return null;
  }

  return { blocks, title, summary };
}

/**
 * Converts one raw block entry into a `BlockInfo`.
 *
 * `blockId` and `flavour` must both be non-empty strings; otherwise the
 * entry is rejected. Every other field is optional and becomes `undefined`
 * when it is missing or has an unexpected type.
 *
 * @param block - a single untyped entry from the native `blocks` array.
 * @returns the normalized block, or `null` when the entry is unusable.
 */
function normalizeBlock(block: unknown): BlockInfo | null {
  if (!isRecord(block)) {
    return null;
  }

  const source: Record<string, unknown> = block;
  const text = (key: string) => readStringField(source, key);
  const list = (key: string) => readStringArrayField(source, key);

  const id = text('blockId');
  const kind = text('flavour');
  if (!(id && kind)) {
    return null;
  }

  const info: BlockInfo = {
    blockId: id,
    flavour: kind,
    content: list('content'),
    blob: list('blob'),
    refDocId: list('refDocId'),
    refInfo: list('refInfo'),
    parentFlavour: text('parentFlavour'),
    parentBlockId: text('parentBlockId'),
    additional: safeAdditionalField(source),
  };
  return info;
}

/**
 * Reads `key` from `target` (looked up through `readField`) and returns it
 * only when it is a non-empty string.
 *
 * @param target - object to read from.
 * @param key - camelCase field name.
 * @returns the string value, or `undefined` for missing, non-string or empty
 * values.
 */
function readStringField(
  target: Record<string, unknown>,
  key: string
): string | undefined {
  const raw = readField(target, key);
  if (typeof raw !== 'string' || raw === '') {
    return undefined;
  }
  return raw;
}

/**
 * Reads `key` from `target` (looked up through `readField`) as a list of
 * non-empty strings.
 *
 * - An array keeps only its non-empty string items.
 * - A single non-empty string is wrapped into a one-element array.
 * - Anything else, or a list that ends up empty, yields `undefined`.
 *
 * @param target - object to read from.
 * @param key - camelCase field name.
 */
function readStringArrayField(
  target: Record<string, unknown>,
  key: string
): string[] | undefined {
  const raw = readField(target, key);

  if (typeof raw === 'string') {
    return raw === '' ? undefined : [raw];
  }
  if (!Array.isArray(raw)) {
    return undefined;
  }

  const strings: string[] = [];
  for (const item of raw) {
    if (typeof item === 'string' && item !== '') {
      strings.push(item);
    }
  }
  return strings.length > 0 ? strings : undefined;
}

/**
 * Reads the `additional` field and returns it re-serialized as JSON.
 *
 * The value must be a non-empty string containing valid JSON; it is parsed
 * and stringified again so callers receive a canonical JSON string. Invalid
 * JSON is logged and ignored.
 *
 * @param target - raw block object.
 * @returns the re-serialized JSON string, or `undefined` when the field is
 * missing, not a string, empty, or not parseable.
 */
function safeAdditionalField(
  target: Record<string, unknown>
): string | undefined {
  const raw = readField(target, 'additional');
  if (typeof raw === 'string' && raw !== '') {
    try {
      return JSON.stringify(JSON.parse(raw));
    } catch {
      console.warn(
        '[nbstore] ignore invalid additional payload in crawlDocData block'
      );
    }
  }
  return undefined;
}

/**
 * Looks up `key` on `target`, falling back to the snake_case spelling of the
 * key when the camelCase property is `null` or `undefined`.
 *
 * @param target - object to read from.
 * @param key - camelCase field name.
 * @returns the raw, unvalidated value (possibly `undefined`).
 */
function readField(target: Record<string, unknown>, key: string) {
  const direct = target[key];
  if (direct !== null && direct !== undefined) {
    return direct;
  }
  return target[toSnakeCase(key)];
}

/**
 * Converts a camelCase identifier to snake_case by prefixing every ASCII
 * uppercase letter with `_` and lowercasing it (`refDocId` -> `ref_doc_id`).
 * All other characters are copied unchanged.
 */
function toSnakeCase(key: string) {
  let snake = '';
  for (const ch of key) {
    snake += ch >= 'A' && ch <= 'Z' ? `_${ch.toLowerCase()}` : ch;
  }
  return snake;
}

/**
 * Type guard for plain key/value containers.
 *
 * Arrays are rejected explicitly: `typeof [] === 'object'`, so without the
 * extra check an array would be narrowed to `Record<string, unknown>` even
 * though it is not a keyed record.
 *
 * @param value - any value.
 * @returns `true` when `value` is a non-null, non-array object.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}
24 changes: 24 additions & 0 deletions packages/common/nbstore/src/storage/doc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ import type { Locker } from './lock';
import { SingletonLocker } from './lock';
import { type Storage } from './storage';

/**
 * Data describing a single block of a crawled document, as carried in
 * `CrawlResult.blocks`.
 *
 * Only `blockId` and `flavour` are required; every other field is optional.
 */
export interface BlockInfo {
/** Identifier of the block. */
blockId: string;
/** Flavour string of the block. */
flavour: string;
/** Text entries extracted from the block, if any. */
content?: string[];
/** Blob entries associated with the block — presumably blob ids; confirm against the producer. */
blob?: string[];
/** Referenced document ids, if any. */
refDocId?: string[];
/** Reference info entries; NOTE(review): presumably parallel to `refDocId` — confirm. */
refInfo?: string[];
/** Flavour of the parent block, when known. */
parentFlavour?: string;
/** Id of the parent block, when known. */
parentBlockId?: string;
/** Extra payload as a string — NOTE(review): appears to be JSON-encoded; confirm with the producer. */
additional?: string;
}

/**
 * Result of crawling one document via `DocStorage.crawlDocData`.
 */
export interface CrawlResult {
/** Per-block data extracted from the document. */
blocks: BlockInfo[];
/** Title reported for the document. */
title: string;
/** Summary text reported for the document. */
summary: string;
}

export interface DocClock {
docId: string;
timestamp: Date;
Expand Down Expand Up @@ -94,6 +112,8 @@ export interface DocStorage extends Storage {
subscribeDocUpdate(
callback: (update: DocRecord, origin?: string) => void
): () => void;

crawlDocData?(docId: string): Promise<CrawlResult | null>;
}

export abstract class DocStorageBase<Opts = {}> implements DocStorage {
Expand Down Expand Up @@ -174,6 +194,10 @@ export abstract class DocStorageBase<Opts = {}> implements DocStorage {
};
}

/**
 * Default implementation of the optional `DocStorage.crawlDocData` hook.
 *
 * It ignores its argument and always resolves to `null`; subclasses may
 * override it to supply a real result.
 */
async crawlDocData(_docId: string): Promise<CrawlResult | null> {
return null;
}

// REGION: api for internal usage
protected on(
event: 'update',
Expand Down
77 changes: 57 additions & 20 deletions packages/common/nbstore/src/sync/indexer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -407,33 +407,40 @@ export class IndexerSyncImpl implements IndexerSync {
continue;
}

const docBin = await this.doc.getDoc(docId);
if (!docBin) {
// doc is deleted, just skip
continue;
}
console.log('[indexer] start indexing doc', docId);
const docYDoc = new YDoc({ guid: docId });
applyUpdate(docYDoc, docBin.bin);

let blocks: IndexerDocument<'block'>[] = [];
let preview: string | undefined;

try {
const result = await crawlingDocData({
ydoc: docYDoc,
rootYDoc: this.status.rootDoc,
spaceId: this.status.rootDocId,
docId,
});
if (!result) {
// doc is empty without root block, just skip
const nativeResult = await this.tryNativeCrawlDocData(docId);
if (nativeResult) {
blocks = nativeResult.block;
preview = nativeResult.summary;
} else {
const docBin = await this.doc.getDoc(docId);
if (!docBin) {
// doc is deleted, just skip
continue;
}
blocks = result.blocks;
preview = result.preview;
} catch (error) {
console.error('error crawling doc', error);
const docYDoc = new YDoc({ guid: docId });
applyUpdate(docYDoc, docBin.bin);

try {
const result = await crawlingDocData({
ydoc: docYDoc,
rootYDoc: this.status.rootDoc,
spaceId: this.status.rootDocId,
docId,
});
if (!result) {
// doc is empty without root block, just skip
continue;
}
blocks = result.blocks;
preview = result.preview;
} catch (error) {
console.error('error crawling doc', error);
}
}

await this.indexer.deleteByQuery('block', {
Expand Down Expand Up @@ -484,6 +491,36 @@ export class IndexerSyncImpl implements IndexerSync {
});
}

/**
 * Tries to crawl `docId` through the doc storage's optional `crawlDocData`
 * and converts every returned block into an `IndexerDocument<'block'>`.
 *
 * This is best-effort: it resolves to `null` when the storage does not
 * implement `crawlDocData`, when the call yields no result, or when
 * anything throws (the error is logged as a warning).
 *
 * @param docId - id of the document to crawl.
 * @returns `{ title, block, summary }` on success, otherwise `null`.
 */
private async tryNativeCrawlDocData(docId: string) {
  try {
    const crawled = await this.doc.crawlDocData?.(docId);
    if (!crawled) {
      return null;
    }

    const { title, summary, blocks } = crawled;
    return {
      title,
      block: blocks.map(info =>
        // Indexer document ids are `<docId>:<blockId>`.
        IndexerDocument.from<'block'>(`${docId}:${info.blockId}`, {
          docId,
          blockId: info.blockId,
          content: info.content,
          flavour: info.flavour,
          blob: info.blob,
          refDocId: info.refDocId,
          ref: info.refInfo,
          parentFlavour: info.parentFlavour,
          parentBlockId: info.parentBlockId,
          additional: info.additional,
        })
      ),
      summary,
    };
  } catch (error) {
    console.warn('[indexer] native crawlDocData failed', docId, error);
    return null;
  }
}

private async getAllDocsFromIndexer() {
const docs = await this.indexer.search(
'doc',
Expand Down
Loading
Loading