Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
678 changes: 678 additions & 0 deletions toolbox/mdcode/bun.lock

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions toolbox/mdcode/src/libts/gcp/biglake.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// API client for BigLake Metastore Service
//

import * as api from './api';
import * as context from './context';


export interface BigLakeTable {
name: string;
[key: string]: any;
}

interface TableList {
tables?: BigLakeTable[];
nextPageToken?: string;
}


export class BigLakeClient extends api.ApiClient {

constructor(ctx: context.ApiContext, catalogType: 'iceberg') {
const pathPrefix = catalogType === 'iceberg' ? 'iceberg/v1/restcatalog/v1' : '';
super('https://biglake.googleapis.com', pathPrefix, ctx);
}

async *listTables(project: string, location: string, catalog: string, namespace: string): AsyncGenerator<BigLakeTable> {
const name = `projects/${project}/catalogs/${catalog}/namespaces/${namespace}/tables`;

const params: Record<string, any> = {};
const res = await this._get<{ identifiers?: Array<{ namespace: string[], name: string }> }>(name, params);
if (res.status !== 200) {
throw new Error(`Failed to list BigLake Iceberg tables: ${res.message || res.status}`);
}

const tables = res.result?.identifiers || [];
for (const table of tables) {
yield {
name: `projects/${project}/locations/${location}/catalogs/${catalog}/namespaces/${namespace}/tables/${table.name}`
};
}
}
}
1 change: 1 addition & 0 deletions toolbox/mdcode/src/libts/gcp/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
export * from './context';
export * from './api';
export * from './dataplex';
export * from './biglake';
6 changes: 4 additions & 2 deletions toolbox/mdcode/src/libts/layout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import * as md from './metadata';
import { StandardLayout } from './layouts/standard';
import { DocumentsLayout } from './layouts/documents';
import { CatalogManifest } from './manifest';

export enum Layouts {
STANDARD = 'standard',
Expand All @@ -23,10 +24,11 @@ export interface CatalogLayout {


export function createLayout(layout: Layouts,
catalogPath: string): CatalogLayout {
catalogPath: string,
manifest?: CatalogManifest): CatalogLayout {
switch (layout) {
case Layouts.STANDARD:
return new StandardLayout(catalogPath);
return new StandardLayout(catalogPath, manifest);
case Layouts.DOCUMENTS:
return new DocumentsLayout(catalogPath);
default:
Expand Down
164 changes: 156 additions & 8 deletions toolbox/mdcode/src/libts/layouts/standard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@ import * as glob from 'glob';
import * as yaml from 'yaml';
import { CatalogLayout } from '../layout';
import * as md from '../metadata';
import { CatalogManifest } from '../manifest';


export class StandardLayout implements CatalogLayout {

private readonly _catalogPath: string;
private readonly _manifest?: CatalogManifest;

private readonly _index = new Map<string, string>();

constructor(catalogPath: string) {
constructor(catalogPath: string, manifest?: CatalogManifest) {
this._catalogPath = catalogPath;
this._manifest = manifest;
}

async init(): Promise<void> {
Expand All @@ -26,11 +29,7 @@ export class StandardLayout implements CatalogLayout {
return;
}

const matches = await glob.glob('**/*.yaml', {
cwd: this._catalogPath,
absolute: true,
nodir: true,
});
const matches = await walkDir(this._catalogPath, '.yaml');

for (const localPath of matches) {
try {
Expand Down Expand Up @@ -62,14 +61,87 @@ export class StandardLayout implements CatalogLayout {
}

const content = await fs.promises.readFile(entryPath, 'utf8');
return yaml.parse(content) as md.Entry;
const entry = yaml.parse(content) as md.Entry;

// Load and merge any markdown sidecar files
const dir = path.dirname(entryPath);
const baseName = path.basename(entryPath, '.yaml');
try {
const files = await fs.promises.readdir(dir);
const sidecarFiles = files.filter(f => f.startsWith(`${baseName}.`) && f.endsWith('.md'));

for (const sidecarFile of sidecarFiles) {
const aspectSuffix = sidecarFile.substring(baseName.length + 1, sidecarFile.length - 3);
let matchedKey = aspectSuffix;

if (this._manifest) {
const snapshotAspects = this._manifest.snapshotConfig?.aspects || [];
const publishingAspects = this._manifest.publishingConfig?.aspects || [];
const allAspects = Array.from(new Set([...snapshotAspects, ...publishingAspects]));

for (const key of allAspects) {
if (key === aspectSuffix || key.endsWith(`.${aspectSuffix}`)) {
matchedKey = key;
break;
}
}
}

const sidecarPath = path.join(dir, sidecarFile);
const sidecarContent = await fs.promises.readFile(sidecarPath, 'utf8');
const parsed = parseSidecarMarkdown(sidecarContent);

if (!entry.aspects) {
entry.aspects = {};
}
if (!entry.aspects[matchedKey]) {
entry.aspects[matchedKey] = {};
}

Object.assign(entry.aspects[matchedKey], parsed.data, {
content: parsed.body.trim()
});

if (!entry.aspects[matchedKey].contentType && matchedKey.endsWith('.overview')) {
entry.aspects[matchedKey].contentType = 'MARKDOWN';
}
}
}
catch (err) {
// Ignore reading directory errors
}

return entry;
}

async saveEntry(name: string, entry: md.Entry): Promise<void> {
const entryPath = path.join(this._catalogPath, `${name}.yaml`);
await fs.promises.mkdir(path.dirname(entryPath), { recursive: true });

await fs.promises.writeFile(entryPath, yaml.stringify(entry), 'utf8');
// Clone the entry to avoid modifying the original entry aspects
const entryClone = JSON.parse(JSON.stringify(entry)) as md.Entry;

if (entryClone.aspects) {
const dir = path.dirname(entryPath);
const baseName = path.basename(entryPath, '.yaml');

for (const key in entryClone.aspects) {
const aspectData = entryClone.aspects[key];
if (isMarkdownAspect(key, aspectData)) {
const sidecarPath = path.join(dir, `${baseName}.${key}.md`);
const sidecarContent = toSidecarMarkdown(aspectData);
await fs.promises.writeFile(sidecarPath, sidecarContent, 'utf8');

delete entryClone.aspects[key];
}
}

if (Object.keys(entryClone.aspects).length === 0) {
delete entryClone.aspects;
}
}

await fs.promises.writeFile(entryPath, yaml.stringify(entryClone), 'utf8');
this._index.set(name, entryPath);
}

Expand All @@ -79,7 +151,83 @@ export class StandardLayout implements CatalogLayout {
throw new Error(`Entry not found: ${name}`);
}

// Delete the entry YAML file
await fs.promises.unlink(entryPath);
this._index.delete(name);

// Delete any associated markdown sidecar files
const dir = path.dirname(entryPath);
const baseName = path.basename(entryPath, '.yaml');
try {
const files = await fs.promises.readdir(dir);
const sidecars = files.filter(f => f.startsWith(`${baseName}.`) && f.endsWith('.md'));
for (const sidecar of sidecars) {
await fs.promises.unlink(path.join(dir, sidecar));
}
}
catch (err) {
// Ignore reading directory errors
}
}
}

function parseSidecarMarkdown(content: string): { data: Record<string, any>; body: string } {
const lines = content.split(/\r?\n/);
if (lines[0] !== '---') {
return { data: {}, body: content };
}
const endIndex = lines.indexOf('---', 1);
if (endIndex === -1) {
return { data: {}, body: content };
}

const frontmatter = lines.slice(1, endIndex).join('\n');
const data = yaml.parse(frontmatter) || {};
const body = lines.slice(endIndex + 1).join('\n');

return { data, body };
}

function toSidecarMarkdown(aspectData: Record<string, any>): string {
const cloned = JSON.parse(JSON.stringify(aspectData));
const body = cloned.content || '';
delete cloned.content;
delete cloned.contentType;

if (Object.keys(cloned).length === 0) {
return body;
}

const frontmatter = yaml.stringify(cloned).trim();
return `---\n${frontmatter}\n---\n${body}`;
}


function isMarkdownAspect(key: string, data: any): boolean {
if (key === 'dataplex-types.global.overview' || key.endsWith('.overview')) {
return true;
}
if (data && typeof data === 'object' && data.contentType === 'MARKDOWN') {
return true;
}
return false;
}


async function walkDir(dir: string, ext: string): Promise<string[]> {
const files: string[] = [];
try {
const entries = await fs.promises.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
if (entry.isDirectory()) {
files.push(...(await walkDir(fullPath, ext)));
} else if (entry.isFile() && entry.name.endsWith(ext)) {
files.push(fullPath);
}
}
} catch (err) {
// Ignore errors reading directories
}
return files;
}
9 changes: 9 additions & 0 deletions toolbox/mdcode/src/libts/manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ export class CatalogManifest {
return new CatalogManifest(source);
}

static async initWithBigLakeNamespace(name: string, catalogType: 'iceberg', ctx: gcp.ApiContext): Promise<CatalogManifest> {
const source = await createSource(
catalogType === 'iceberg' ? Sources.BIGLAKE_ICEBERG_NAMESPACE : Sources.BIGLAKE_NAMESPACE,
name,
ctx
);
return new CatalogManifest(source);
}

static async load(path: string, ctx: gcp.ApiContext): Promise<CatalogManifest> {
const content = fs.readFileSync(path, 'utf8');
const parsed = yaml.parse(content);
Expand Down
29 changes: 28 additions & 1 deletion toolbox/mdcode/src/libts/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export class CatalogSnapshot {
this.manifest = manifest;

const catalogPath = path.join(this.basePath, 'catalog');
this._layout = createLayout(manifest.source.layout, catalogPath);
this._layout = createLayout(manifest.source.layout, catalogPath, manifest);
}

static async fromPath(basePath: string, ctx: gcp.ApiContext): Promise<CatalogSnapshot> {
Expand Down Expand Up @@ -141,6 +141,16 @@ export class CatalogSnapshot {
const parts = entryType.split('.');
const res = await catalog.getEntryType(parts[0], parts[1], parts[2]);
if (!res.result) {
if (res.status === 403) {
console.warn(`Warning: Permission denied loading type information for entry type ${entryType}. Proceeding...`);
const placeholderType: dataplex.EntryType = {
name: `projects/${parts[0]}/locations/${parts[1]}/entryTypes/${parts[2]}`,
requiredAspects: []
};
this._entryTypes.set(placeholderType.name, placeholderType);
this._entryTypes.set(entryType, placeholderType);
continue;
}
throw new Error(`Unable to load type information for entry type ${entryType}`);
}

Expand All @@ -152,6 +162,15 @@ export class CatalogSnapshot {
const parts = requiredAspect.type.split('/');
const res = await catalog.getAspectType(parts[1], parts[3], parts[5]);
if (!res.result) {
if (res.status === 403) {
console.warn(`Warning: Permission denied loading type information for required aspect type ${requiredAspect.type}. Proceeding...`);
const placeholderAspect: dataplex.AspectType = {
name: requiredAspect.type
};
this._aspectTypes.set(placeholderAspect.name, placeholderAspect);
this._aspectTypes.set(`${parts[0]}.${parts[3]}.${parts[5]}`, placeholderAspect);
continue;
}
throw new Error(`Unable to load type information for aspect type ${requiredAspect.type}`);
}
this._aspectTypes.set(res.result.name, res.result);
Expand All @@ -168,6 +187,14 @@ export class CatalogSnapshot {
const parts = aspectType.split('.');
const res = await catalog.getAspectType(parts[0], parts[1], parts[2]);
if (!res.result) {
if (res.status === 403) {
const placeholderAspect: dataplex.AspectType = {
name: `projects/${parts[0]}/locations/${parts[1]}/aspectTypes/${parts[2]}`
};
this._aspectTypes.set(placeholderAspect.name, placeholderAspect);
this._aspectTypes.set(aspectType, placeholderAspect);
continue;
}
throw new Error(`Unable to load type information for aspect type ${aspectType}`);
}
this._aspectTypes.set(res.result.name, res.result);
Expand Down
Loading