diff --git a/docs/api-reference/index.mdx b/docs/api-reference/index.mdx index 2607aa9..e9cd6ff 100644 --- a/docs/api-reference/index.mdx +++ b/docs/api-reference/index.mdx @@ -1,7 +1,7 @@ --- -title: Client SDKs +title: SDKs and REST API Reference sidebarTitle: "SDKs" -description: "SDK & REST API reference for LanceDB" +description: "SDK and REST API reference for LanceDB Enterprise and OSS." --- @@ -15,20 +15,35 @@ If you're looking for conceptual and practical namespace guidance before diving ## Supported SDKs -Python, Typescript and Rust SDKs are officially supported by LanceDB. +Python, TypeScript and Rust SDKs are officially supported by LanceDB. You can use these SDKs to interact with both LanceDB OSS and Enterprise deployments. -| SDK Reference | Description | +| Reference | Description | |:--------------|-------------------| -| [Python SDK](https://lancedb.github.io/lancedb/python/python/) | Full-featured Python client with pandas & numpy integration | -| [Typescript SDK](https://lancedb.github.io/lancedb/js/) | A TypeScript wrapper around the Rust library, built with `napi-rs` -| [Rust SDK](https://docs.rs/lancedb/latest/lancedb/index.html) | Native Rust library with persistent-storage and high performance | +| [Python](https://lancedb.github.io/lancedb/python/python/) | Full-featured Python client with pandas & numpy integration | +| [TypeScript](https://lancedb.github.io/lancedb/js/) | A TypeScript wrapper around the Rust library, built with `napi-rs` | +| [Rust](https://docs.rs/lancedb/latest/lancedb/index.html) | Native Rust library with persistent-storage and high performance | -## Examples in other languages +## REST API SDKs -Other language SDKs are available through examples or third-party contributions. +Enterprise -| SDK Examples | Description | +REST API-based SDKs provide a convenient way to interact with LanceDB Cloud and Enterprise deployments using the Lance REST Namespace API. 
+ +| Reference | Description | +|:--------------|-------------------| +| [Java](https://lancedb.github.io/lancedb/java/java/)| REST API Enterprise SDK in Java | + +## Community-driven SDKs + +In addition to the officially supported SDKs, the LanceDB community may contribute SDKs in other languages. +These SDKs may not have the same level of support or feature parity as the official ones supported by LanceDB, but they can be an option +for users working in languages other than those listed above. + +| Reference | Description | |:--------------|-------------------| -| [Java API Quickstart]https://lancedb.github.io/lancedb/java/java/)| Streamline REST API interactions in Java| +| [Go](https://pkg.go.dev/github.com/lancedb/lancedb-go/pkg/lancedb) | Community-contributed Go SDK for LanceDB | +| [Ruby](https://github.com/scientist-labs/lancelot) | Community-contributed Ruby bindings for LanceDB | +| [Swift](https://github.com/RyanLisse/LanceDbSwiftKit) | Community-contributed Swift SDK for LanceDB | +| [R](https://github.com/CathalByrneGit/lancedb) | Community-contributed R package for LanceDB | +| [Flutter](https://github.com/Alexcn/flutter_lancedb) | Community-contributed Flutter bindings for LanceDB | -{/* TODO: Add Go bindings reference page here */} diff --git a/docs/namespaces.mdx b/docs/namespaces.mdx index 570c45b..2a99c95 100644 --- a/docs/namespaces.mdx +++ b/docs/namespaces.mdx @@ -23,61 +23,90 @@ A namespace can contain a collection of tables, and it can also contain namespac ![](/static/assets/images/namespaces/lance-namespace.png) -## Root namespace and the familiar `data/` layout +Before diving into examples, it helps to keep two terms in mind: the **namespace client** is the abstraction that presents a consistent namespace API, while the **namespace implementation** is the concrete backend that resolves namespaces and table locations (for example, a local directory or an external catalog). 
+If you want to go deeper, see the Lance format [namespace documentation](https://lance.org/format/namespace/). + +## Directory namespaces The simplest namespace model in LanceDB is a single root namespace, often represented by one directory: ```bash -/data/ # root namespace -├─ users.lance # table ["users"] in root -└─ orders.lance # table ["orders"] in root +./local_lancedb (root) +└── prod + └── search + └── user (table) + └── data (table) ``` -As a user of LanceDB, you might never notice namespaces at first, because LanceDB exposes the single-level -hierarchy shown above, with the data stored in the `data/` directory, where the root namespace -is implicit. In alternative setups, you could have multiple namespaces that we won't cover here, -but you can learn more about them in the [namespace documentation](https://lance.org/format/namespace/) for the Lance format. +As a user of LanceDB OSS, you might never notice namespaces at first, because LanceDB exposes the single-level hierarchy shown above, with the data stored in the `data/` directory, where the root namespace is implicit. Connecting to this namespace is as simple as connecting to the catalog root: -## Best-practice guidance +```python Python icon="python" +import lancedb -- Use the default, single-level root namespace in LanceDB for locally stored, single-application, or early-stage projects. -- For remote storage locations, introduce explicit namespaces when multiple teams, environments, or domains share the same catalog. -- Treat namespace paths as stable identifiers (for example `"prod/search"`, `"staging/recs"`). -- Avoid hard-coding object-store table paths in application code -- instead, prefer catalog identifiers + namespaces. +# Connect to the directory namespace root +db = lancedb.connect("./local_lancedb") +``` -See the Python example below for how to use namespaces in practice. 
+This will create the default namespace directory (`data/`) under the specified root path: + +You can also explicitly connect to a namespace using `lancedb.connect_namespace(...)` with the directory namespace implementation: ```python Python icon="python" import lancedb # Local namespace-backed catalog root (DirectoryNamespace) -db = lancedb.connect_namespace("dir", {"root": "./namespace_test"}) - -# Business identifier + namespace path (stable app-level IDs) -namespace = ["prod", "recommendations"] -table_name = "user_profiles" - -# Create namespace hierarchy sequentially -for i in range(1, len(namespace) + 1): - db.create_namespace(namespace[:i], mode="exist_ok") - -# Good: resolve table through catalog + namespace -table = db.create_table( - table_name, - data=[{"id": 1, "vector": [0.1, 0.2], "name": "alice"}], - namespace=namespace, - mode="overwrite", -) +# See https://lance.org/format/namespace/dir/catalog-spec/ +db = lancedb.connect_namespace("dir", {"root": "./local_lancedb"}) + +table_name = "user" +data = [{"id": 1, "vector": [0.1, 0.2], "name": "alice"}] + +table = db.create_table(table_name, data=data, mode="create") +print(f"Created table: {table.name}") +# Created table: user +``` -# Bad: Avoid hard-coded physical object-store table paths -# (it's bad for maintainability reasons) -# table = lancedb.connect( -# "s3://my-lakehouse/catalog/prod/recommendations/user_profiles.lance" -# ) + +- For simple use cases in LanceDB OSS, you don't need to go too deep into namespaces. +- To integrate LanceDB with external catalogs and to use it as a true **multimodal lakehouse**, it's useful to understand the different namespace implementations and how to use them in your organization's setup. + + +## Remote or external catalog namespaces + +The example above showed local directory-based namespaces. LanceDB also supports namespaces backed by remote object stores and external catalogs, via the REST namespace implementation. 
+ +For remote object stores with central metadata/catalog services (either commercial or open source), use the REST namespace implementation. +This is backed by REST routes +(for example `POST /v1/namespace/{id}/create` and `GET /v1/namespace/{id}/list`) and server-provided table locations. + +For authentication, any property prefixed with `headers.` is forwarded as an HTTP header +(for example `headers.Authorization` becomes `Authorization`, and `headers.X-API-Key` becomes `X-API-Key`). + +```python Python icon="python" +import os +import lancedb + +# Remote namespace-backed catalog root (RestNamespace) +# See https://lance.org/format/namespace/rest/catalog-spec/ +db = lancedb.connect_namespace( + "rest", + { + "uri": "https://.internal..com", + "headers.x-api-key": os.environ["API_KEY"], + # or: + # "headers.Authorization": f"Bearer {os.environ['REST_AUTH_TOKEN']}", + }, +) ``` +[LanceDB Enterprise](/enterprise) operates a REST namespace server on top of the Lance format, so any REST client that can speak the REST namespace API +contract can be used to interact with it. For authentication examples in LanceDB Enterprise, visit +the [Using Namespaces](/tables/namespaces#namespaces-in-lancedb-enterprise) page. -## SDK usage +## Best practices -1. For language-specific examples of `namespace` usage across Python, TypeScript, and Rust, see "[Using namespaces in SDKs](/tables/namespaces)". -2. For REST-level operations, see the [REST API Reference](/api-reference/rest). +Below, we list some best practices for working with namespaces: +- For simple use cases and single, stand-alone applications, the directory-based root namespace is sufficient and requires no special configuration. +- For remote storage locations, introduce explicit namespaces when multiple teams, environments, or domains share the same catalog. +- Treat namespace paths as stable identifiers (for example `"prod/search"`, `"staging/recs"`). 
+- For maintainability reasons, avoid hard-coding object-store table paths in application code -- instead, prefer catalog identifiers + namespaces. \ No newline at end of file diff --git a/docs/snippets/connection.mdx b/docs/snippets/connection.mdx index a9f69d7..781b80a 100644 --- a/docs/snippets/connection.mdx +++ b/docs/snippets/connection.mdx @@ -12,9 +12,9 @@ export const PyConnectObjectStorage = "import lancedb\n\nuri = \"s3://your-bucke export const PyConnectObjectStorageAsync = "import lancedb\n\nuri = \"s3://your-bucket/path\"\n# You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\nasync_db = await lancedb.connect_async(uri)\n"; -export const PyNamespaceAdminOps = "import lancedb\n\ndb = lancedb.connect_namespace(\"dir\", {\"root\": \"./data/sample-lancedb\"})\nnamespace = [\"prod\", \"search\"]\n\nfor i in range(1, len(namespace) + 1):\n db.create_namespace(namespace[:i], mode=\"exist_ok\")\n\nchild_namespaces = db.list_namespaces(namespace=[\"prod\"]).namespaces\nmetadata = db.describe_namespace([\"prod\", \"search\"])\n\ndb.drop_namespace([\"prod\", \"search\"], mode=\"skip\")\ndb.drop_namespace([\"prod\"], mode=\"skip\")\n"; +export const PyNamespaceAdminOps = "import lancedb\n\ndb = lancedb.connect_namespace(\"dir\", {\"root\": \"./local_lancedb\"})\nnamespace = [\"prod\", \"search\"]\n\ndb.create_namespace([\"prod\"])\ndb.create_namespace([\"prod\", \"search\"])\n\nchild_namespaces = db.list_namespaces(namespace=[\"prod\"]).namespaces\nprint(f\"Child namespaces under {namespace}: {child_namespaces}\")\n# Child namespaces under ['prod', 'search']: ['search']\n\nmetadata = db.describe_namespace([\"prod\", \"search\"])\nprint(f\"Metadata for namespace {namespace}: {metadata}\")\n# Metadata for namespace ['prod', 'search']: properties=None\n\ndb.drop_namespace([\"prod\", \"search\"], mode=\"skip\")\ndb.drop_namespace([\"prod\"], mode=\"skip\")\n"; -export const PyNamespaceTableOps = "import lancedb\n\ndb = 
lancedb.connect_namespace(\"dir\", {\"root\": \"./data/sample-lancedb\"})\nnamespace = [\"prod\", \"search\"]\n\nfor i in range(1, len(namespace) + 1):\n db.create_namespace(namespace[:i], mode=\"exist_ok\")\n\ndb.create_table(\n \"users\",\n data=[{\"id\": 1, \"vector\": [0.1, 0.2], \"name\": \"alice\"}],\n mode=\"overwrite\",\n namespace=namespace,\n)\n\ntable = db.open_table(\"users\", namespace=namespace)\ntables = db.list_tables(namespace=namespace).tables\n\ndb.drop_table(\"users\", namespace=namespace)\n# drop_all_tables is namespace-aware as well:\n# db.drop_all_tables(namespace=namespace)\n"; +export const PyNamespaceTableOps = "import lancedb\n\ndb = lancedb.connect_namespace(\"dir\", {\"root\": \"./local_lancedb\"})\n\n# Create namespace tree: prod/search\ndb.create_namespace([\"prod\"], mode=\"exist_ok\")\ndb.create_namespace([\"prod\", \"search\"], mode=\"exist_ok\")\ndb.create_namespace([\"prod\", \"recommendations\"], mode=\"exist_ok\")\n\ndb.create_table(\n \"user\",\n data=[{\"id\": 1, \"vector\": [0.1, 0.2], \"name\": \"alice\"}],\n namespace=[\"prod\", \"search\"],\n mode=\"create\", # use \"overwrite\" only if you want to replace existing table\n)\n\ndb.create_table(\n \"user\",\n data=[{\"id\": 2, \"vector\": [0.3, 0.4], \"name\": \"bob\"}],\n namespace=[\"prod\", \"recommendations\"],\n mode=\"create\", # use \"overwrite\" only if you want to replace existing table\n)\n\n# Verify\nprint(db.list_namespaces()) # ['prod']\nprint(db.list_namespaces(namespace=[\"prod\"])) # ['recommendations', 'search']\nprint(db.list_tables(namespace=[\"prod\", \"search\"])) # ['user']\nprint(db.list_tables(namespace=[\"prod\", \"recommendations\"])) # ['user']\n"; export const TsConnect = "import * as lancedb from \"@lancedb/lancedb\";\n\nasync function connectExample(uri: string) {\n const db = await lancedb.connect(uri);\n return db;\n}\n"; @@ -22,13 +22,13 @@ export const TsConnectCloud = "const uri = \"db://your-database-uri\";\nconst ap export const 
TsConnectObjectStorage = "async function connectObjectStorageExample() {\n const uri = \"s3://your-bucket/path\";\n // You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\n const db = await lancedb.connect(uri);\n return db;\n}\n"; -export const TsNamespaceTableOps = "const db = await lancedb.connect(uri);\nconst namespace = [\"prod\", \"search\"];\n\nawait db.createTable(\n \"users\",\n [{ id: 1, name: \"alice\" }],\n namespace,\n { mode: \"overwrite\" },\n);\n\nconst table = await db.openTable(\"users\", namespace);\nconst tableNames = await db.tableNames(namespace);\n\nawait db.dropTable(\"users\", namespace);\n// dropAllTables is namespace-aware as well:\n// await db.dropAllTables(namespace);\n"; - export const RsConnect = "async fn connect_example(uri: &str) {\n let db = connect(uri).execute().await.unwrap();\n let _ = db;\n}\n"; export const RsConnectCloud = "let uri = \"db://your-database-uri\";\nlet api_key = \"your-api-key\";\nlet region = \"us-east-1\";\n"; export const RsConnectObjectStorage = "let uri = \"s3://your-bucket/path\";\n// You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\n"; -export const RsNamespaceTableOps = "let conn = connect(uri).execute().await?;\nlet namespace = vec![\"prod\".to_string(), \"search\".to_string()];\n\nlet schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![\n arrow_schema::Field::new(\"id\", arrow_schema::DataType::Int64, false),\n]));\n\nconn.create_empty_table(\"users\", schema)\n .namespace(namespace.clone())\n .execute()\n .await?;\n\nlet _table = conn\n .open_table(\"users\")\n .namespace(namespace.clone())\n .execute()\n .await?;\nlet _table_names = conn\n .table_names()\n .namespace(namespace.clone())\n .execute()\n .await?;\n\nconn.drop_table(\"users\", &namespace).await?;\n// drop_all_tables is namespace-aware as well:\n// conn.drop_all_tables(&namespace).await?;\n"; +export const RsNamespaceAdminOps = "let mut properties = 
std::collections::HashMap::new();\nproperties.insert(\"root\".to_string(), \"./local_lancedb\".to_string());\nlet db = lancedb::connect_namespace(\"dir\", properties).execute().await?;\nlet namespace = vec![\"prod\".to_string(), \"search\".to_string()];\n\ndb.create_namespace(lancedb::database::CreateNamespaceRequest {\n namespace: vec![\"prod\".to_string()],\n})\n.await?;\ndb.create_namespace(lancedb::database::CreateNamespaceRequest {\n namespace: namespace.clone(),\n})\n.await?;\n\nlet child_namespaces = db\n .list_namespaces(lancedb::database::ListNamespacesRequest {\n namespace: vec![\"prod\".to_string()],\n ..Default::default()\n })\n .await?;\nprintln!(\n \"Child namespaces under {:?}: {:?}\",\n namespace, child_namespaces\n);\n// Child namespaces under [\"prod\", \"search\"]: [\"search\"]\n\ndb.drop_namespace(lancedb::database::DropNamespaceRequest {\n namespace: namespace.clone(),\n})\n.await?;\ndb.drop_namespace(lancedb::database::DropNamespaceRequest {\n namespace: vec![\"prod\".to_string()],\n})\n.await?;\n"; + +export const RsNamespaceTableOps = "let conn = connect(uri).execute().await?;\nlet search_namespace = vec![\"prod\".to_string(), \"search\".to_string()];\nlet recommendations_namespace = vec![\"prod\".to_string(), \"recommendations\".to_string()];\n\nlet schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![\n arrow_schema::Field::new(\"id\", arrow_schema::DataType::Int64, false),\n]));\n\nconn.create_empty_table(\"user\", schema.clone())\n .namespace(search_namespace.clone())\n .execute()\n .await?;\n\nconn.create_empty_table(\"user\", schema)\n .namespace(recommendations_namespace.clone())\n .execute()\n .await?;\n\nlet search_table_names = conn\n .table_names()\n .namespace(search_namespace)\n .execute()\n .await?;\nlet recommendation_table_names = conn\n .table_names()\n .namespace(recommendations_namespace)\n .execute()\n .await?;\n\nprintln!(\"{search_table_names:?}\"); // [\"user\"]\nprintln!(\"{recommendation_table_names:?}\"); // 
[\"user\"]\n"; diff --git a/docs/snippets/multimodal.mdx b/docs/snippets/multimodal.mdx index 3dc435c..206cd50 100644 --- a/docs/snippets/multimodal.mdx +++ b/docs/snippets/multimodal.mdx @@ -1,6 +1,6 @@ {/* Auto-generated by scripts/mdx_snippets_gen.py. Do not edit manually. */} -export const PyBlobApiIngest = "import lancedb\nimport lance\n\ndb = lancedb.connect(db_path_factory(\"blob_db\"))\n \n# Create sample data\ndata = [\n {\"id\": 1, \"video\": b\"fake_video_bytes_1\"},\n {\"id\": 2, \"video\": b\"fake_video_bytes_2\"}\n]\n \n# Create the table\ntbl = db.create_table(\"videos\", data=data, schema=schema)\n"; +export const PyBlobApiIngest = "import lancedb\n\ndb = lancedb.connect(db_path_factory(\"blob_db\"))\n \n# Create sample data\ndata = [\n {\"id\": 1, \"video\": b\"fake_video_bytes_1\"},\n {\"id\": 2, \"video\": b\"fake_video_bytes_2\"}\n]\n \n# Create the table\ntbl = db.create_table(\"videos\", data=data, schema=schema)\n"; export const PyBlobApiSchema = "import pyarrow as pa\n\n# Define schema with Blob API metadata for lazy loading\nschema = pa.schema([\n pa.field(\"id\", pa.int64()),\n pa.field(\n \"video\", \n pa.large_binary(), \n metadata={\"lance-encoding:blob\": \"true\"} # Enable Blob API\n ),\n])\n"; diff --git a/docs/snippets/search.mdx b/docs/snippets/search.mdx index e227b11..6612b61 100644 --- a/docs/snippets/search.mdx +++ b/docs/snippets/search.mdx @@ -1,8 +1,8 @@ {/* Auto-generated by scripts/mdx_snippets_gen.py. Do not edit manually. 
*/} -export const PyBasicFts = "uri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\n\ntable = db.create_table(\n \"my_table_fts\",\n data=[\n {\"vector\": [3.1, 4.1], \"text\": \"Frodo was a happy puppy\"},\n {\"vector\": [5.9, 26.5], \"text\": \"There are several kittens playing\"},\n ],\n)\n\n# passing `use_tantivy=False` to use lance FTS index\n# `use_tantivy=True` by default\ntable.create_fts_index(\"text\", use_tantivy=False)\ntable.search(\"puppy\").limit(10).select([\"text\"]).to_list()\n# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]\n# ...\n"; +export const PyBasicFts = "uri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\n\ntable = db.create_table(\n \"my_table_fts\",\n data=[\n {\"vector\": [3.1, 4.1], \"text\": \"Frodo was a happy puppy\"},\n {\"vector\": [5.9, 26.5], \"text\": \"There are several kittens playing\"},\n ],\n mode=\"overwrite\",\n)\n\n# passing `use_tantivy=False` to use lance FTS index\n# `use_tantivy=True` by default\ntable.create_fts_index(\"text\", use_tantivy=False)\ntable.search(\"puppy\").limit(10).select([\"text\"]).to_list()\n# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]\n# ...\n"; -export const PyBasicFtsAsync = "uri = \"data/sample-lancedb\"\nasync_db = await lancedb.connect_async(uri)\n\nasync_tbl = await async_db.create_table(\n \"my_table_fts_async\",\n data=[\n {\"vector\": [3.1, 4.1], \"text\": \"Frodo was a happy puppy\"},\n {\"vector\": [5.9, 26.5], \"text\": \"There are several kittens playing\"},\n ],\n)\n\n# async API uses our native FTS algorithm\nawait async_tbl.create_index(\"text\", config=FTS())\nawait (await async_tbl.search(\"puppy\")).select([\"text\"]).limit(10).to_list()\n# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]\n# ...\n"; +export const PyBasicFtsAsync = "uri = \"data/sample-lancedb\"\nasync_db = await lancedb.connect_async(uri)\n\nasync_tbl = await async_db.create_table(\n \"my_table_fts_async\",\n data=[\n {\"vector\": 
[3.1, 4.1], \"text\": \"Frodo was a happy puppy\"},\n {\"vector\": [5.9, 26.5], \"text\": \"There are several kittens playing\"},\n ],\n mode=\"overwrite\",\n)\n\n# async API uses our native FTS algorithm\nawait async_tbl.create_index(\"text\", config=FTS())\nawait (await async_tbl.search(\"puppy\")).select([\"text\"]).limit(10).to_list()\n# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]\n# ...\n"; export const PyBasicHybridSearch = "data = [\n {\"text\": \"rebel spaceships striking from a hidden base\"},\n {\"text\": \"have won their first victory against the evil Galactic Empire\"},\n {\"text\": \"during the battle rebel spies managed to steal secret plans\"},\n {\"text\": \"to the Empire's ultimate weapon the Death Star\"},\n]\nuri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\ntable = db.create_table(\"documents\", schema=Documents)\n# ingest docs with auto-vectorization\ntable.add(data)\n# Create a fts index before the hybrid search\ntable.create_fts_index(\"text\")\n# hybrid search with default re-ranker\ntable.search(\"flower moon\", query_type=\"hybrid\").to_pandas()\n"; @@ -12,13 +12,13 @@ export const PyClassDefinition = "class Metadata(BaseModel):\n source: str\n export const PyClassDocuments = "class Documents(LanceModel):\n vector: Vector(embeddings.ndims()) = embeddings.VectorField()\n text: str = embeddings.SourceField()\n"; -export const PyCreateTableAsyncWithNestedSchema = "# Let's add 100 sample rows to our dataset\ndata = [\n LanceSchema(\n id=f\"id{i}\",\n vector=np.random.randn(1536),\n payload=Document(\n content=f\"document{i}\",\n meta=Metadata(source=f\"source{i % 10}\", timestamp=datetime.now()),\n ),\n )\n for i in range(100)\n]\n\nasync_tbl = await async_db.create_table(\"documents_async\", data=data)\n"; +export const PyCreateTableAsyncWithNestedSchema = "# Let's add 100 sample rows to our dataset\ndata = [\n LanceSchema(\n id=f\"id{i}\",\n vector=np.random.randn(1536),\n payload=Document(\n 
content=f\"document{i}\",\n meta=Metadata(source=f\"source{i % 10}\", timestamp=datetime.now()),\n ),\n )\n for i in range(100)\n]\n\nasync_tbl = await async_db.create_table(\n \"documents_async\", data=data, mode=\"overwrite\"\n)\n"; -export const PyCreateTableWithNestedSchema = "# Let's add 100 sample rows to our dataset\ndata = [\n LanceSchema(\n id=f\"id{i}\",\n vector=np.random.randn(1536),\n payload=Document(\n content=f\"document{i}\",\n meta=Metadata(source=f\"source{i % 10}\", timestamp=datetime.now()),\n ),\n )\n for i in range(100)\n]\n\n# Synchronous client\ntbl = db.create_table(\"documents\", data=data)\n"; +export const PyCreateTableWithNestedSchema = "# Let's add 100 sample rows to our dataset\ndata = [\n LanceSchema(\n id=f\"id{i}\",\n vector=np.random.randn(1536),\n payload=Document(\n content=f\"document{i}\",\n meta=Metadata(source=f\"source{i % 10}\", timestamp=datetime.now()),\n ),\n )\n for i in range(100)\n]\n\n# Synchronous client\ntbl = db.create_table(\"documents\", data=data, mode=\"overwrite\")\n"; -export const PyExhaustiveSearch = "uri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\ndata = [\n {\"vector\": row, \"item\": f\"item {i}\"}\n for i, row in enumerate(np.random.random((10_000, 1536)).astype(\"float32\"))\n]\ntbl = db.create_table(\"vector_search\", data=data)\ntbl.search(np.random.random((1536))).limit(10).to_list()\n"; +export const PyExhaustiveSearch = "uri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\ndata = [\n {\"vector\": row, \"item\": f\"item {i}\"}\n for i, row in enumerate(np.random.random((10_000, 1536)).astype(\"float32\"))\n]\ntbl = db.create_table(\"vector_search\", data=data, mode=\"overwrite\")\ntbl.search(np.random.random((1536))).limit(10).to_list()\n"; -export const PyExhaustiveSearchAsync = "uri = \"data/sample-lancedb\"\nasync_db = await lancedb.connect_async(uri)\ndata = [\n {\"vector\": row, \"item\": f\"item {i}\"}\n for i, row in enumerate(np.random.random((10_000, 
1536)).astype(\"float32\"))\n]\nasync_tbl = await async_db.create_table(\"vector_search_async\", data=data)\n(await (await async_tbl.search(np.random.random((1536)))).limit(10).to_list())\n"; +export const PyExhaustiveSearchAsync = "uri = \"data/sample-lancedb\"\nasync_db = await lancedb.connect_async(uri)\ndata = [\n {\"vector\": row, \"item\": f\"item {i}\"}\n for i, row in enumerate(np.random.random((10_000, 1536)).astype(\"float32\"))\n]\nasync_tbl = await async_db.create_table(\n \"vector_search_async\", data=data, mode=\"overwrite\"\n)\n(await (await async_tbl.search(np.random.random((1536)))).limit(10).to_list())\n"; export const PyExhaustiveSearchAsyncCosine = "(\n await (await async_tbl.search(np.random.random((1536))))\n .distance_type(\"cosine\")\n .limit(10)\n .to_list()\n)\n"; diff --git a/docs/tables/index.mdx b/docs/tables/index.mdx index 818ed74..d84e463 100644 --- a/docs/tables/index.mdx +++ b/docs/tables/index.mdx @@ -66,14 +66,10 @@ explore some more table operations you'll typically need when working with Lance - **Interoperate with DuckDB** and run traditional SQL queries on an Arrow table (Python) -This page uses synchronous Python snippets for readability. If your app uses `asyncio`, +This page uses **synchronous** Python snippets. If your Python app uses `asyncio`, the same flow works with `connect_async(...)` and `await`-based table/query calls. Use the example below as a template, and see [Quickstart](/quickstart#python-sync-and-async-apis) -for the general mapping. - - -{PyBasicAsyncApi} - +for example snippets on both sync and async Python usage. ## Dataset diff --git a/docs/tables/namespaces.mdx b/docs/tables/namespaces.mdx index e2b96e4..daa941e 100644 --- a/docs/tables/namespaces.mdx +++ b/docs/tables/namespaces.mdx @@ -1,5 +1,5 @@ --- -title: "Using Namespaces in SDKs" +title: "Using Namespaces" sidebarTitle: "Namespaces" description: "Use LanceDB's namespace-aware table and catalog APIs in Python, TypeScript, and Rust." 
icon: "folder-tree" @@ -9,22 +9,26 @@ keywords: ["namespace", "create_table", "open_table", "list_tables", "catalog"] import { PyNamespaceTableOps, PyNamespaceAdminOps, - TsNamespaceTableOps, + RsNamespaceAdminOps, RsNamespaceTableOps, } from '/snippets/connection.mdx'; -As described in the [Namespaces and Catalog Model](/namespaces) section, namespaces are LanceDB's way of generalizing catalog specs, providing developers a clean way to manage hierarchical organization of tables in the catalog. The SDKs treat `namespace` as a path and can use it for table resolution when you use LanceDB outside the root namespace. - As your table organization needs grow over time and your projects become more complex, you can use namespaces to organize your tables in a way that reflects your business domains, teams, or environments. +As described in the [Namespaces and Catalog Model](/namespaces) section, namespaces are LanceDB's way of generalizing catalog specs, providing developers a clean way to manage hierarchical organization of tables in the catalog. The SDKs treat `namespace` as a path and can use it for table resolution when you use LanceDB outside the root namespace. + ## Table operations with namespace paths Let's imagine a scenario where your table management needs have evolved, and you now have the following multi-level structure to organize your tables outside the root namespace. ``` -(root) +./local_lancedb (root) └── prod └── search - └── users (table) + └── user (table) + └── data (table) + └── recommendations + └── user (table) + └── data (data) ``` Below, we show how you would express table operations within that namespace. Each item in the namespace @@ -36,19 +40,15 @@ specified when you create, open, or drop it. {PyNamespaceTableOps} - - {TsNamespaceTableOps} - - {RsNamespaceTableOps} -Namespaces are optional parameters in LanceDB, and most basic use cases do not require you to specify them. 
+Using namespaces is **optional** in LanceDB, and most basic use cases do not require you to work with them. An empty namespace (`[]`), which is the default, means "root namespace", and the data will be stored in -the `data/` directory. +the `data/` directory under the specified root path. ## Namespace management APIs @@ -57,18 +57,59 @@ You can open/create/drop tables inside a namespace path (like `["prod", "search" The Python and Rust SDKs expose namespace lifecycle operations directly. In Python, use `lancedb.connect_namespace(...)` when calling namespace lifecycle methods such as `create_namespace`, `list_namespaces`, `describe_namespace`, and `drop_namespace`. +In Rust, use `lancedb::connect_namespace(...)` and call `create_namespace`, `list_namespaces`, +and `drop_namespace`. {PyNamespaceAdminOps} + + + {RsNamespaceAdminOps} + -In TypeScript, namespace lifecycle management is not on `Connection`, so namespaces usually need to be -created through another admin surface (for example REST/admin tooling) before use. +In TypeScript, namespace lifecycle and namespace-scoped table operations are not currently exposed on `Connection`. In practice, namespaces in TypeScript are managed through a namespace-aware admin surface (for example [REST](/api-reference/rest/namespace/create-a-new-namespace)/admin tooling), and the Connection APIs operate at the root namespace. +## Namespaces in LanceDB Enterprise + +In LanceDB Enterprise deployments, configure namespace-backed federated databases in a TOML file under your deployment's `config` directory. +LanceDB Enterprise supports both directory-based (`ns_impl = "dir"`) and REST-based (`ns_impl = "rest"`) namespace implementations. +The example below shows how to configure a directory-based namespace implementation in LanceDB Enterprise. 
+```toml +# Federated database configuration for DirectoryNamespace +# This example uses minio storage +[federated_dbs.federated_dir_test] +ns_impl = "dir" +root = "s3:///" +"storage.region" = "us-east-1" +"storage.endpoint" = "http://localhost:9000" +"storage.access_key_id" = "minioadmin" +"storage.secret_access_key" = "minioadmin" +"storage.allow_http" = "true" +# Far future expiration (year 2100) +"storage.expires_at_millis" = "4102444800000" +``` + +The example above uses MinIO, but the same approach applies to other cloud object storage platforms based on your deployment. + +For REST-based namespace servers, you can specify the namespace implementation as `"rest"` with forwarding prefixed headers +for authentication and context propagation. + +```toml +[federated_dbs.federated_rest_test] +ns_impl = "rest" +uri = "http://.internal.catalog.com" +forward_header_prefixes = ["X-forward"] +``` + +With `forward_header_prefixes = ["X-forward"]`, any incoming header starting with `X-forward` is forwarded to +`http://.internal.catalog.com`. This is useful for auth propagation, for example sending +`X-forward-authorization: Bearer xxxx`. 
+ ## Related references - [Client SDK API references](/api-reference) diff --git a/pyproject.toml b/pyproject.toml index fb2c553..387c378 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = "README.md" requires-python = ">=3.13" dependencies = [ "lancedb>=0.29.2", + "lance-namespace>=0.5.2", "pandas>=2.3.3", "polars>=1.35.2", "pydantic>=2.12.4", diff --git a/tests/py/test_connection.py b/tests/py/test_connection.py index 4d7c1fe..9221145 100644 --- a/tests/py/test_connection.py +++ b/tests/py/test_connection.py @@ -58,41 +58,52 @@ def namespace_table_ops_example(): # --8<-- [start:namespace_table_ops] import lancedb - db = lancedb.connect_namespace("dir", {"root": "./data/sample-lancedb"}) - namespace = ["prod", "search"] + db = lancedb.connect_namespace("dir", {"root": "./local_lancedb"}) - for i in range(1, len(namespace) + 1): - db.create_namespace(namespace[:i], mode="exist_ok") + # Create namespace tree: prod/search + db.create_namespace(["prod"], mode="exist_ok") + db.create_namespace(["prod", "search"], mode="exist_ok") + db.create_namespace(["prod", "recommendations"], mode="exist_ok") db.create_table( - "users", + "user", data=[{"id": 1, "vector": [0.1, 0.2], "name": "alice"}], - mode="overwrite", - namespace=namespace, + namespace=["prod", "search"], + mode="create", # use "overwrite" only if you want to replace existing table ) - table = db.open_table("users", namespace=namespace) - tables = db.list_tables(namespace=namespace).tables + db.create_table( + "user", + data=[{"id": 2, "vector": [0.3, 0.4], "name": "bob"}], + namespace=["prod", "recommendations"], + mode="create", # use "overwrite" only if you want to replace existing table + ) - db.drop_table("users", namespace=namespace) - # drop_all_tables is namespace-aware as well: - # db.drop_all_tables(namespace=namespace) + # Verify + print(db.list_namespaces()) # ['prod'] + print(db.list_namespaces(namespace=["prod"])) # ['recommendations', 'search'] + 
print(db.list_tables(namespace=["prod", "search"])) # ['user'] + print(db.list_tables(namespace=["prod", "recommendations"])) # ['user'] # --8<-- [end:namespace_table_ops] - return table, tables def namespace_admin_ops_example(): # --8<-- [start:namespace_admin_ops] import lancedb - db = lancedb.connect_namespace("dir", {"root": "./data/sample-lancedb"}) + db = lancedb.connect_namespace("dir", {"root": "./local_lancedb"}) namespace = ["prod", "search"] - for i in range(1, len(namespace) + 1): - db.create_namespace(namespace[:i], mode="exist_ok") + db.create_namespace(["prod"]) + db.create_namespace(["prod", "search"]) child_namespaces = db.list_namespaces(namespace=["prod"]).namespaces + print(f"Child namespaces under {namespace}: {child_namespaces}") + # Child namespaces under ['prod', 'search']: ['search'] + metadata = db.describe_namespace(["prod", "search"]) + print(f"Metadata for namespace {namespace}: {metadata}") + # Metadata for namespace ['prod', 'search']: properties=None db.drop_namespace(["prod", "search"], mode="skip") db.drop_namespace(["prod"], mode="skip") diff --git a/tests/py/test_multimodal.py b/tests/py/test_multimodal.py index 69d5c97..4069a77 100644 --- a/tests/py/test_multimodal.py +++ b/tests/py/test_multimodal.py @@ -107,7 +107,6 @@ def test_blob_api_definition(db_path_factory): # --8<-- [start:blob_api_ingest] import lancedb - import lance db = lancedb.connect(db_path_factory("blob_db")) diff --git a/tests/py/test_search.py b/tests/py/test_search.py index dfa8ff0..c202d91 100644 --- a/tests/py/test_search.py +++ b/tests/py/test_search.py @@ -66,7 +66,7 @@ def test_vector_search(): {"vector": row, "item": f"item {i}"} for i, row in enumerate(np.random.random((10_000, 1536)).astype("float32")) ] - tbl = db.create_table("vector_search", data=data) + tbl = db.create_table("vector_search", data=data, mode="overwrite") tbl.search(np.random.random((1536))).limit(10).to_list() # --8<-- [end:exhaustive_search] # --8<-- 
[start:exhaustive_search_cosine] @@ -87,7 +87,7 @@ def test_vector_search(): ] # Synchronous client - tbl = db.create_table("documents", data=data) + tbl = db.create_table("documents", data=data, mode="overwrite") # --8<-- [end:create_table_with_nested_schema] # --8<-- [start:search_result_as_pyarrow] tbl.search(np.random.randn(1536)).to_arrow() @@ -118,7 +118,9 @@ async def test_vector_search_async(): {"vector": row, "item": f"item {i}"} for i, row in enumerate(np.random.random((10_000, 1536)).astype("float32")) ] - async_tbl = await async_db.create_table("vector_search_async", data=data) + async_tbl = await async_db.create_table( + "vector_search_async", data=data, mode="overwrite" + ) (await (await async_tbl.search(np.random.random((1536)))).limit(10).to_list()) # --8<-- [end:exhaustive_search_async] # --8<-- [start:exhaustive_search_async_cosine] @@ -143,7 +145,9 @@ async def test_vector_search_async(): for i in range(100) ] - async_tbl = await async_db.create_table("documents_async", data=data) + async_tbl = await async_db.create_table( + "documents_async", data=data, mode="overwrite" + ) # --8<-- [end:create_table_async_with_nested_schema] # --8<-- [start:search_result_async_as_pyarrow] await (await async_tbl.search(np.random.randn(1536))).to_arrow() @@ -245,6 +249,7 @@ def test_fts_native(): {"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}, {"vector": [5.9, 26.5], "text": "There are several kittens playing"}, ], + mode="overwrite", ) # passing `use_tantivy=False` to use lance FTS index @@ -294,6 +299,7 @@ async def test_fts_native_async(): {"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}, {"vector": [5.9, 26.5], "text": "There are several kittens playing"}, ], + mode="overwrite", ) # async API uses our native FTS algorithm diff --git a/tests/rs/connection.rs b/tests/rs/connection.rs index bd528a1..f9e64de 100644 --- a/tests/rs/connection.rs +++ b/tests/rs/connection.rs @@ -45,35 +45,80 @@ fn connect_object_storage_config() -> &'static str { 
async fn namespace_table_ops_example(uri: &str) -> lancedb::Result<()> { // --8<-- [start:namespace_table_ops] let conn = connect(uri).execute().await?; - let namespace = vec!["prod".to_string(), "search".to_string()]; + let search_namespace = vec!["prod".to_string(), "search".to_string()]; + let recommendations_namespace = vec!["prod".to_string(), "recommendations".to_string()]; let schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![ arrow_schema::Field::new("id", arrow_schema::DataType::Int64, false), ])); - conn.create_empty_table("users", schema) - .namespace(namespace.clone()) + conn.create_empty_table("user", schema.clone()) + .namespace(search_namespace.clone()) .execute() .await?; - let _table = conn - .open_table("users") - .namespace(namespace.clone()) + conn.create_empty_table("user", schema) + .namespace(recommendations_namespace.clone()) .execute() .await?; - let _table_names = conn + + let search_table_names = conn .table_names() - .namespace(namespace.clone()) + .namespace(search_namespace) + .execute() + .await?; + let recommendation_table_names = conn + .table_names() + .namespace(recommendations_namespace) .execute() .await?; - conn.drop_table("users", &namespace).await?; - // drop_all_tables is namespace-aware as well: - // conn.drop_all_tables(&namespace).await?; + println!("{search_table_names:?}"); // ["user"] + println!("{recommendation_table_names:?}"); // ["user"] // --8<-- [end:namespace_table_ops] Ok(()) } +async fn namespace_admin_ops_example() -> lancedb::Result<()> { + // --8<-- [start:namespace_admin_ops] + let mut properties = std::collections::HashMap::new(); + properties.insert("root".to_string(), "./local_lancedb".to_string()); + let db = lancedb::connect_namespace("dir", properties).execute().await?; + let namespace = vec!["prod".to_string(), "search".to_string()]; + + db.create_namespace(lancedb::database::CreateNamespaceRequest { + namespace: vec!["prod".to_string()], + }) + .await?; + 
db.create_namespace(lancedb::database::CreateNamespaceRequest { + namespace: namespace.clone(), + }) + .await?; + + let child_namespaces = db + .list_namespaces(lancedb::database::ListNamespacesRequest { + namespace: vec!["prod".to_string()], + ..Default::default() + }) + .await?; + println!( + "Child namespaces under {:?}: {:?}", + namespace, child_namespaces + ); + // Child namespaces under ["prod", "search"]: ["search"] + + db.drop_namespace(lancedb::database::DropNamespaceRequest { + namespace: namespace.clone(), + }) + .await?; + db.drop_namespace(lancedb::database::DropNamespaceRequest { + namespace: vec!["prod".to_string()], + }) + .await?; + // --8<-- [end:namespace_admin_ops] + Ok(()) +} + #[allow(dead_code)] fn repo_root() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("..") diff --git a/tests/ts/connection.test.ts b/tests/ts/connection.test.ts index 7d383da..8055a94 100644 --- a/tests/ts/connection.test.ts +++ b/tests/ts/connection.test.ts @@ -35,26 +35,4 @@ async function connectObjectStorageExample() { } // --8<-- [end:connect_object_storage] -async function namespaceTableOpsExample(uri: string) { - // --8<-- [start:namespace_table_ops] - const db = await lancedb.connect(uri); - const namespace = ["prod", "search"]; - - await db.createTable( - "users", - [{ id: 1, name: "alice" }], - namespace, - { mode: "overwrite" }, - ); - - const table = await db.openTable("users", namespace); - const tableNames = await db.tableNames(namespace); - - await db.dropTable("users", namespace); - // dropAllTables is namespace-aware as well: - // await db.dropAllTables(namespace); - // --8<-- [end:namespace_table_ops] - return { table, tableNames }; -} - -void [uri, apiKey, region, connectObjectStorageExample, namespaceTableOpsExample]; +void [uri, apiKey, region, connectObjectStorageExample]; diff --git a/uv.lock b/uv.lock index 1ef0a13..5403faa 100644 --- a/uv.lock +++ b/uv.lock @@ -37,6 +37,7 @@ name = "docs" version = "0.1.0" source = { virtual = "." 
} dependencies = [ + { name = "lance-namespace" }, { name = "lancedb" }, { name = "pandas" }, { name = "pillow" }, @@ -48,6 +49,7 @@ dependencies = [ [package.metadata] requires-dist = [ + { name = "lance-namespace", specifier = ">=0.5.2" }, { name = "lancedb", specifier = ">=0.29.2" }, { name = "pandas", specifier = ">=2.3.3" }, { name = "pillow", specifier = ">=11.0.0" }, @@ -68,19 +70,19 @@ wheels = [ [[package]] name = "lance-namespace" -version = "0.4.5" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "lance-namespace-urllib3-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b4/b5/0c3c55cf336b1e90392c2e24ac833551659e8bb3c61644b2d94825eb31bd/lance_namespace-0.4.5.tar.gz", hash = "sha256:0aee0abed3a1fa762c2955c7d12bb3004cea5c82ba28f6fcb9fe79d0cc19e317", size = 9827, upload-time = "2026-01-07T19:20:23.005Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/c6/aec0d7752e15536564b50cf9a8926f0e5d7780aa3ab8ce8bca46daa55659/lance_namespace-0.5.2.tar.gz", hash = "sha256:566cc33091b5631793ab411f095d46c66391db0a62343cd6b4470265bb04d577", size = 10274, upload-time = "2026-02-20T03:14:31.777Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/34/88/173687dad72baf819223e3b506898e386bc88c26ff8da5e8013291e02daf/lance_namespace-0.4.5-py3-none-any.whl", hash = "sha256:cd1a4f789de03ba23a0c16f100b1464cca572a5d04e428917a54d09db912d548", size = 11703, upload-time = "2026-01-07T19:20:25.394Z" }, + { url = "https://files.pythonhosted.org/packages/d6/3d/737c008d8fb2861e7ce260e2ffab0d5058eae41556181f80f1a1c3b52ef5/lance_namespace-0.5.2-py3-none-any.whl", hash = "sha256:6ccaf5649bf6ee6aa92eed9c535a114b7b4eb08e89f40426f58bc1466cbcffa3", size = 12087, upload-time = "2026-02-20T03:14:35.261Z" }, ] [[package]] name = "lance-namespace-urllib3-client" -version = "0.4.5" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, @@ -88,9 +90,9 @@ 
dependencies = [ { name = "typing-extensions" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/97/a9/4e527c2f05704565618b239b0965f829d1a194837f01234af3f8e2f33d92/lance_namespace_urllib3_client-0.4.5.tar.gz", hash = "sha256:184deda8cf8700926d994618187053c644eb1f2866a4479e7b80843cacc92b1c", size = 159726, upload-time = "2026-01-07T19:20:24.025Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e9/64/51622c93ec8c164483c83b68764e5e76e52286c0137a8247bc6a7fac25f4/lance_namespace_urllib3_client-0.5.2.tar.gz", hash = "sha256:8a3a238006e6eabc01fc9d385ac3de22ba933aef0ae8987558f3c3199c9b3799", size = 172578, upload-time = "2026-02-20T03:14:33.031Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ca/86/0adee7190408a28dcc5a0562c674537457e3de59ee51d1c724ecdc4a9930/lance_namespace_urllib3_client-0.4.5-py3-none-any.whl", hash = "sha256:2ee154d616ba4721f0bfdf043d33c4fef2e79d380653e2f263058ab00fb4adf4", size = 277969, upload-time = "2026-01-07T19:20:26.597Z" }, + { url = "https://files.pythonhosted.org/packages/2a/10/f86d994498b37f7f35d0b8c2f7626a16fe4cb1949b518c1e5d5052ecf95f/lance_namespace_urllib3_client-0.5.2-py3-none-any.whl", hash = "sha256:83cefb6fd6e5df0b99b5e866ee3d46300d375b75e8af32c27bc16fbf7c1a5978", size = 300351, upload-time = "2026-02-20T03:14:34.236Z" }, ] [[package]]