diff --git a/integrations/llms/anthropic.mdx b/integrations/llms/anthropic.mdx index c53aa9d5..f81c74d2 100644 --- a/integrations/llms/anthropic.mdx +++ b/integrations/llms/anthropic.mdx @@ -63,7 +63,7 @@ const response = await portkey.chat.completions.create({ console.log(response.choices[0].message.content) ``` -```python OpenAI Py icon="openai" +```python OpenAI Py icon="python" from openai import OpenAI from portkey_ai import PORTKEY_GATEWAY_URL @@ -85,7 +85,7 @@ response = client.chat.completions.create( print(response.choices[0].message.content) ``` -```js OpenAI JS icon="openai" +```js OpenAI JS icon="square-js" import OpenAI from "openai" import { PORTKEY_GATEWAY_URL } from "portkey-ai" @@ -380,7 +380,7 @@ portkey = Portkey(api_key="PORTKEY_API_KEY") # Fetch and encode the PDF pdf_url = "https://assets.anthropic.com/m/1cd9d098ac3e6467/original/Claude-3-Model-Card-October-Addendum.pdf" -pdf_data = base64.standard_b64encode(httpx.get(pdf_url).content).decode("utf-8") +pdf_data = "data:application/pdf;base64," + base64.standard_b64encode(httpx.get(pdf_url).content).decode("utf-8") response = portkey.chat.completions.create( model="@anthropic/claude-sonnet-4-5-20250929", @@ -412,6 +412,7 @@ const portkey = new Portkey({ const pdfUrl = "https://assets.anthropic.com/m/1cd9d098ac3e6467/original/Claude-3-Model-Card-October-Addendum.pdf" const pdfResponse = await axios.get(pdfUrl, { responseType: 'arraybuffer' }) const pdfBase64 = Buffer.from(pdfResponse.data).toString('base64') +const pdfData = `data:application/pdf;base64,${pdfBase64}` const response = await portkey.chat.completions.create({ model: "@anthropic/claude-sonnet-4-5-20250929", @@ -422,7 +423,7 @@ const response = await portkey.chat.completions.create({ role: "user", content: [ { type: "text", text: "What are the key findings in this document?" }, - { type: "file", file: { mime_type: "application/pdf", file_data: pdfBase64 } } + { type: "file", file: { mime_type: "application/pdf", file_data: pdfData } } ] } ] diff --git a/integrations/llms/gemini.mdx b/integrations/llms/gemini.mdx index 34f6229f..7b32efd4 100644 --- a/integrations/llms/gemini.mdx +++ b/integrations/llms/gemini.mdx @@ -4,95 +4,136 @@ title: "Google Gemini" Portkey provides a robust and secure gateway to facilitate the integration of various Large Language Models (LLMs) into your applications, including [Google Gemini APIs](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini). -With Portkey, you can take advantage of features like fast AI gateway access, observability, prompt management, and more, all while ensuring the secure management of your LLM API keys through a [virtual key](/product/ai-gateway/virtual-keys) system. -Provider Slug. `google` +With Portkey, you can take advantage of features like fast AI gateway access, observability, prompt management, and more, all while ensuring the secure management of your LLM API keys through [Model Catalog](/product/model-catalog). +## Quick Start -## Portkey SDK Integration with Google Gemini Models - -Portkey provides a consistent API to interact with models from various providers. To integrate Google Gemini with Portkey: + -### 1\. Install the Portkey SDK +```python Python icon="python" +from portkey_ai import Portkey -Add the Portkey SDK to your application to interact with Google Gemini's API through Portkey's gateway. - - +# 1. Install: pip install portkey-ai +# 2. Add @google provider in Model Catalog +# 3. 
Use it: -```sh -npm install --save portkey-ai -``` - - -```sh -pip install portkey-ai -``` - - +portkey = Portkey(api_key="PORTKEY_API_KEY") +response = portkey.chat.completions.create( + model="@google/gemini-1.5-pro", + messages=[{"role": "user", "content": "Say this is a test"}] +) -### 2\. Initialize Portkey with the Virtual Key +print(response.choices[0].message.content) +``` -To use Gemini with Portkey, [get your API key from here](https://aistudio.google.com/app/apikey), then add it to Portkey to create the virtual key. - - -```js +```js Javascript icon="square-js" import Portkey from 'portkey-ai' +// 1. Install: npm install portkey-ai +// 2. Add @google provider in Model Catalog +// 3. Use it: + const portkey = new Portkey({ - apiKey: "PORTKEY_API_KEY", // defaults to process.env["PORTKEY_API_KEY"] - provider:"@PROVIDER" // Your Google Virtual Key + apiKey: "PORTKEY_API_KEY" }) + +const response = await portkey.chat.completions.create({ + model: "@google/gemini-1.5-pro", + messages: [{ role: "user", content: "Say this is a test" }] +}) + +console.log(response.choices[0].message.content) ``` - - - ```python - from portkey_ai import Portkey - portkey = Portkey( - api_key="PORTKEY_API_KEY", # Replace with your Portkey API key - provider="@PROVIDER" # Replace with your virtual key for Google - ) - ``` - +```python OpenAI Py icon="python" +from openai import OpenAI +from portkey_ai import PORTKEY_GATEWAY_URL - +# 1. Install: pip install openai portkey-ai +# 2. Add @google provider in Model Catalog +# 3. Use it: +client = OpenAI( + api_key="PORTKEY_API_KEY", # Portkey API key + base_url=PORTKEY_GATEWAY_URL +) +response = client.chat.completions.create( + model="@google/gemini-1.5-pro", + messages=[{"role": "user", "content": "Say this is a test"}] +) -### **3\. Invoke Chat Completions with** Google Gemini +print(response.choices[0].message.content) +``` -Use the Portkey instance to send requests to Google Gemini. You can also override the virtual key directly in the API call if needed. - - - ```js - const chatCompletion = await portkey.chat.completions.create({ - messages: [ - { role: 'system', content: 'You are not a helpful assistant' }, - { role: 'user', content: 'Say this is a test' } - ], - model: 'gemini-1.5-pro', - }); +```js OpenAI JS icon="square-js" +import OpenAI from "openai" +import { PORTKEY_GATEWAY_URL } from "portkey-ai" - console.log(chatCompletion.choices); - ``` - - +// 1. Install: npm install openai portkey-ai +// 2. Add @google provider in Model Catalog +// 3. Use it: -```python -completion = portkey.chat.completions.create( - messages= [ - { "role": 'system', "content": 'You are not a helpful assistant' }, - { "role": 'user', "content": 'Say this is a test' } - ], - model= 'gemini-1.5-pro' -) +const client = new OpenAI({ + apiKey: "PORTKEY_API_KEY", // Portkey API key + baseURL: PORTKEY_GATEWAY_URL +}) + +const response = await client.chat.completions.create({ + model: "@google/gemini-1.5-pro", + messages: [{ role: "user", content: "Say this is a test" }] +}) + +console.log(response.choices[0].message.content) +``` -print(completion) +```sh cURL icon="square-terminal" +# 1. Add @google provider in Model Catalog +# 2. 
Use it: + +curl https://api.portkey.ai/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "x-portkey-api-key: $PORTKEY_API_KEY" \ + -d '{ + "model": "@google/gemini-1.5-pro", + "messages": [{"role": "user", "content": "Say this is a test"}] + }' ``` - - + + +--- + +## Add Provider in Model Catalog + + + + + +Go to [**Model Catalog → Add Provider**](https://app.portkey.ai/model-catalog/providers) in your Portkey dashboard. + + + + + +Find and select **Google** from the provider list. + + + + + +Get your API key from [Google AI Studio](https://aistudio.google.com/app/apikey) and enter it in Model Catalog. + + + + +Save your configuration. Your provider slug will be `@google` (or a custom name you specify). + + + + Portkey supports the `system_instructions` parameter for Google Gemini 1.5 - allowing you to control the behavior and output of your Gemini-powered applications with ease. @@ -100,14 +141,17 @@ Portkey supports the `system_instructions` parameter for Google Gemini 1.5 - all Simply include your Gemini system prompt as part of the `{"role":"system"}` message within the `messages` array of your request body. Portkey Gateway will automatically transform your message to ensure seamless compatibility with the Google Gemini API. -## Function Calling +--- -Portkey supports function calling mode on Google's Gemini Models. Explore this Cookbook for a deep dive and examples: +## Gemini Capabilities -[Function Calling](/guides/getting-started/function-calling) +### Function Calling +Portkey supports function calling mode on Google's Gemini Models. Explore this cookbook for a deep dive and examples: +[Function Calling](/guides/getting-started/function-calling) +--- ## Advanced Multimodal Capabilities with Gemini @@ -1961,13 +2005,13 @@ Note that you will have to set [`strict_open_ai_compliance=False`](/product/ai-g // Initialize the Portkey client const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY", // Replace with your Portkey API key - provider:"@PROVIDER", // your vertex-ai virtual key + provider:"@PROVIDER", // your Vertex AI provider slug strictOpenAiCompliance: false }); // Generate a chat completion async function getChatCompletionFunctions() { - const response = await portkey.chat.completions.create({ +const response = await portkey.chat.completions.create({ model: "gemini-2.5-flash-preview-04-17", max_tokens: 3000, thinking: { @@ -2133,46 +2177,14 @@ Note that you will have to set [`strict_open_ai_compliance=False`](/product/ai-g ``` -### Using reasoning_effort Parameter - -You can also control thinking using the OpenAI-compatible `reasoning_effort` parameter instead of `thinking.budget_tokens`: - -```python -response = portkey.chat.completions.create( - model="gemini-2.5-flash-preview-04-17", - max_tokens=3000, - reasoning_effort="medium", # Options: "none", "low", "medium", "high" - messages=[{"role": "user", "content": "Explain quantum computing"}] -) -``` - -#### Gemini 2.5 Models - -For Gemini 2.5 models, `reasoning_effort` maps to `thinking_budget` with specific token allocations: - -| reasoning_effort | thinking_budget (tokens) | -|------------------|--------------------------| -| `none` | Disabled | -| `low` | 1,024 | -| `medium` | 8,192 | -| `high` | 24,576 | - -#### Gemini 3.0+ Models - -For Gemini 3.0 and later models, `reasoning_effort` maps directly to `thinkingLevel`: - -| reasoning_effort | Gemini thinkingLevel | -|------------------|---------------------| -| `none` | Disabled | -| `minimal` | `minimal` | -| `low` | `low` | -| `medium` | `medium` | -| 
`high` | `high` | - Gemini grounding mode may not work via Portkey SDK. Contact support@portkey.ai for assistance. +--- + + + ## Image Generation (nano banana 🍌) Gemini models like `gemini-3-pro-image-preview` support native image generation capabilities. You can generate images by setting `modalities` to include `"image"` in your request. @@ -2200,10 +2212,6 @@ response = portkey.chat.completions.create( max_tokens=32768, stream=False, modalities=["image"], - image_config={ # optional - "aspect_ratio": "16:9", - "image_size": "2K" - }, messages=[ { "role": "system", @@ -2243,10 +2251,6 @@ async function generateImage() { max_tokens: 32768, stream: false, modalities: ["image"], - image_config: { // optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages: [ { role: "system", @@ -2292,10 +2296,6 @@ async function generateImage() { max_tokens: 32768, stream: false, modalities: ["image"], - image_config: { // optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages: [ { role: "system", @@ -2337,10 +2337,6 @@ response = openai.chat.completions.create( max_tokens=32768, stream=False, modalities=["image"], - image_config={ # optional - "aspect_ratio": "16:9", - "image_size": "2K" - }, messages=[ { "role": "system", @@ -2372,10 +2368,6 @@ curl "https://api.portkey.ai/v1/chat/completions" \ "max_tokens": 32768, "stream": false, "modalities": ["image"], - "image_config": { - "aspect_ratio": "16:9", - "image_size": "2K" - }, "messages": [ { "role": "system", @@ -2406,10 +2398,6 @@ response = portkey.chat.completions.create( max_tokens=32768, stream=False, modalities=["text", "image"], # Include both text and image - image_config={ # optional - "aspect_ratio": "16:9", - "image_size": "2K" - }, messages=[ { "role": "user", @@ -2430,10 +2418,6 @@ const response = await portkey.chat.completions.create({ max_tokens: 32768, stream: false, modalities: ["text", "image"], // Include both text and image - image_config: { // optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages: [ { role: "user", @@ -2460,10 +2444,6 @@ curl "https://api.portkey.ai/v1/chat/completions" \ "max_tokens": 32768, "stream": false, "modalities": ["text", "image"], - "image_config": { - "aspect_ratio": "16:9", - "image_size": "2K" - }, "messages": [ { "role": "user", @@ -2490,10 +2470,6 @@ initial_response = portkey.chat.completions.create( model="gemini-3-pro-image-preview", max_tokens=32768, modalities=["text", "image"], - image_config={ # optional - "aspect_ratio": "16:9", - "image_size": "2K" - }, messages=[ { "role": "user", @@ -2515,10 +2491,6 @@ edit_response = portkey.chat.completions.create( model="gemini-3-pro-image-preview", max_tokens=32768, modalities=["text", "image"], - image_config={ # optional - "aspect_ratio": "16:9", - "image_size": "2K" - }, messages=[ { "role": "user", @@ -2549,10 +2521,6 @@ const initialResponse = await portkey.chat.completions.create({ model: "gemini-3-pro-image-preview", max_tokens: 32768, modalities: ["text", "image"], - image_config: { // optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages: [ { role: "user", @@ -2574,10 +2542,6 @@ const editResponse = await portkey.chat.completions.create({ model: "gemini-3-pro-image-preview", max_tokens: 32768, modalities: ["text", "image"], - image_config: { // optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages: [ { role: "user", @@ -2606,13 +2570,26 @@ console.log(editResponse); ## Next Steps -The complete list of features supported in the SDK are available on the link below. 
- + + + +Complete SDK documentation and API reference + + + +Add metadata to your Gemini requests -You'll find more information in the relevant sections: + +Configure advanced gateway features + + + +Trace and monitor your Gemini requests + + + +Create fallback configurations between providers + -1. [Add metadata to your requests](/product/observability/metadata) -2. [Add gateway configs to your Gemini requests](/product/ai-gateway/configs) -3. [Tracing Google Gemini requests](/product/observability/traces) -4. [Setup a fallback from OpenAI to Gemini APIs](/product/ai-gateway/fallbacks) + diff --git a/integrations/llms/openai.mdx b/integrations/llms/openai.mdx index 7424d1ce..36036f3a 100644 --- a/integrations/llms/openai.mdx +++ b/integrations/llms/openai.mdx @@ -1,460 +1,307 @@ --- title: "OpenAI" -description: "Learn to integrate OpenAI with Portkey, enabling seamless completions, prompt management, and advanced functionalities like streaming, function calling and fine-tuning." +description: "Integrate OpenAI's GPT models with Portkey's AI Gateway" --- -Portkey has native integrations with OpenAI SDKs for Node.js, Python, and its REST APIs. For OpenAI integration using other frameworks, explore our partnerships, including [Langchain](/integrations/libraries/langchain-python), [LlamaIndex](/integrations/libraries/llama-index-python), among [others](/integrations/llms). - -Provider Slug. `openai` - -## Using the Portkey Gateway +Portkey provides a robust and secure gateway to integrate [OpenAI's APIs](https://platform.openai.com/docs/api-reference/introduction) into your applications, including GPT-4o, o1, DALL·E, Whisper, and more. -To integrate the Portkey gateway with OpenAI, +With Portkey, take advantage of features like fast AI gateway access, observability, prompt management, and more, while securely managing API keys through [Model Catalog](/product/model-catalog). -* Set the `baseURL` to the Portkey Gateway URL -* Include Portkey-specific headers such as `provider`, `apiKey`, 'virtualKey' and others. + + + Full support for GPT-4o, o1, GPT-4, GPT-3.5, and all OpenAI models + + + Chat, completions, embeddings, audio, images, and more fully supported + + + Use with OpenAI SDK, Portkey SDK, or popular frameworks like LangChain + + -Here's how to apply it to a **chat completion** request: +## Quick Start +Get OpenAI working in 3 steps: - - -Install the Portkey SDK with npm -```sh -npm install portkey-ai -``` -```ts Chat Completions -import Portkey from 'portkey-ai'; +```python Python icon="python" +from portkey_ai import Portkey -const client = new Portkey({ - apiKey: 'PORTKEY_API_KEY', - provider:'@PROVIDER' -}); +# 1. Install: pip install portkey-ai +# 2. Add @openai provider in model catalog +# 3. Use it: -async function main() { - const response = await client.chat.completions.create({ - messages: [{ role: "user", content: "Bob the builder.." }], - model: "gpt-4o", - }); +portkey = Portkey(api_key="PORTKEY_API_KEY") - console.log(response.choices[0].message.content); -} +response = portkey.chat.completions.create( + model="@openai/gpt-4o", + messages=[{"role": "user", "content": "Say this is a test"}] +) -main(); +print(response.choices[0].message.content) ``` - - - -Install the Portkey SDK with pip -```sh -pip install portkey-ai -``` - -```py Chat Completions -from portkey_ai import Portkey +```js Javascript icon="square-js" +import Portkey from 'portkey-ai' -client = Portkey( - api_key = "PORTKEY_API_KEY", - virtual_key = "PROVIDER" -) +// 1. 
Install: npm install portkey-ai +// 2. Add @openai provider in model catalog +// 3. Use it: -response = client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Hello!"} - ] -) +const portkey = new Portkey({ + apiKey: "PORTKEY_API_KEY" +}) -print(response.choices[0].message) -``` +const response = await portkey.chat.completions.create({ + model: "@openai/gpt-4o", + messages: [{ role: "user", content: "Say this is a test" }] +}) - - - - -```sh Chat Completions -curl https://api.portkey.ai/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "x-portkey-api-key: $PORTKEY_API_KEY" \ - -H "x-portkey-provider: $PORTKEY_PROVIDER" \ - -d '{ - "model": "gpt-4o", - "messages": [ - { "role": "user", "content": "Hello!" } - ] - }' +console.log(response.choices[0].message.content) ``` - - - -Install the OpenAI & Portkey SDKs with pip -```sh -pip install openai portkey-ai -``` - -```py Chat Completions +```python OpenAI Py icon="python" from openai import OpenAI -from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL +from portkey_ai import PORTKEY_GATEWAY_URL + +# 1. Install: pip install openai portkey-ai +# 2. Add @openai provider in model catalog +# 3. Use it: client = OpenAI( - api_key="xx", - base_url=PORTKEY_GATEWAY_URL, - default_headers=createHeaders( - api_key="PORTKEY_API_KEY", - provider="@OPENAI_PROVIDER" - ) + api_key="PORTKEY_API_KEY", # Portkey API key + base_url=PORTKEY_GATEWAY_URL ) -completion = client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Hello!"} - ] +response = client.chat.completions.create( + model="@openai/gpt-4o", + messages=[{"role": "user", "content": "Say this is a test"}] ) -print(completion.choices[0].message) +print(response.choices[0].message.content) ``` - - - -Install the OpenAI & Portkey SDKs with npm -```sh -npm install openai portkey-ai -``` - -```ts Chat Completions -import OpenAI from 'openai'; -import { PORTKEY_GATEWAY_URL, createHeaders } from 'portkey-ai' -const openai = new OpenAI({ - apiKey: 'xx', - baseURL: PORTKEY_GATEWAY_URL, - defaultHeaders: createHeaders({ - apiKey: "PORTKEY_API_KEY", - provider:"@OPENAI_PROVIDER" - }) -}); +```js OpenAI JS icon="square-js" +import OpenAI from "openai" +import { PORTKEY_GATEWAY_URL } from "portkey-ai" -async function main() { - const completion = await openai.chat.completions.create({ - messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'gpt-4o', - }); +// 1. Install: npm install openai portkey-ai +// 2. Add @openai provider in model catalog +// 3. 
Use it: - console.log(chatCompletion.choices); -} +const client = new OpenAI({ + apiKey: "PORTKEY_API_KEY", // Portkey API key + baseURL: PORTKEY_GATEWAY_URL +}) -main(); -``` - - +const response = await client.chat.completions.create({ + model: "@openai/gpt-4o", + messages: [{ role: "user", content: "Say this is a test" }] +}) - - -```java Chat Completions -package com.example; - -import com.openai.client.OpenAIClient; -import com.openai.client.okhttp.OpenAIOkHttpClient; -import com.openai.models.chat.completions.ChatCompletion; -import com.openai.models.chat.completions.ChatCompletionCreateParams; - -public class App { - - private static final String PORTKEY_API_KEY = "PK_API_KEY"; - private static final String PROVIDER = "< @OPENAI_PROVIDER/gpt-4o >"; - private static final String PORTKEY_BASE_URL = "https://api.portkey.ai/v1"; - - public static void main(String[] args) { - - // Create client with proxy, Portkey API, and custom headers - OpenAIClient client = OpenAIOkHttpClient.builder() - .baseUrl(PORTKEY_BASE_URL) - .apiKey("API_KEY") - .putHeader("x-portkey-api-key", PORTKEY_API_KEY) - .build(); - - ChatCompletionCreateParams params = ChatCompletionCreateParams.builder() - .addUserMessage("Say this is a test") - .model(PROVIDER) - .maxCompletionTokens(10) - .build(); - - ChatCompletion chatCompletion = client.chat().completions().create(params); - System.out.println(chatCompletion); - } -} +console.log(response.choices[0].message.content) ``` - - -See the [OpenAI Examples](https://github.com/openai/openai-java/tree/main/openai-java-example/src/main/java/com/openai/example) directory for complete and runnable examples. - - - +```sh cURL icon="square-terminal" +# 1. Add @openai provider in model catalog +# 2. Use it: -This request will be automatically logged by Portkey. You can view this in your logs dashboard. Portkey logs the tokens utilized, execution time, and cost for each request. Additionally, you can delve into the details to review the precise request and response data. +curl https://api.portkey.ai/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "x-portkey-api-key: $PORTKEY_API_KEY" \ + -H "x-portkey-provider: @openai" \ + -d '{ + "model": "gpt-4o", + "messages": [ + { "role": "user", "content": "Say this is a test" } + ] + }' +``` + -Portkey supports [OpenAI's new "developer" role](https://platform.openai.com/docs/api-reference/chat/create#chat-create-messages) in chat completions. With o1 models and newer, the `developer` role replaces the previous `system` role. +**Tip:** You can also set `provider="@openai"` in `Portkey()` and use just `model="gpt-4o"` in the request. + +**Legacy support:** The `virtual_key` parameter still works for backwards compatibility. +## Add Provider in Model Catalog -### Using the Responses API +1. Go to [**Model Catalog → Add Provider**](https://app.portkey.ai/model-catalog/providers) +2. Select **OpenAI** +3. Choose existing credentials or create new by entering your [OpenAI API key](https://platform.openai.com/api-keys) +4. (Optional) Add your OpenAI **Organization ID** and **Project ID** for better cost tracking +5. Name your provider (e.g., `openai-prod`) -OpenAI has released a new Responses API that combines the best of both Chat Completions and Assistants APIs. Portkey fully supports this new API, enabling you to use it with both the Portkey SDK and OpenAI SDK. 
+ + See all setup options, code examples, and detailed instructions + - - - ```python - from portkey_ai import Portkey +## Basic Usage - portkey = Portkey( - api_key="PORTKEY_API_KEY", - provider="@OPENAI_PROVIDER" - ) +### Streaming - response = portkey.responses.create( - model="gpt-4.1", - input="Tell me a three sentence bedtime story about a unicorn." - ) +Stream responses for real-time output in your applications: - print(response) - ``` - - - ```js - import Portkey from 'portkey-ai'; + +```python Python icon="python" +response = portkey.chat.completions.create( + model="@openai/gpt-4o", + messages=[{"role": "user", "content": "Tell me a story"}], + stream=True +) - const portkey = new Portkey({ - apiKey: "PORTKEY_API_KEY", - provider:"@OPENAI_PROVIDER" - }); +for chunk in response: + if chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="", flush=True) +``` - async function main() { - const response = await portkey.responses.create({ - model: "gpt-4.1", - input: "Tell me a three sentence bedtime story about a unicorn." - }); +```js Javascript icon="square-js" +const response = await portkey.chat.completions.create({ + model: "@openai/gpt-4o", + messages: [{ role: "user", content: "Tell me a story" }], + stream: true +}) - console.log(response); +for await (const chunk of response) { + if (chunk.choices[0]?.delta?.content) { + process.stdout.write(chunk.choices[0].delta.content) } +} +``` + - main(); - ``` - - - ```python - from openai import OpenAI - from portkey_ai import PORTKEY_GATEWAY_URL, createHeaders - - client = OpenAI( - api_key="OPENAI_API_KEY", - base_url=PORTKEY_GATEWAY_URL, - default_headers=createHeaders( - provider="openai", - api_key="PORTKEY_API_KEY", - provider="@OPENAI_PROVIDER" - ) - ) - - response = client.responses.create( - model="gpt-4.1", - input="Tell me a three sentence bedtime story about a unicorn." - ) - - print(response) - ``` - - - ```js - import OpenAI from 'openai'; - import { PORTKEY_GATEWAY_URL, createHeaders } from 'portkey-ai' - - const openai = new OpenAI({ - baseURL: PORTKEY_GATEWAY_URL, - defaultHeaders: createHeaders({ - provider: "openai", - apiKey: "PORTKEY_API_KEY", - provider:"@OPENAI_PROVIDER" - }) - }); - - async function main() { - const response = await openai.responses.create({ - model: "gpt-4.1", - input: "Tell me a three sentence bedtime story about a unicorn." - }); - - console.log(response); - } +## Advanced Features - main(); - ``` - - +### Responses API - -The Responses API provides a more flexible foundation for building agentic applications with built-in tools that execute automatically. - +OpenAI's Responses API combines the best of both Chat Completions and Assistants APIs. Portkey fully supports this API with both the Portkey SDK and OpenAI SDK. + +```python Python icon="python" +from portkey_ai import Portkey - - Portkey supports Remote MCP support by OpenAI on it's Responses API. Learn More - +portkey = Portkey(api_key="PORTKEY_API_KEY") +response = portkey.responses.create( + model="@openai/gpt-4.1", + input="Tell me a three sentence bedtime story about a unicorn." +) +print(response) +``` -## Track End-User IDs +```js Javascript icon="square-js" +import Portkey from 'portkey-ai' -Portkey allows you to track user IDs passed with the `user` parameter in OpenAI requests, enabling you to monitor user-level costs, requests, and more. 
- - +const portkey = new Portkey({ + apiKey: "PORTKEY_API_KEY" +}) +const response = await portkey.responses.create({ + model: "@openai/gpt-4.1", + input: "Tell me a three sentence bedtime story about a unicorn." +}) -```js -const chatCompletion = await portkey.chat.completions.create({ - messages: [{ role: "user", content: "Say this is a test" }], - model: "gpt-4o", - user: "user_12345", -}); +console.log(response) ``` - +```python OpenAI Py icon="python" +from openai import OpenAI +from portkey_ai import PORTKEY_GATEWAY_URL - -```py -response = portkey.chat.completions.create( - model="gpt-4o", - messages=[{ role: "user", content: "Say this is a test" }] - user="user_123456" +client = OpenAI( + api_key="PORTKEY_API_KEY", + base_url=PORTKEY_GATEWAY_URL ) -``` - - -When you include the `user` parameter in your requests, Portkey logs will display the associated user ID, as shown in the image below: - - -logs - - -In addition to the `user` parameter, Portkey allows you to send arbitrary custom metadata with your requests. This powerful feature enables you to associate additional context or information with each request, which can be useful for analysis, debugging, or other custom use cases. - - - -* The same integration approach applies to APIs for [completions](https://platform.openai.com/docs/guides/text-generation/completions-api), [embeddings](https://platform.openai.com/docs/api-reference/embeddings/create), [vision](https://platform.openai.com/docs/guides/vision/quick-start), [moderation](https://platform.openai.com/docs/api-reference/moderations/create), [transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription), [translation](https://platform.openai.com/docs/api-reference/audio/createTranslation), [speech](https://platform.openai.com/docs/api-reference/audio/createSpeech) and [files](https://platform.openai.com/docs/api-reference/files/create). -* If you are looking for a way to add your **Org ID** & **Project ID** to the requests, head over to [this section](/integrations/llms/openai#managing-openai-projects-and-organizations-in-portkey). - - - -## Using the Prompts API - -Portkey also supports creating and managing prompt templates in the [prompt library](/product/prompt-library). This enables the collaborative development of prompts directly through the user interface. - -1. Create a prompt template with variables and set the hyperparameters. - - -prompt - - -2. Use this prompt in your codebase using the Portkey SDK. +response = client.responses.create( + model="@openai/gpt-4.1", + input="Tell me a three sentence bedtime story about a unicorn." +) - - +print(response) +``` -```js -import Portkey from 'portkey-ai' +```js OpenAI JS icon="square-js" +import OpenAI from 'openai' +import { PORTKEY_GATEWAY_URL } from 'portkey-ai' -const portkey = new Portkey({ +const openai = new OpenAI({ apiKey: "PORTKEY_API_KEY", + baseURL: PORTKEY_GATEWAY_URL }) -// Make the prompt creation call with the variables - -const promptCompletion = await portkey.prompts.completions.create({ - promptID: "Your Prompt ID", - variables: { - // The variables specified in the prompt - } +const response = await openai.responses.create({ + model: "@openai/gpt-4.1", + input: "Tell me a three sentence bedtime story about a unicorn." 
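  // `input` takes a plain string here; the Responses API also accepts an array of structured items (see the vision example later on this page)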
}) -``` - -```js -// We can also override the hyperparameters - -const promptCompletion = await portkey.prompts.completions.create({ - promptID: "Your Prompt ID", - variables: { - // The variables specified in the prompt - }, - max_tokens: 250, - presence_penalty: 0.2 -}) +console.log(response) ``` + - + +The Responses API provides a more flexible foundation for building agentic applications with built-in tools that execute automatically. + - -```python -from portkey_ai import Portkey + + Portkey supports Remote MCP support by OpenAI on its Responses API. Learn More + -client = Portkey( - api_key="PORTKEY_API_KEY", # defaults to os.environ.get("PORTKEY_API_KEY") -) +#### Streaming with Responses API -prompt_completion = client.prompts.completions.create( - prompt_id="Your Prompt ID", - variables={ - # The variables specified in the prompt - } + +```python Python icon="python" +response = portkey.responses.create( + model="@openai/gpt-4.1", + instructions="You are a helpful assistant.", + input="Hello!", + stream=True ) -print(prompt_completion) +for event in response: + print(event) +``` -# We can also override the hyperparameters +```js Javascript icon="square-js" +const response = await portkey.responses.create({ + model: "@openai/gpt-4.1", + instructions: "You are a helpful assistant.", + input: "Hello!", + stream: true +}) + +for await (const event of response) { + console.log(event) +} +``` -prompt_completion = client.prompts.completions.create( - prompt_id="Your Prompt ID", - variables={ - # The variables specified in the prompt - }, - max_tokens=250, - presence_penalty=0.2 +```python OpenAI Py icon="python" +response = client.responses.create( + model="gpt-4.1", + instructions="You are a helpful assistant.", + input="Hello!", + stream=True ) -print(prompt_completion) -``` - - - -```sh -curl -X POST "https://api.portkey.ai/v1/prompts/:PROMPT_ID/completions" \ --H "Content-Type: application/json" \ --H "x-portkey-api-key: $PORTKEY_API_KEY" \ --d '{ - "variables": { - # The variables to use - }, - "max_tokens": 250, # Optional - "presence_penalty": 0.2 # Optional -}' +for event in response: + print(event) ``` - - -Observe how this streamlines your code readability and simplifies prompt updates via the UI without altering the codebase. +```js OpenAI JS icon="square-js" +const response = await openai.responses.create({ + model: "gpt-4.1", + instructions: "You are a helpful assistant.", + input: "Hello!", + stream: true +}) -## Advanced Use Cases +for await (const event of response) { + console.log(event) +} +``` + ### Realtime API @@ -462,129 +309,6 @@ Portkey supports OpenAI's Realtime API with a seamless integration. This allows -### Streaming Responses - -Portkey supports streaming responses using Server Sent Events (SSE). 
- - - - ```js - import OpenAI from 'openai'; - - import { PORTKEY_GATEWAY_URL, createHeaders } from 'portkey-ai' - const openai = new OpenAI({ - baseURL: PORTKEY_GATEWAY_URL, - defaultHeaders: createHeaders({ - provider: "openai", - apiKey: "PORTKEY_API_KEY" // defaults to process.env["PORTKEY_API_KEY"] - }) - }); - - async function main() { - const stream = await openai.chat.completions.create({ - model: 'gpt-4', - messages: [{ role: 'user', content: 'Say this is a test' }], - stream: true, - }); - - for await (const chunk of stream) { - process.stdout.write(chunk.choices[0]?.delta?.content || ''); - } - } - - main(); - ``` - - - ```python - from openai import OpenAI - from portkey_ai import PORTKEY_GATEWAY_URL, createHeaders - - client = OpenAI( - api_key="OPENAI_API_KEY", # defaults to os.environ.get("OPENAI_API_KEY") - base_url=PORTKEY_GATEWAY_URL, - default_headers=createHeaders( - provider="openai", - api_key="PORTKEY_API_KEY" # defaults to os.environ.get("PORTKEY_API_KEY") - ) - ) - - chat_complete = client.chat.completions.create( - model="gpt-4", - messages=[{"role": "user", "content": "Say this is a test"}], - stream=True - ) - - for chunk in chat_complete: - print(chunk.choices[0].delta.content, end="", flush=True) - ``` - - - - - - -#### Streaming with the Responses API - -You can also stream responses from the Responses API: - - - - ```python - response = portkey.responses.create( - model="gpt-4.1", - instructions="You are a helpful assistant.", - input="Hello!", - stream=True - ) - - for event in response: - print(event) - ``` - - - ```js - const response = await portkey.responses.create({ - model: "gpt-4.1", - instructions: "You are a helpful assistant.", - input: "Hello!", - stream: true - }); - - for await (const event of response) { - console.log(event); - } - ``` - - - ```python - response = client.responses.create( - model="gpt-4.1", - instructions="You are a helpful assistant.", - input="Hello!", - stream=True - ) - - for event in response: - print(event) - ``` - - - ```js - const response = await openai.responses.create({ - model: "gpt-4.1", - instructions: "You are a helpful assistant.", - input: "Hello!", - stream: true - }); - - for await (const event of response) { - console.log(event); - } - ``` - - - ### Using Vision Models @@ -597,94 +321,89 @@ Portkey's multimodal Gateway fully supports OpenAI vision models as well. See th #### Vision with the Responses API -You can also use the Responses API to process images alongside text: - - - - ```python - response = portkey.responses.create( - model="gpt-4.1", - input=[ - { - "role": "user", - "content": [ - { "type": "input_text", "text": "What is in this image?" }, - { - "type": "input_image", - "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - } - ] - } - ] - ) - - print(response) - ``` - - - ```js - const response = await portkey.responses.create({ - model: "gpt-4.1", - input: [ - { - role: "user", - content: [ - { type: "input_text", text: "What is in this image?" 
}, - { - type: "input_image", - image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - } - ] - } - ] - }); - - console.log(response); - ``` - - - ```python - response = client.responses.create( - model="gpt-4.1", - input=[ - { - "role": "user", - "content": [ - { "type": "input_text", "text": "What is in this image?" }, - { - "type": "input_image", - "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - } - ] - } - ] - ) - - print(response) - ``` - - - ```js - const response = await openai.responses.create({ - model: "gpt-4.1", - input: [ - { - role: "user", - content: [ - { type: "input_text", text: "What is in this image?" }, - { - type: "input_image", - image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - } - ] - } - ] - }); - - console.log(response); - ``` - - +The Responses API also processes images alongside text: + + +```python Python icon="python" +response = portkey.responses.create( + model="@openai/gpt-4.1", + input=[ + { + "role": "user", + "content": [ + { "type": "input_text", "text": "What is in this image?" }, + { + "type": "input_image", + "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + ] + } + ] +) + +print(response) +``` + +```js Javascript icon="square-js" +const response = await portkey.responses.create({ + model: "@openai/gpt-4.1", + input: [ + { + role: "user", + content: [ + { type: "input_text", text: "What is in this image?" }, + { + type: "input_image", + image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + ] + } + ] +}) + +console.log(response) +``` + +```python OpenAI Py icon="python" +response = client.responses.create( + model="gpt-4.1", + input=[ + { + "role": "user", + "content": [ + { "type": "input_text", "text": "What is in this image?" }, + { + "type": "input_image", + "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + ] + } + ] +) + +print(response) +``` + +```js OpenAI JS icon="square-js" +const response = await openai.responses.create({ + model: "gpt-4.1", + input: [ + { + role: "user", + content: [ + { type: "input_text", text: "What is in this image?" }, + { + type: "input_image", + image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + ] + } + ] +}) + +console.log(response) +``` + @@ -699,132 +418,127 @@ Additionally, you can define functions within your prompts and invoke the `portk The Responses API also supports function calling with the same powerful capabilities: - - - ```python - tools = [ - { - "type": "function", - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. 
San Francisco, CA" - }, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + +```python Python icon="python" +tools = [ + { + "type": "function", + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" }, - "required": ["location", "unit"] - } + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + }, + "required": ["location", "unit"] } - ] + } +] - response = portkey.responses.create( - model="gpt-4.1", - tools=tools, - input="What is the weather like in Boston today?", - tool_choice="auto" - ) - - print(response) - ``` - - - ```js - const tools = [ - { - type: "function", - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. San Francisco, CA" - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] } +response = portkey.responses.create( + model="@openai/gpt-4.1", + tools=tools, + input="What is the weather like in Boston today?", + tool_choice="auto" +) + +print(response) +``` + +```js Javascript icon="square-js" +const tools = [ + { + type: "function", + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA" }, - required: ["location", "unit"] - } + unit: { type: "string", enum: ["celsius", "fahrenheit"] } + }, + required: ["location", "unit"] } - ]; - - const response = await portkey.responses.create({ - model: "gpt-4.1", - tools: tools, - input: "What is the weather like in Boston today?", - tool_choice: "auto" - }); - - console.log(response); - ``` - - - ```python - tools = [ - { - "type": "function", - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA" - }, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + } +] + +const response = await portkey.responses.create({ + model: "@openai/gpt-4.1", + tools: tools, + input: "What is the weather like in Boston today?", + tool_choice: "auto" +}) + +console.log(response) +``` + +```python OpenAI Py icon="python" +tools = [ + { + "type": "function", + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" }, - "required": ["location", "unit"] - } + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + }, + "required": ["location", "unit"] } - ] + } +] - response = client.responses.create( - model="gpt-4.1", - tools=tools, - input="What is the weather like in Boston today?", - tool_choice="auto" - ) - - print(response) - ``` - - - ```js - const tools = [ - { - type: "function", - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. 
San Francisco, CA" - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] } +response = client.responses.create( + model="gpt-4.1", + tools=tools, + input="What is the weather like in Boston today?", + tool_choice="auto" +) + +print(response) +``` + +```js OpenAI JS icon="square-js" +const tools = [ + { + type: "function", + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA" }, - required: ["location", "unit"] - } + unit: { type: "string", enum: ["celsius", "fahrenheit"] } + }, + required: ["location", "unit"] } - ]; + } +] - const response = await openai.responses.create({ - model: "gpt-4.1", - tools: tools, - input: "What is the weather like in Boston today?", - tool_choice: "auto" - }); +const response = await openai.responses.create({ + model: "gpt-4.1", + tools: tools, + input: "What is the weather like in Boston today?", + tool_choice: "auto" +}) - console.log(response); - ``` - - +console.log(response) +``` + @@ -834,33 +548,29 @@ Please refer to our fine-tuning guides to take advantage of Portkey's advanced [ ### Image Generation -Portkey supports multiple modalities for OpenAI and you can make image generation requests through Portkey's AI Gateway the same way as making completion calls. - - - - ```js - // Define the OpenAI client as shown above - - const image = await openai.images.generate({ - model:"dall-e-3", - prompt:"Lucy in the sky with diamonds", - size:"1024x1024" - }) - ``` - - - - ```python - # Define the OpenAI client as shown above - - image = openai.images.generate( - model="dall-e-3", - prompt="Lucy in the sky with diamonds", - size="1024x1024" - ) - ``` - - +Portkey supports multiple modalities for OpenAI. Make image generation requests through Portkey's AI Gateway the same way as making completion calls. + + +```js Javascript icon="square-js" +// Define the OpenAI client as shown above + +const image = await openai.images.generate({ + model:"dall-e-3", + prompt:"Lucy in the sky with diamonds", + size:"1024x1024" +}) +``` + +```python Python icon="python" +# Define the OpenAI client as shown above + +image = openai.images.generate( + model="dall-e-3", + prompt="Lucy in the sky with diamonds", + size="1024x1024" +) +``` + Portkey's fast AI gateway captures the information about the request on your Portkey Dashboard. On your logs screen, you'd be able to see this request with the request and response. @@ -871,27 +581,22 @@ More information on image generation is available in the [API Reference](/provid ### Video Generation with Sora -Portkey supports OpenAI's Sora video generation models through the AI Gateway. You can generate videos using the Portkey Python SDK. +Portkey supports OpenAI's Sora video generation models through the AI Gateway. 
Generate videos using the Portkey Python SDK: - - - ```python - from portkey_ai import Portkey +```python +from portkey_ai import Portkey - client = Portkey( - api_key="your-portkey-api-key", - provider="@your-portkey-provider-slug", - ) +client = Portkey( + api_key="PORTKEY_API_KEY" +) - video = client.videos.create( - model="sora-2", - prompt="A video of a cool cat on a motorcycle in the night", - ) +video = client.videos.create( + model="@openai/sora-2", + prompt="A video of a cool cat on a motorcycle in the night", +) - print("Video generation started:", video) - ``` - - +print("Video generation started:", video) +``` Pricing for video generation requests will be visible on your Portkey dashboard, allowing you to track costs alongside your other API usage. @@ -910,54 +615,51 @@ Check out the below guides for more info: --- -## Intergrated Tools with Repsponses API +## Integrated Tools with Responses API ### Web Search Tool Web search delivers accurate and clearly-cited answers from the web, using the same tool as search in ChatGPT: - - - ```python - response = portkey.responses.create( - model="gpt-4.1", - tools=[{ - "type": "web_search_preview", - "search_context_size": "medium", # Options: "high", "medium" (default), or "low" - "user_location": { # Optional - for localized results - "type": "approximate", - "country": "US", - "city": "San Francisco", - "region": "California" - } - }], - input="What was a positive news story from today?" - ) - - print(response) - ``` - - - ```js - const response = await portkey.responses.create({ - model: "gpt-4.1", - tools: [{ - type: "web_search_preview", - search_context_size: "medium", // Options: "high", "medium" (default), or "low" - user_location: { // Optional - for localized results - type: "approximate", - country: "US", - city: "San Francisco", - region: "California" - } - }], - input: "What was a positive news story from today?" - }); - - console.log(response); - ``` - - + +```python Python icon="python" +response = portkey.responses.create( + model="@openai/gpt-4.1", + tools=[{ + "type": "web_search_preview", + "search_context_size": "medium", # Options: "high", "medium" (default), or "low" + "user_location": { # Optional - for localized results + "type": "approximate", + "country": "US", + "city": "San Francisco", + "region": "California" + } + }], + input="What was a positive news story from today?" +) + +print(response) +``` + +```js Javascript icon="square-js" +const response = await portkey.responses.create({ + model: "@openai/gpt-4.1", + tools: [{ + type: "web_search_preview", + search_context_size: "medium", // Options: "high", "medium" (default), or "low" + user_location: { // Optional - for localized results + type: "approximate", + country: "US", + city: "San Francisco", + region: "California" + } + }], + input: "What was a positive news story from today?" +}) + +console.log(response) +``` + **Options for `search_context_size`:** @@ -972,48 +674,45 @@ Web search delivers accurate and clearly-cited answers from the web, using the s File search enables quick retrieval from your knowledge base across multiple file types: - - - ```python - response = portkey.responses.create( - model="gpt-4.1", - tools=[{ - "type": "file_search", - "vector_store_ids": ["vs_1234567890"], - "max_num_results": 20, - "filters": { # Optional - filter by metadata - "type": "eq", - "key": "document_type", - "value": "report" - } - }], - input="What are the attributes of an ancient brown dragon?" 
- ) - - print(response) - ``` - - - ```js - const response = await portkey.responses.create({ - model: "gpt-4.1", - tools: [{ - type: "file_search", - vector_store_ids: ["vs_1234567890"], - max_num_results: 20, - filters: { // Optional - filter by metadata - type: "eq", - key: "document_type", - value: "report" - } - }], - input: "What are the attributes of an ancient brown dragon?" - }); - - console.log(response); - ``` - - + +```python Python icon="python" +response = portkey.responses.create( + model="@openai/gpt-4.1", + tools=[{ + "type": "file_search", + "vector_store_ids": ["vs_1234567890"], + "max_num_results": 20, + "filters": { # Optional - filter by metadata + "type": "eq", + "key": "document_type", + "value": "report" + } + }], + input="What are the attributes of an ancient brown dragon?" +) + +print(response) +``` + +```js Javascript icon="square-js" +const response = await portkey.responses.create({ + model: "@openai/gpt-4.1", + tools: [{ + type: "file_search", + vector_store_ids: ["vs_1234567890"], + max_num_results: 20, + filters: { // Optional - filter by metadata + type: "eq", + key: "document_type", + value: "report" + } + }], + input: "What are the attributes of an ancient brown dragon?" +}) + +console.log(response) +``` + This tool requires you to first create a vector store and upload files to it. Supports various file formats including PDFs, DOCXs, TXT, and more. Results include file citations in the response. @@ -1023,34 +722,31 @@ File search enables quick retrieval from your knowledge base across multiple fil Control the depth of model reasoning for more comprehensive analysis: - - - ```python - response = portkey.responses.create( - model="o3-mini", - input="How much wood would a woodchuck chuck?", - reasoning={ - "effort": "high" # Options: "high", "medium", or "low" - } - ) - - print(response) - ``` - - - ```js - const response = await portkey.responses.create({ - model: "o3-mini", - input: "How much wood would a woodchuck chuck?", - reasoning: { - effort: "high" // Options: "high", "medium", or "low" - } - }); + +```python Python icon="python" +response = portkey.responses.create( + model="@openai/o3-mini", + input="How much wood would a woodchuck chuck?", + reasoning={ + "effort": "high" # Options: "high", "medium", or "low" + } +) - console.log(response); - ``` - - +print(response) +``` + +```js Javascript icon="square-js" +const response = await portkey.responses.create({ + model: "@openai/o3-mini", + input: "How much wood would a woodchuck chuck?", + reasoning: { + effort: "high" // Options: "high", "medium", or "low" + } +}) + +console.log(response) +``` + ### Computer Use Assistant @@ -1065,42 +761,42 @@ Portkey also supports the Computer Use Assistant (CUA) tool, which helps agents ## Managing OpenAI Projects & Organizations in Portkey -When integrating OpenAI with Portkey, you can specify your OpenAI organization and project IDs along with your API key. This is particularly useful if you belong to multiple organizations or are accessing projects through a legacy user API key. +When integrating OpenAI with Portkey, specify your OpenAI organization and project IDs along with your API key. This is particularly useful if you belong to multiple organizations or are accessing projects through a legacy user API key. Specifying the organization and project IDs helps you maintain better control over your access rules, usage, and costs. -In Portkey, you can add your Org & Project details by, +Add your Org & Project details using: -1. Creating your Virtual Key +1. 
Adding in Model Catalog (Recommended) 2. Defining a Gateway Config 3. Passing Details in a Request Let's explore each method in more detail. -### Using Virtual Keys +### Using Model Catalog -When selecting OpenAI from the dropdown menu while creating a virtual key, Portkey automatically displays optional fields for the organization ID and project ID alongside the API key field. +When adding OpenAI from the Model Catalog, Portkey automatically displays optional fields for the organization ID and project ID alongside the API key field. -[Get your OpenAI API key from here](https://platform.openai.com/api-keys), then add it to Portkey to create the virtual key that can be used throughout Portkey. +[Get your OpenAI API key from here](https://platform.openai.com/api-keys), then add it to Portkey along with your org/project details. ![LOGO](/images/llms/virtual.png) -[Virtual Keys](/product/ai-gateway/virtual-keys) +[Model Catalog](/product/model-catalog) -Portkey takes budget management a step further than OpenAI. While OpenAI allows setting budget limits per project, Portkey enables you to set budget limits for each virtual key you create. For more information on budget limits, refer to this documentation: +Portkey takes budget management a step further than OpenAI. While OpenAI allows setting budget limits per project, Portkey enables you to set budget limits for each provider you create. For more information on budget limits, refer to this documentation: -[Budget Limits](/product/ai-gateway/virtual-keys/budget-limits) +[Budget Limits](/product/ai-gateway/budget-limits) -### Using The Gateway Config + +### Using the Gateway Config You can also specify the organization and project details in the gateway config, either at the root level or within a specific target. ```json { - "provider": "openai", - "api_key": "OPENAI_API_KEY", + "provider": "@openai", "openai_organization": "org-xxxxxx", "openai_project": "proj_xxxxxxxx" } @@ -1109,179 +805,118 @@ You can also specify the organization and project details in the gateway config, ### While Making a Request You can also pass your organization and project details directly when making a request using curl, the OpenAI SDK, or the Portkey SDK. 
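All of these approaches forward the same organization and project values to OpenAI. If you prefer to set the headers manually, the cURL tab below shows the underlying `x-portkey-openai-organization` and `x-portkey-openai-project` headers.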
- - - ```python - from openai import OpenAI - from portkey_ai import PORTKEY_GATEWAY_URL, createHeaders - - client = OpenAI( - api_key="OPENAI_API_KEY", - organization="org-xxxxxxxxxx", - project="proj_xxxxxxxxx", - base_url=PORTKEY_GATEWAY_URL, - default_headers=createHeaders( - provider="openai", - api_key="PORTKEY_API_KEY" - ) - ) - - chat_complete = client.chat.completions.create( - model="gpt-4o", - messages=[{"role": "user", "content": "Say this is a test"}], - ) - - print(chat_complete.choices[0].message.content) - ``` - - - - -```js -import OpenAI from "openai"; -import { PORTKEY_GATEWAY_URL, createHeaders } from "portkey-ai"; + + +```python OpenAI Py icon="python" +from openai import OpenAI +from portkey_ai import PORTKEY_GATEWAY_URL + +client = OpenAI( + api_key="PORTKEY_API_KEY", + organization="org-xxxxxxxxxx", + project="proj_xxxxxxxxx", + base_url=PORTKEY_GATEWAY_URL +) + +chat_complete = client.chat.completions.create( + model="@openai/gpt-4o", + messages=[{"role": "user", "content": "Say this is a test"}], +) + +print(chat_complete.choices[0].message.content) +``` + +```js OpenAI JS icon="square-js" +import OpenAI from "openai" +import { PORTKEY_GATEWAY_URL } from "portkey-ai" const openai = new OpenAI({ - apiKey: "OPENAI_API_KEY", + apiKey: "PORTKEY_API_KEY", organization: "org-xxxxxx", project: "proj_xxxxxxx", - baseURL: PORTKEY_GATEWAY_URL, - defaultHeaders: createHeaders({ - provider: "openai", - apiKey: "PORTKEY_API_KEY", - }), -}); + baseURL: PORTKEY_GATEWAY_URL +}) async function main() { const chatCompletion = await openai.chat.completions.create({ messages: [{ role: "user", content: "Say this is a test" }], - model: "gpt-4o", - }); + model: "@openai/gpt-4o", + }) - console.log(chatCompletion.choices); + console.log(chatCompletion.choices) } -main(); +main() ``` - - - -```sh +```sh cURL icon="square-terminal" curl https://api.portkey.ai/v1/chat/completions \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $OPENAI_API_KEY" \ -H "x-portkey-openai-organization: org-xxxxxxx" \ -H "x-portkey-openai-project: proj_xxxxxxx" \ -H "x-portkey-api-key: $PORTKEY_API_KEY" \ - -H "x-portkey-provider: openai" \ + -H "x-portkey-provider: @openai" \ -d '{ "model": "gpt-4o", "messages": [{"role": "user","content": "Hello!"}] }' ``` - - -```python +```python Python icon="python" from portkey_ai import Portkey portkey = Portkey( api_key="PORTKEY_API_KEY", - provider="openai", Authorization="Bearer OPENAI_API_KEY", openai_organization="org-xxxxxxxxx", openai_project="proj_xxxxxxxxx", ) chat_complete = portkey.chat.completions.create( - model="gpt-4o", + model="@openai/gpt-4o", messages=[{"role": "user", "content": "Say this is a test"}], ) print(chat_complete.choices[0].message.content) ``` - - -```js -import Portkey from "portkey-ai"; +```js Javascript icon="square-js" +import Portkey from "portkey-ai" const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY", - provider: "openai", Authorization: "Bearer OPENAI_API_KEY", openaiOrganization: "org-xxxxxxxxxxx", openaiProject: "proj_xxxxxxxxxxxxx", -}); +}) async function main() { const chatCompletion = await portkey.chat.completions.create({ messages: [{ role: "user", content: "Say this is a test" }], - model: "gpt-4o", - }); + model: "@openai/gpt-4o", + }) - console.log(chatCompletion.choices); + console.log(chatCompletion.choices) } -main(); +main() ``` - - + --- -### mTLS Support [Enterprise Only] [Beta] - -Enabling mTLS will enable mTLS for all requests made to OpenAI through portkey, and would break existing workflows. 
-Additionally, mTLS is not supported for certain routes. Please refer to the [OpenAI documentation](https://help.openai.com/en/articles/10876024-openai-mutual-tls-beta-program) for more details.
-
-
-Generating Client Key and Certificate -Here's a quick script to generate server and client key and certificate: -```bash -# 1. create CA key + self-signed CA cert -openssl genrsa -out my_ca.key 4096 -openssl req -x509 -new -nodes -key my_ca.key -sha256 -days 3650 \ - -subj "/CN=My Custom CA" \ - -addext "subjectKeyIdentifier=hash" \ - -addext "authorityKeyIdentifier=keyid:always,issuer:always" \ - -out my_ca.pem - -# 2. client key + CSR (include SAN) -openssl genrsa -out client.key 4096 -openssl req -new -key client.key \ - -subj "/CN=my-client" \ - -addext "subjectAltName=DNS:localhost,IP:127.0.0.1" \ - -out client.csr - -# 3. sign CSR with CA to get client cert -openssl x509 -req -in client.csr -CA my_ca.pem -CAkey my_ca.key \ - -CAcreateserial -days 365 -sha256 \ - -extfile <(printf "subjectKeyIdentifier=hash\nauthorityKeyIdentifier=keyid,issuer:always\nkeyUsage=digitalSignature,keyEncipherment\nextendedKeyUsage=clientAuth\nsubjectAltName=DNS:localhost,IP:127.0.0.1") \ - -out client.pem - -# you will need to upload the file titled my_ca.pem to OpenAI as your CA certificate -# you can use the client.key and client.pem files for your client key and certificate in your gateway -``` -
- -To upload the CA certificate to OpenAI, you can use the [APIs](https://platform.openai.com/docs/api-reference/certificates) or upload it via the OpenAI dashboard. - -For self hosted enterprise deployments, you can configure the client key and certificate with the following environment variables: -```bash -export OPENAI_MTLS_KEY_PATH="/path/to/client.key" -export OPENAI_MTLS_CERT_PATH="/path/to/client.pem" -``` - -### Portkey Features - -Portkey supports the complete host of it's functionality via the OpenAI SDK so you don't need to migrate away from it. - -Please find more information in the relevant sections: - -1. [Add metadata to your requests](/product/observability/metadata) -2. [Add gateway configs to the OpenAI client or a single request](/product/ai-gateway/configs) -3. [Tracing OpenAI requests](/product/observability/traces) -4. [Setup a fallback to Azure OpenAI](/product/ai-gateway/fallbacks) +## Frequently Asked Questions + +### General FAQs + + +You can sign up to OpenAI [here](https://platform.openai.com/docs/overview) and grab your scoped API key [here](https://platform.openai.com/api-keys). + + +The OpenAI API can be used by signing up to the OpenAI platform. You can find the pricing info [here](https://openai.com/api/pricing/) + + +You can find your current rate limits imposed by OpenAI [here](https://platform.openai.com/settings/organization/limits). For more tips, check out [this guide](/guides/getting-started/tackling-rate-limiting). + + diff --git a/integrations/llms/openai/prompt-caching-openai.mdx b/integrations/llms/openai/prompt-caching-openai.mdx index 73277479..a1e6fdd0 100644 --- a/integrations/llms/openai/prompt-caching-openai.mdx +++ b/integrations/llms/openai/prompt-caching-openai.mdx @@ -65,7 +65,7 @@ Portkey supports OpenAI's prompt caching feature out of the box. Here is an exam const portkey = new Portkey({ apiKey: "PORTKEY_API_KEY", // defaults to process.env["PORTKEY_API_KEY"] - provider:"@PROVIDER" // Your OpenAI Virtual Key + provider:"@PROVIDER" // Your OpenAI Provider Slug }) // Define tools (for function calling example) diff --git a/integrations/llms/openai3.mdx b/integrations/llms/openai3.mdx index a6aa5d81..ee8f0d71 100644 --- a/integrations/llms/openai3.mdx +++ b/integrations/llms/openai3.mdx @@ -1103,7 +1103,7 @@ When selecting OpenAI from the Virtual Key dropdown menu while creating a virtua -Portkey takes budget management a step further than OpenAI. While OpenAI allows setting budget limits per project, Portkey enables you to set budget limits for each virtual key you create. For more information on budget limits, [refer to this documentation](/product/ai-gateway/virtual-keys/budget-limits) +Portkey takes budget management a step further than OpenAI. While OpenAI allows setting budget limits per project, Portkey enables you to set budget limits for each virtual key you create. For more information on budget limits, [refer to this documentation](/product/model-catalog/integrations#3-budget-%26-rate-limits) diff --git a/integrations/llms/vertex-ai.mdx b/integrations/llms/vertex-ai.mdx index cd106bd0..9006c5aa 100644 --- a/integrations/llms/vertex-ai.mdx +++ b/integrations/llms/vertex-ai.mdx @@ -4,113 +4,129 @@ title: "Google Vertex AI" Portkey provides a robust and secure gateway to facilitate the integration of various Large Language Models (LLMs), and embedding models into your apps, including [Google Vertex AI](https://cloud.google.com/vertex-ai?hl=en). 
-With Portkey, you can take advantage of features like fast AI gateway access, observability, prompt management, and more, all while ensuring the secure management of your Vertex auth through a Portkey's [Model Catalog](/product/model-catalog) +With Portkey, you can take advantage of features like fast AI gateway access, observability, prompt management, and more, all while ensuring the secure management of your Vertex auth through [Model Catalog](/product/model-catalog). - -Provider Slug. `vertex-ai` - +## Quick Start -## Portkey SDK Integration with Google Vertex AI + -Portkey provides a consistent API to interact with models from various providers. To integrate Google Vertex AI with Portkey: +```python Python icon="python" +from portkey_ai import Portkey -### 1\. Install the Portkey SDK +# 1. Install: pip install portkey-ai +# 2. Add @vertex-ai provider in Model Catalog with Service Account JSON +# 3. Use it: -Add the Portkey SDK to your application to interact with Google Vertex AI API through Portkey's gateway. - - +portkey = Portkey(api_key="PORTKEY_API_KEY") - ```sh - npm install --save portkey-ai - ``` - - - ```sh -pip install portkey-ai +response = portkey.chat.completions.create( + model="@vertex-ai/gemini-3-pro-preview", + messages=[{"role": "user", "content": "Say this is a test"}] +) + +print(response.choices[0].message.content) ``` - - +```js Javascript icon="square-js" +import Portkey from 'portkey-ai' +// 1. Install: npm install portkey-ai +// 2. Add @vertex-ai provider in Model Catalog with Service Account JSON +// 3. Use it: +const portkey = new Portkey({ + apiKey: "PORTKEY_API_KEY" +}) +const response = await portkey.chat.completions.create({ + model: "@vertex-ai/gemini-3-pro-preview", + messages: [{ role: "user", content: "Say this is a test" }] +}) -### 2\. Initialize Portkey Client +console.log(response.choices[0].message.content) +``` -To integrate Vertex AI with Portkey, you'll need your `Vertex Project Id` Or `Service Account JSON` & `Vertex Region`, with which you can set up the Portkey's AI Provider. +```sh cURL icon="square-terminal" +# 1. Add @vertex-ai provider in Model Catalog with Service Account JSON +# 2. Use it: -[Here's a guide on how to find your Vertex Project details](/integrations/llms/vertex-ai#how-to-find-your-google-vertex-project-details) +curl https://api.portkey.ai/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "x-portkey-api-key: $PORTKEY_API_KEY" \ + -d '{ + "model": "@vertex-ai/gemini-3-pro-preview", + "messages": [{"role": "user", "content": "Say this is a test"}] + }' +``` -If you are integrating through Service Account File, [refer to this guide](/integrations/llms/vertex-ai#get-your-service-account-json). + - - - ```js - import Portkey from 'portkey-ai' + +**Authentication Note:** When you configure Vertex AI in Model Catalog with **Service Account JSON** (recommended), authentication is handled automatically. If you only configure with **Project ID and Region**, you'll need to pass an OAuth2 access token with each request using the `Authorization` header. See the [Making Requests Without Model Catalog](#making-requests-without-portkeys-model-catalog) section for details. 
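+
+For example, here is a minimal sketch of the Project ID + Region flow, using the `with_options` pattern shown later in this guide (the placeholder token and the `gcloud` command for minting it are assumptions about your environment):
+
+```python Python icon="python"
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+# Short-lived OAuth2 token, e.g. via: gcloud auth print-access-token
+response = portkey.with_options(
+    Authorization="Bearer YOUR_VERTEX_ACCESS_TOKEN"
+).chat.completions.create(
+    model="@vertex-ai/gemini-3-pro-preview",
+    messages=[{"role": "user", "content": "Say this is a test"}]
+)
+
+print(response.choices[0].message.content)
+```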
+ - const portkey = new Portkey({ - apiKey: "PORTKEY_API_KEY", // defaults to process.env["PORTKEY_API_KEY"] +--- - }) - ``` - - - ```python - from portkey_ai import Portkey +## Add Provider in Model Catalog - portkey = Portkey( - api_key="PORTKEY_API_KEY", # Replace with your Portkey API key - ) - ``` - - + + -If you do not want to add your Vertex AI details to Portkey vault, you can directly pass them while instantiating the Portkey client. [More on that here](/integrations/llms/vertex-ai#making-requests-without-portkeys-model-catalog). +Go to [**Model Catalog → Add Provider**](https://app.portkey.ai/model-catalog/providers) in your Portkey dashboard. -### **3\. Invoke Chat Completions with** Vertex AI + -Use the Portkey instance to send requests to any models hosted on Vertex AI. You can also override the Portkey's AI Provider directly in the API call if needed. - -Vertex AI uses OAuth2 to authenticate its requests, so you need to send the **access token** additionally along with the request. - + - - -```js -const chatCompletion = await portkey.chat.completions.create({ - messages: [{ role: 'user', content: 'Say this is a test' }], - model: '@VERTEX_PROVIDER/gemini-1.5-pro-latest', // your model slug from Portkey's Model Catalog -}, {Authorization: "Bearer $YOUR_VERTEX_ACCESS_TOKEN"}); +Find and select **Google Vertex AI** from the provider list. -console.log(chatCompletion.choices); -``` + - - -```python -completion = portkey.with_options(Authorization="Bearer $YOUR_VERTEX_ACCESS_TOKEN").chat.completions.create( - messages= [{ "role": 'user', "content": 'Say this is a test' }], - model= '@VERTEX_PROVIDER/gemini-1.5-pro-latest' # your model slug from Portkey's Model Catalog -) + -print(completion) -``` - - +You'll need your `Vertex Project ID` and `Vertex Region`. You can authenticate using either: + +**Option 1: Service Account JSON** (Recommended for self-deployed models) +- Upload your Google Cloud service account JSON file +- Specify the Vertex Region +- Required for custom endpoints (must have `aiplatform.endpoints.predict` permission) + +**Option 2: Project ID and Region** +- Enter your Vertex Project ID +- Enter your Vertex Region +- Simpler but may not support all features + +[Here's a guide on how to find your Vertex Project details](#how-to-find-your-google-vertex-project-details) + +If you're using Service Account File, [refer to this guide](#get-your-service-account-json). + + + + + +Save your configuration. Your provider slug will be `@vertex-ai` (or a custom name you specify). + + + + -To use Anthopic models on Vertex AI, prepend `anthropic.` to the model name.
-Example: `@VERTEX_PROVIDER/anthropic.claude-3-5-sonnet@20240620` +**To use Anthropic models on Vertex AI**, prepend `anthropic.` to the model name. +Example: `@vertex-ai/anthropic.claude-3-5-sonnet@20240620` -Similarly, for Meta models, prepend `meta.` to the model name.
-Example: `@VERTEX_PROVIDER/meta.llama-3-8b-8192` +Similarly, **for Meta models**, prepend `meta.` to the model name. +Example: `@vertex-ai/meta.llama-3-8b-8192`
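+
+For instance, a chat completion against the Claude slug above looks like this (a minimal sketch; the slug simply joins your provider slug with the prefixed model name):
+
+```python Python icon="python"
+from portkey_ai import Portkey
+
+portkey = Portkey(api_key="PORTKEY_API_KEY")
+
+# Provider slug + 'anthropic.' prefix, per the naming rule above
+response = portkey.chat.completions.create(
+    model="@vertex-ai/anthropic.claude-3-5-sonnet@20240620",
+    max_tokens=1024,
+    messages=[{"role": "user", "content": "Say this is a test"}]
+)
+
+print(response.choices[0].message.content)
+```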
**Anthropic Beta Header Support**: When using Anthropic models on Vertex AI, you can pass the `anthropic-beta` header (or `x-portkey-anthropic-beta`) to enable beta features. This header is forwarded to the underlying Anthropic API. +--- + +## Vertex AI Capabilities + @@ -154,7 +170,7 @@ This route only works with Claude models. For other models, use the standard Ope ```python Anthropic Python SDK - import anthropic +import anthropic client = anthropic.Anthropic( api_key="dummy", # we will use portkey's provider slug @@ -173,15 +189,15 @@ This route only works with Claude models. For other models, use the standard Ope ```typescript Anthropic TS SDK - import Anthropic from '@anthropic-ai/sdk'; + import Anthropic from '@anthropic-ai/sdk'; - const anthropic = new Anthropic({ + const anthropic = new Anthropic({ apiKey: 'dummy', // we will use portkey's provider slug baseURL: "https://api.portkey.ai/v1", defaultHeaders: { "x-portkey-api-key": "YOUR_PORTKEY_API_KEY" } }); - const msg = await anthropic.messages.create({ + const msg = await anthropic.messages.create({ model: "@your-provider-slug/your-model-name", max_tokens: 1024, messages: [{ role: "user", content: "Hello, Claude" }], @@ -195,122 +211,7 @@ This route only works with Claude models. For other models, use the standard Ope Portkey supports the [Google Vertex AI CountTokens API](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/count-tokens) to estimate token usage before sending requests. Check out the count-tokens guide for more details.
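
As a rough sketch, the call can mirror the provider passthrough pattern used for other Vertex-native endpoints in this guide (the URL path and custom host below are assumptions, so confirm the exact route in the count-tokens guide):

```python Python icon="python"
import requests

# Sketch only: assumes CountTokens is proxied like other Vertex-native
# endpoints; the count-tokens guide documents the exact path.
url = (
    "https://api.portkey.ai/v1/projects/YOUR_PROJECT_ID/locations/LOCATION"
    "/publishers/google/models/MODEL_ID:countTokens"
)
headers = {
    "x-portkey-api-key": "PORTKEY_API_KEY",
    "x-portkey-provider": "@vertex-ai",
    "x-portkey-custom-host": "https://aiplatform.googleapis.com/v1",
    "Content-Type": "application/json",
}
payload = {"contents": [{"role": "user", "parts": [{"text": "How many tokens is this?"}]}]}

print(requests.post(url, headers=headers, json=payload).json())
```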
-## Explicit context caching - -Vertex AI supports [context caching](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create) to reduce costs and latency for repeated prompts with large amounts of context. You can explicitly create a cache and then reference it in subsequent inference requests. - -### Step 1: Create a context cache - -Use the Vertex AI `cachedContents` endpoint through Portkey to create a cache: - -```sh cURL -curl --location 'https://api.portkey.ai/v1/projects/{{YOUR_PROJECT_ID}}/locations/{{LOCATION}}/cachedContents' \ ---header 'x-portkey-provider: {{@my-vertex-ai-provider}}' \ ---header 'Content-Type: application/json' \ ---header 'x-portkey-api-key: {{your_api_key}}' \ ---header 'x-portkey-custom-host: https://aiplatform.googleapis.com/v1' \ ---data '{ - "model": "projects/{{YOUR_PROJECT_ID}}/locations/{{LOCATION}}/publishers/google/models/{{MODEL_ID}}", - "displayName": "{{my-cache-display-name}}", - "contents": [{ - "role": "user", - "parts": [{ - "text": "This is sample text to demonstrate explicit caching. (you need a minimum of 1024 tokens)" - }] - }, - { - "role": "model", - "parts": [{ - "text": "thankyou I am your helpful assistant" - }] - }] -}' -``` - -**Request variables:** - -| Variable | Description | -|----------|-------------| -| `YOUR_PROJECT_ID` | Your Google Cloud project ID. | -| `LOCATION` | The region where your model is deployed (e.g., `us-central1`). | -| `MODEL_ID` | The model identifier (e.g., `gemini-1.5-pro-001`). | -| `my-cache-display-name` | A unique name to identify your cache. | -| `your_api_key` | Your Portkey API key. | -| `@my-vertex-ai-provider` | Your Vertex AI provider slug from Portkey's Model Catalog. | - - -Context caching requires a minimum of 1024 tokens in the cached content. The cache has a default TTL (time-to-live) which you can configure using the `ttl` parameter. - - -### Step 2: Use the cache in inference requests - -Once the cache is created, reference it in your chat completion requests using the `cached_content` parameter: - - - -```sh -curl 'https://api.portkey.ai/v1/chat/completions' \ --H 'Content-Type: application/json' \ --H 'x-portkey-api-key: {{your_api_key}}' \ ---data '{ - "model": "@my-vertex-ai-provider/gemini-1.5-pro-001", - "cached_content": "{{my-cache-display-name}}", - "messages": [ - { - "role": "user", - "content": "Based on the context I provided earlier, answer my question." - } - ] -}' -``` - - -```python -from portkey_ai import Portkey - -portkey = Portkey( - api_key="PORTKEY_API_KEY", -) -completion = portkey.chat.completions.create( - model="@my-vertex-ai-provider/gemini-1.5-pro-001", - cached_content="my-cache-display-name", - messages=[ - {"role": "user", "content": "Based on the context I provided earlier, answer my question."} - ] -) - -print(completion) -``` - - -```javascript -import Portkey from 'portkey-ai'; - -const portkey = new Portkey({ - apiKey: "PORTKEY_API_KEY", -}); - -const completion = await portkey.chat.completions.create({ - model: "@my-vertex-ai-provider/gemini-1.5-pro-001", - cached_content: "my-cache-display-name", - messages: [ - { role: "user", content: "Based on the context I provided earlier, answer my question." } - ] -}); - -console.log(completion); -``` - - - - -The model and region used in the inference request must match the model and region used when creating the cache. 
- - -For more details on context caching options like TTL configuration and cache management, refer to the [Vertex AI context caching documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create). - ---- ## Using Self-Deployed Models on Vertex AI (Hugging Face, Custom Models) @@ -332,17 +233,17 @@ To use self-deployed models on Vertex AI through Portkey: ```js const chatCompletion = await portkey.chat.completions.create({ messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'endpoints.my_custom_llm', // Notice the 'endpoints.' prefix -}, {Authorization: "Bearer $YOUR_VERTEX_ACCESS_TOKEN"}); + model: '@vertex-ai/endpoints.my_custom_llm', // Use Model Catalog slug with 'endpoints.' prefix +}); console.log(chatCompletion.choices); ``` ```python -completion = portkey.with_options(Authorization="Bearer $YOUR_VERTEX_ACCESS_TOKEN").chat.completions.create( +completion = portkey.chat.completions.create( messages= [{ "role": 'user', "content": 'Say this is a test' }], - model= 'endpoints.my_huggingface_model' # Notice the 'endpoints.' prefix + model= '@vertex-ai/endpoints.my_huggingface_model' # Use Model Catalog slug with 'endpoints.' prefix ) print(completion) @@ -364,7 +265,7 @@ This approach works for all models you can self-deploy on Vertex AI Model Garden Vertex AI supports attaching various file types to your Gemini messages including documents (`pdf`), images (`jpg`, `png`), videos (`webm`, `mp4`), and audio files. -**Supported Audio Formats:** `mp3`, `wav`, `opus`, `ogg`, `flac`, `pcm`, `aac`, `m4a`, `mpeg`, `mpga`, `mp4`, `webm` +**Supported Audio Formats:** `mp3`, `wav`, `opus`, `flac`, `pcm`, `aac`, `m4a`, `mpeg`, `mpga`, `mp4`, `webm` Gemini Docs: * [Document Processing](https://ai.google.dev/gemini-api/docs/document-processing?lang=python) @@ -392,7 +293,7 @@ const chatCompletion = await portkey.chat.completions.create({ } ]} ], - model: 'gemini-1.5-pro-001', + model: '@vertex-ai/gemini-3-pro-preview', max_tokens: 200 }); ``` @@ -420,7 +321,7 @@ completion = portkey.chat.completions.create( ] } ], - model='gemini-1.5-pro-001', + model='@vertex-ai/gemini-3-pro-preview', max_tokens=200 ) @@ -429,13 +330,10 @@ print(completion) ```sh cURL curl --location 'https://api.portkey.ai/v1/chat/completions' \ ---header 'x-portkey-provider: vertex-ai' \ ---header 'x-portkey-vertex-region: us-central1' \ --header 'Content-Type: application/json' \ --header 'x-portkey-api-key: PORTKEY_API_KEY' \ ---header 'Authorization: VERTEX_AI_ACCESS_TOKEN' \ --data '{ - "model": "gemini-1.5-pro-001", + "model": "@vertex-ai/gemini-3-pro-preview", "max_tokens": 200, "stream": false, "messages": [ @@ -480,7 +378,7 @@ Upload your PDF using the Files API to get a Google Files URL. 
```javascript NodeJS const chatCompletion = await portkey.chat.completions.create({ - model: 'gemini-1.5-pro', + model: '@vertex-ai/gemini-3-pro-preview', messages: [{ role: 'user', content: [ @@ -498,7 +396,7 @@ console.log(chatCompletion.choices[0].message.content); ``` ```python Python completion = portkey.chat.completions.create( - model='gemini-1.5-pro', + model='@vertex-ai/gemini-3-pro-preview', messages=[{ "role": "user", "content": [ @@ -516,10 +414,8 @@ print(completion.choices[0].message.content) ``` ```sh cURL curl --location 'https://api.portkey.ai/v1/chat/completions' \ ---header 'x-portkey-provider: google' \ --header 'x-portkey-api-key: YOUR_PORTKEY_API_KEY' \ --header 'Content-Type: application/json' \ ---header 'Authorization: VERTEX_AI_ACCESS_TOKEN' \ --data '{ "model": "@VERTEX_PROVIDER/MODEL_NAME", "messages": [{ @@ -584,10 +480,8 @@ print(completion.choices[0].message.content) # Then use the encoded content in the request curl --location 'https://api.portkey.ai/v1/chat/completions' \ ---header 'x-portkey-provider: google' \ --header 'x-portkey-api-key: YOUR_PORTKEY_API_KEY' \ --header 'Content-Type: application/json' \ ---header 'Authorization: VERTEX_AI_ACCESS_TOKEN' \ --data '{ "model": "@VERTEX_PROVIDER/MODEL_NAME", "messages": [{ @@ -676,7 +570,7 @@ Note that you will have to set [`strict_open_ai_compliance=False`](/product/ai-g // Generate a chat completion async function getChatCompletionFunctions() { - const response = await portkey.chat.completions.create({ +const response = await portkey.chat.completions.create({ model: "@VERTEX_PROVIDER/anthropic.claude-3-7-sonnet@20250219", // your model slug from Portkey's Model Catalog max_tokens: 3000, thinking: { @@ -836,42 +730,6 @@ Note that you will have to set [`strict_open_ai_compliance=False`](/product/ai-g ``` -### Using reasoning_effort Parameter - -You can also control thinking using the OpenAI-compatible `reasoning_effort` parameter instead of `thinking.budget_tokens`: - -```python -response = portkey.chat.completions.create( - model="@VERTEX_PROVIDER/google.gemini-2.5-flash-preview-04-17", - max_tokens=3000, - reasoning_effort="medium", # Options: "none", "low", "medium", "high" - messages=[{"role": "user", "content": "Explain quantum computing"}] -) -``` - -#### Gemini 2.5 Models - -For Gemini 2.5 models, `reasoning_effort` maps to `thinking_budget` with specific token allocations: - -| reasoning_effort | thinking_budget (tokens) | -|------------------|--------------------------| -| `none` | Disabled | -| `low` | 1,024 | -| `medium` | 8,192 | -| `high` | 24,576 | - -#### Gemini 3.0+ Models - -For Gemini 3.0 and later models, `reasoning_effort` maps directly to `thinkingLevel`: - -| reasoning_effort | Vertex thinkingLevel | -|------------------|---------------------| -| `none` | Disabled | -| `minimal` | `minimal` | -| `low` | `low` | -| `medium` | `medium` | -| `high` | `high` | - ### Multi turn conversation @@ -932,7 +790,7 @@ For Gemini 3.0 and later models, `reasoning_effort` maps directly to `thinkingLe // Generate a chat completion async function getChatCompletionFunctions() { - const response = await portkey.chat.completions.create({ +const response = await portkey.chat.completions.create({ model: "@VERTEX_PROVIDER/anthropic.claude-3-7-sonnet@20250219", // your model slug from Portkey's Model Catalog max_tokens: 3000, thinking: { @@ -1153,7 +1011,7 @@ async function getEmbeddings() { model: "@VERTEX_PROVIDER/text-multilingual-embedding-002", // your model slug from Portkey's Model Catalog // @ts-ignore 
(if using typescript) task_type: "CLASSIFICATION", // Optional - }, {Authorization: "Bearer $YOUR_VERTEX_ACCESS_TOKEN"}); + }); console.log(embeddings); } @@ -1171,7 +1029,7 @@ portkey = Portkey( # Generate embeddings def get_embeddings(): - embeddings = portkey.with_options(Authorization="Bearer $YOUR_VERTEX_ACCESS_TOKEN").embeddings.create( + embeddings = portkey.embeddings.create( input='The vector representation for this text', model='@VERTEX_PROVIDER/text-embedding-004', # your model slug from Portkey's Model Catalog task_type="CLASSIFICATION" # Optional @@ -1218,10 +1076,9 @@ Portkey supports the `Imagen API` on Vertex AI for image generations, letting yo curl https://api.portkey.ai/v1/images/generations \ -H "Content-Type: application/json" \ -H "x-portkey-api-key: $PORTKEY_API_KEY" \ - -H "x-portkey-provider: $PORTKEY_PROVIDER" \ -d '{ "prompt": "Cat flying to mars from moon", - "model":"@your-model-slug" + "model":"@vertex-ai/imagen-3.0-generate-001" }' ``` ```py Python @@ -1233,7 +1090,7 @@ client = Portkey( client.images.generate( prompt = "Cat flying to mars from moon", - model = "@VERTEX_PROVIDER/imagen-3.0-generate-001" # your model slug from Portkey's Model Catalog + model = "@vertex-ai/imagen-3.0-generate-001" # your model slug from Portkey's Model Catalog ) ``` ```ts JavaScript @@ -1246,7 +1103,7 @@ const client = new Portkey({ async function main() { const image = await client.images.generate({ prompt: "Cat flying to mars from moon", - model: "@VERTEX_PROVIDER/imagen-3.0-generate-001" # your model slug from Portkey's Model Catalog + model: "@vertex-ai/imagen-3.0-generate-001" # your model slug from Portkey's Model Catalog }); console.log(image.data); @@ -1276,7 +1133,7 @@ Pass labels in your request body or configure them in your gateway config using ```python completion = portkey.chat.completions.create( messages=[{"role": "user", "content": "Say this is a test"}], - model="@VERTEX_PROVIDER/gemini-1.5-pro-latest", + model="@VERTEX_PROVIDER/gemini-3-pro-preview", labels={"service_id": "backend-api", "environment": "production"} ) ``` @@ -1285,7 +1142,7 @@ completion = portkey.chat.completions.create( ```javascript const completion = await portkey.chat.completions.create({ messages: [{ role: 'user', content: 'Say this is a test' }], - model: '@VERTEX_PROVIDER/gemini-1.5-pro-latest', + model: '@VERTEX_PROVIDER/gemini-3-pro-preview', labels: { service_id: "backend-api", environment: "production" } }); ``` @@ -1319,139 +1176,8 @@ Grounding is invoked by passing the `google_search` tool (for newer models like }] ``` -## Grounding with Google Maps - -Vertex AI supports grounding with Google Maps for location-based queries—places, directions, ratings, and geographic information. 
- -Pass the `google_maps` (or `googleMaps`) tool in the `tools` array: - - -```sh cURL -curl --location 'https://api.portkey.ai/v1/chat/completions' \ ---header 'x-portkey-api-key: YOUR_PORTKEY_API_KEY' \ ---header 'Content-Type: application/json' \ ---data '{ - "model": "@YOUR_VERTEX_PROVIDER/gemini-2.5-pro", - "messages": [{"role": "user", "content": "What are the best Italian restaurants near Times Square?"}], - "tools": [{"type": "function", "function": {"name": "google_maps"}}] -}' -``` -```python Python -from portkey_ai import Portkey - -portkey = Portkey(api_key="YOUR_PORTKEY_API_KEY") - -response = portkey.chat.completions.create( - model="@YOUR_VERTEX_PROVIDER/gemini-2.5-pro", - messages=[{"role": "user", "content": "What are the best Italian restaurants near Times Square?"}], - tools=[{"type": "function", "function": {"name": "google_maps"}}] -) - -print(response) -``` -```typescript NodeJS -import Portkey from 'portkey-ai'; - -const portkey = new Portkey({ apiKey: "YOUR_PORTKEY_API_KEY" }); - -const response = await portkey.chat.completions.create({ - model: "@YOUR_VERTEX_PROVIDER/gemini-2.5-pro", - messages: [{ role: "user", content: "What are the best Italian restaurants near Times Square?" }], - tools: [{ type: "function", function: { name: "google_maps" } }] -}); - -console.log(response); -``` - - -### With Retrieval Configuration - -Optionally configure location coordinates, language, and widget options by passing them inside the function `parameters`: - - -```sh cURL -curl --location 'https://api.portkey.ai/v1/chat/completions' \ ---header 'x-portkey-api-key: YOUR_PORTKEY_API_KEY' \ ---header 'Content-Type: application/json' \ ---data '{ - "model": "@YOUR_VERTEX_PROVIDER/gemini-2.5-pro", - "messages": [{"role": "user", "content": "What are the best coffee shops nearby?"}], - "tools": [{ - "type": "function", - "function": { - "name": "google_maps", - "parameters": { - "enableWidget": true, - "retrievalConfig": { - "latLng": {"latitude": 37.7749, "longitude": -122.4194}, - "languageCode": "en_US" - } - } - } - }] -}' -``` -```python Python -from portkey_ai import Portkey - -portkey = Portkey(api_key="YOUR_PORTKEY_API_KEY") - -response = portkey.chat.completions.create( - model="@YOUR_VERTEX_PROVIDER/gemini-2.5-pro", - messages=[{"role": "user", "content": "What are the best coffee shops nearby?"}], - tools=[{ - "type": "function", - "function": { - "name": "google_maps", - "parameters": { - "enableWidget": True, - "retrievalConfig": { - "latLng": {"latitude": 37.7749, "longitude": -122.4194}, - "languageCode": "en_US" - } - } - } - }] -) - -print(response) -``` -```typescript NodeJS -import Portkey from 'portkey-ai'; - -const portkey = new Portkey({ apiKey: "YOUR_PORTKEY_API_KEY" }); - -const response = await portkey.chat.completions.create({ - model: "@YOUR_VERTEX_PROVIDER/gemini-2.5-pro", - messages: [{ role: "user", content: "What are the best coffee shops nearby?" 
}], - tools: [{ - type: "function", - function: { - name: "google_maps", - parameters: { - enableWidget: true, - retrievalConfig: { - latLng: { latitude: 37.7749, longitude: -122.4194 }, - languageCode: "en_US" - } - } - } - }] -}); - -console.log(response); -``` - - -| Parameter | Description | -|-----------|-------------| -| `enableWidget` | Return a token to enable the Google Maps widget (default: `false`) | -| `retrievalConfig.latLng.latitude` | Latitude (e.g., `37.7749` for San Francisco) | -| `retrievalConfig.latLng.longitude` | Longitude (e.g., `-122.4194` for San Francisco) | -| `retrievalConfig.languageCode` | Language code for results (e.g., `en_US`) | - -Mixing regular tools with grounding tools may cause errors—use only one tool type per request. +If you mix regular tools with grounding tools, vertex might throw an error saying only one tool can be used at a time. ## gemini-2.0-flash-thinking-exp and other thinking/reasoning models @@ -1488,10 +1214,6 @@ The image data is available in the `content_parts` field in the response and it max_tokens=32768, stream=False, modalities=["text", "image"], - image_config={ # optional - "aspect_ratio": "16:9", - "image_size": "2K" - } messages= [ { "role": "system", @@ -1537,15 +1259,11 @@ The image data is available in the `content_parts` field in the response and it // Generate a chat completion async function getChatCompletionFunctions() { - const response = await portkey.chat.completions.create({ +const response = await portkey.chat.completions.create({ model: "gemini-2.5-flash-image-preview", // your model slug from Portkey's Model Catalog max_tokens: 32768, stream: false, modalities: ["text", "image"], - image_config: { // optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages: [ { role: "system", @@ -1602,10 +1320,6 @@ The image data is available in the `content_parts` field in the response and it max_tokens: 32768, stream: false, modalities: ["text", "image"], - image_config: { // optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages: [ { role: "system", @@ -1651,10 +1365,6 @@ The image data is available in the `content_parts` field in the response and it max_tokens=32768, stream=False, modalities=["text", "image"], - image_config = { # optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages=[ { "role": "system", @@ -1690,10 +1400,6 @@ The image data is available in the `content_parts` field in the response and it "max_tokens": 32768, "stream": false, "modalities": ["text", "image"], - "image_config": { - "aspect_ratio": "16:9", - "image_size": "2K" - }, "messages": [ { "role": "system", @@ -2599,7 +2305,7 @@ from portkey_ai import PORTKEY_GATEWAY_URL, createHeaders openai = OpenAI( api_key='PORTKEY_API_KEY', base_url=PORTKEY_GATEWAY_URL, - default_headers=createHeaders(provider='vertex-ai', strict_open_ai_compliance=False) + default_headers=createHeaders(provider='@vertex-ai', strict_open_ai_compliance=False) ) response = openai.chat.completions.create( @@ -2709,7 +2415,7 @@ from portkey_ai import PORTKEY_GATEWAY_URL, createHeaders openai = OpenAI( api_key='PORTKEY_API_KEY', base_url=PORTKEY_GATEWAY_URL, - default_headers=createHeaders(provider='vertex-ai', strict_open_ai_compliance=False) + default_headers=createHeaders(provider='@vertex-ai', strict_open_ai_compliance=False) ) response = openai.chat.completions.create( @@ -2763,10 +2469,6 @@ curl --location 'https://api.portkey.ai/v1/chat/completions' \ max_tokens=32768, stream=False, modalities=["text", "image"], - image_config={ # optional - 
"aspect_ratio": "16:9", - "image_size": "2K" - }, messages= [ { "role": "system", @@ -2832,15 +2534,11 @@ curl --location 'https://api.portkey.ai/v1/chat/completions' \ // Generate a chat completion async function getChatCompletionFunctions() { - const response = await portkey.chat.completions.create({ +const response = await portkey.chat.completions.create({ model: "gemini-2.5-flash-image-preview", // your model slug from Portkey's Model Catalog max_tokens: 32768, stream: false, modalities: ["text", "image"], - image_config: { // optional - "aspect_ratio": "16:9", - "image_size": "2K" - }, messages: [ { role: "system", @@ -2917,10 +2615,6 @@ curl --location 'https://api.portkey.ai/v1/chat/completions' \ max_tokens: 32768, stream: false, modalities: ["text", "image"], - image_config: { // optional - "aspect_ratio": "16:9", - "image_size": "2K" - }, messages: [ { role: "system", @@ -2985,10 +2679,6 @@ curl --location 'https://api.portkey.ai/v1/chat/completions' \ max_tokens=32768, stream=False, modalities=["text", "image"], - image_config={ # optional - aspect_ratio: "16:9", - image_size: "2K" - }, messages=[ { "role": "system", @@ -3043,10 +2733,6 @@ curl --location 'https://api.portkey.ai/v1/chat/completions' \ "max_tokens": 32768, "stream": false, "modalities": ["text", "image"], - "image_config": { - "aspect_ratio": "16:9", - "image_size": "2K" - }, "messages": [ { "role": "system", @@ -3092,222 +2778,6 @@ curl --location 'https://api.portkey.ai/v1/chat/completions' \ ``` -## Safety settings -Gemini models support [configuring safety settings](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-filters#unsafe_prompts) to block potentially harmful responses - - - ```ts TypeScript -import Portkey from 'portkey-ai'; - -const portkey = new Portkey({ - apiKey: "PORTKEY_API_KEY", -}); - -async function main() { - const response = await portkey.chat.completions.create({ - model: "@my-vertex-provider/gemini-2.5-flash", - temperature: 0, - stream: false, - messages: [ - { - role: "user", - content: "Speak explicitly like a gangster harrasing someone for money" - } - ], - safety_settings: [ - { - category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", - threshold: "BLOCK_LOW_AND_ABOVE" - }, - { - category: "HARM_CATEGORY_HATE_SPEECH", - threshold: "BLOCK_LOW_AND_ABOVE" - }, - { - category: "HARM_CATEGORY_HARASSMENT", - threshold: "BLOCK_LOW_AND_ABOVE" - }, - { - category: "HARM_CATEGORY_DANGEROUS_CONTENT", - threshold: "BLOCK_LOW_AND_ABOVE" - } - ] - }); - - console.log(response.choices); -} - -main(); - ``` - ```py Python -from portkey_ai import Portkey - -portkey = Portkey( - api_key="PORTKEY_API_KEY", -) - -response = portkey.chat.completions.create( - model="@my-vertex-provider/gemini-2.5-flash", - temperature=0, - stream=False, - messages=[ - { - "role": "user", - "content": "Speak explicitly like a gangster harrasing someone for money" - } - ], - safety_settings=[ - { - "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_HATE_SPEECH", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_HARASSMENT", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_DANGEROUS_CONTENT", - "threshold": "BLOCK_LOW_AND_ABOVE" - } - ] -) - -print(response.choices) - ``` - ```js OpenAI NodeJS -import OpenAI from "openai"; - -const openai = new OpenAI({ - apiKey: process.env.PORTKEY_API_KEY, // Portkey API Key - baseURL: "https://api.portkey.ai/v1", - defaultHeaders: { - 
"x-portkey-strict-open-ai-compliance": "false", - }, -}); - -async function main() { - const response = await openai.chat.completions.create({ - model: "@my-vertex-provider/gemini-2.5-flash", - temperature: 0, - stream: false, - messages: [ - { - role: "user", - content: "Speak explicitly like a gangster harrasing someone for money" - } - ], - safety_settings: [ - { - category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", - threshold: "BLOCK_LOW_AND_ABOVE" - }, - { - category: "HARM_CATEGORY_HATE_SPEECH", - threshold: "BLOCK_LOW_AND_ABOVE" - }, - { - category: "HARM_CATEGORY_HARASSMENT", - threshold: "BLOCK_LOW_AND_ABOVE" - }, - { - category: "HARM_CATEGORY_DANGEROUS_CONTENT", - threshold: "BLOCK_LOW_AND_ABOVE" - } - ] - }); - - console.log(response.choices); -} - -main(); - ``` - ```py OpenAI Python -import openai - -client = openai.OpenAI( - api_key="PORTKEY_API_KEY", - base_url="https://api.portkey.ai/v1", - default_headers={ - "x-portkey-strict-open-ai-compliance": "false" - } -) - -response = client.chat.completions.create( - model="@my-vertex-provider/gemini-2.5-flash", - temperature=0, - stream=False, - messages=[ - { - "role": "user", - "content": "Speak explicitly like a gangster harrasing someone for money" - } - ], - safety_settings=[ - { - "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_HATE_SPEECH", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_HARASSMENT", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_DANGEROUS_CONTENT", - "threshold": "BLOCK_LOW_AND_ABOVE" - } - ] -) - -print(response.choices) - ``` - ```sh cURL -curl --location 'https://api.portkey.ai/v1/chat/completions' \ ---header 'Content-Type: application/json' \ ---header 'x-portkey-api-key: $PORTKEY_API_KEY' \ ---header 'x-portkey-strict-open-ai-compliance: false' \ ---data '{ - "model": "@my-vertex-provider/gemini-2.5-flash", - "temperature": 0, - "stream": false, - "messages": [ - { - "role": "user", - "content": "Speak explicitly like a gangster harrasing someone for money" - } - ], - "safety_settings": [ - { - "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_HATE_SPEECH", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_HARASSMENT", - "threshold": "BLOCK_LOW_AND_ABOVE" - }, - { - "category": "HARM_CATEGORY_DANGEROUS_CONTENT", - "threshold": "BLOCK_LOW_AND_ABOVE" - } - ] -}' - ``` - - -The response contains the `saftetyRatings` keys in the choice object of the response. -You only need to pass the `strict-open-ai-compliance` flag if you need the `safetyRatings` in response. - ## Making Requests Without Portkey's Model Catalog You can also pass your Vertex AI details & secrets directly without using the Portkey's Model Catalog. 
@@ -3335,7 +2805,7 @@ Vertex AI expects a `region`, a `project ID` and the `access token` in the reque const chatCompletion = await portkey.chat.completions.create({ messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'gemini-pro', + model: 'gemini-3-pro-preview', }); console.log(chatCompletion.choices); @@ -3355,7 +2825,7 @@ portkey = Portkey( completion = portkey.chat.completions.create( messages= [{ "role": 'user', "content": 'Say this is a test' }], - model= 'gemini-1.5-pro-latest' + model= 'gemini-3-pro-preview' ) print(completion) @@ -3381,7 +2851,7 @@ const portkey = new OpenAI({ async function main() { const response = await portkey.chat.completions.create({ messages: [{ role: "user", content: "1729" }], - model: "gemini-1.5-flash-001", + model: "gemini-3-pro-preview", max_tokens: 32, }); @@ -3401,7 +2871,7 @@ curl 'https://api.portkey.ai/v1/chat/completions' \ -H 'x-portkey-vertex-project-id: sample-94994' \ -H 'x-portkey-vertex-region: us-central1' \ --data '{ - "model": "gemini-1.5-pro", + "model": "gemini-3-pro-preview", "messages": [ { "role": "system", @@ -3466,14 +2936,26 @@ This method is simpler but may not have all the permissions needed for custom en ## Next Steps -The complete list of features supported in the SDK are available on the link below. + + + +Complete SDK documentation and API reference + - + +Add metadata to your Vertex AI requests -You'll find more information in the relevant sections: + +Configure advanced gateway features + + + +Trace and monitor your Vertex AI requests + + + +Create fallback configurations between providers + -1. [Add metadata to your requests](/product/observability/metadata) -2. [Add gateway configs to your Vertex AI requests](/product/ai-gateway/configs) -3. [Tracing Vertex AI requests](/product/observability/traces) -4. [Setup a fallback from OpenAI to Vertex AI APIs](/product/ai-gateway/fallbacks) + diff --git a/integrations/llms/vertex-ai/batches.mdx b/integrations/llms/vertex-ai/batches.mdx index 16192676..570e9a8d 100644 --- a/integrations/llms/vertex-ai/batches.mdx +++ b/integrations/llms/vertex-ai/batches.mdx @@ -9,7 +9,7 @@ With Portkey, you can perform batch inference operations with Vertex AI models. - Perform A/B testing with different foundation models ## Before You Start -1. **Portkey API key** and a **Vertex virtual key** in your workspace. +1. **Portkey API key** and a **Vertex AI provider** configured in Model Catalog. 2. A **GCS bucket** in the same region as your model + `aiplatform-service-agent` permission on the file. 3. *(Only for Portkey-native batching)* A **Portkey File** (`input_file_id`). 4. Familiarity with the [Create Batch OpenAPI spec](/api-reference/inference-api/batch/create-batch). 
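
For Portkey-native batching (item 3 above), the input file is a JSONL file with one request per line. A minimal sketch, assuming the OpenAI-compatible batch format that the `/v1/batches` examples below expect:

```jsonl
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gemini-1.5-flash-001", "messages": [{"role": "user", "content": "Say this is a test"}]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gemini-1.5-flash-001", "messages": [{"role": "user", "content": "Summarize batch inference in one sentence"}]}}
```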
@@ -126,7 +126,7 @@ const openai = new OpenAI({ ```bash curl curl -X POST --header 'x-portkey-api-key: ' \ - --header 'x-portkey-provider: ' \ + --header 'x-portkey-provider: @your-vertex-provider' \ --header 'x-portkey-vertex-storage-bucket-name: ' \ --header 'x-portkey-provider-file-name: .jsonl' \ --header 'x-portkey-provider-model: ' \ @@ -234,7 +234,7 @@ const openai = new OpenAI({ ```bash curl curl -X POST --header 'Content-Type: application/json' \ --header 'x-portkey-api-key: ' \ - --header 'x-portkey-provider: ' \ + --header 'x-portkey-provider: @your-vertex-provider' \ --data \ $'{"input_file_id": "", "endpoint": "/v1/chat/completions", "completion_window": "24h", "model":"gemini-1.5-flash-001"}' \ 'https://api.portkey.ai/v1/batches' @@ -328,7 +328,7 @@ const openai = new OpenAI({ ```bash curl curl -X GET --header 'x-portkey-api-key: ' \ ---header 'x-portkey-provider: ' \ +--header 'x-portkey-provider: @your-vertex-provider' \ 'https://api.portkey.ai/v1/batches' ``` @@ -418,7 +418,7 @@ const openai = new OpenAI({ ```bash curl curl -X GET --header 'x-portkey-api-key: ' \ ---header 'x-portkey-provider: ' \ +--header 'x-portkey-provider: @your-vertex-provider' \ 'https://api.portkey.ai/v1/batches/' ``` @@ -428,7 +428,7 @@ curl -X GET --header 'x-portkey-api-key: ' \ ```bash curl curl -X GET --header 'x-portkey-api-key: ' \ ---header 'x-portkey-provider: ' \ +--header 'x-portkey-provider: @your-vertex-provider' \ 'https://api.portkey.ai/v1/batches//output' ``` \ No newline at end of file diff --git a/integrations/llms/vertex-ai/files.mdx b/integrations/llms/vertex-ai/files.mdx index cf7ebf7a..68782414 100644 --- a/integrations/llms/vertex-ai/files.mdx +++ b/integrations/llms/vertex-ai/files.mdx @@ -58,7 +58,7 @@ uploadFile(); ```sh curl -X POST --header 'x-portkey-api-key: ' \ - --header 'x-portkey-provider: ' \ + --header 'x-portkey-provider: @your-vertex-provider' \ --header 'x-portkey-vertex-storage-bucket-name: ' \ --header 'x-portkey-provider-file-name: .jsonl' \ --header 'x-portkey-provider-model: ' \ @@ -165,7 +165,7 @@ getFile(); ```sh curl -X GET --header 'x-portkey-api-key: ' \ ---header 'x-portkey-provider: ' \ +--header 'x-portkey-provider: @your-vertex-provider' \ 'https://api.portkey.ai/v1/files/' ``` @@ -254,7 +254,7 @@ getFileContent(); ```sh curl -X GET --header 'x-portkey-api-key: ' \ ---header 'x-portkey-provider: ' \ +--header 'x-portkey-provider: @your-vertex-provider' \ 'https://api.portkey.ai/v1/files//content' ``` diff --git a/integrations/llms/vertex-ai/fine-tuning.mdx b/integrations/llms/vertex-ai/fine-tuning.mdx index 494e10d5..66d79039 100644 --- a/integrations/llms/vertex-ai/fine-tuning.mdx +++ b/integrations/llms/vertex-ai/fine-tuning.mdx @@ -111,7 +111,7 @@ const openai = new OpenAI({ ```sh curl -X POST --header 'x-portkey-api-key: ' \ - --header 'x-portkey-provider: ' \ + --header 'x-portkey-provider: @your-vertex-provider' \ --header 'x-portkey-vertex-storage-bucket-name: ' \ --header 'x-portkey-provider-file-name: .jsonl' \ --header 'x-portkey-provider-model: ' \ @@ -233,7 +233,7 @@ const openai = new OpenAI({ ```sh curl -X POST --header 'Content-Type: application/json' \ --header 'x-portkey-api-key: ' \ - --header 'x-portkey-provider: ' \ + --header 'x-portkey-provider: @your-vertex-provider' \ --data \ $'{"model": "", "suffix": "", "training_file": "gs:///.jsonl", "hyperparameters": {"n_epochs": 2}}\n' \ 'https://api.portkey.ai/v1/fine_tuning/jobs' @@ -331,7 +331,7 @@ const openai = new OpenAI({ ```sh curl -X GET --header 'x-portkey-api-key: ' \ 
---header 'x-portkey-provider: ' \ +--header 'x-portkey-provider: @your-vertex-provider' \ 'https://api.portkey.ai/v1/fine_tuning/jobs' ``` @@ -427,7 +427,7 @@ const openai = new OpenAI({ ```sh curl -X GET --header 'x-portkey-api-key: ' \ ---header 'x-portkey-provider: ' \ +--header 'x-portkey-provider: @your-vertex-provider' \ 'https://api.portkey.ai/v1/fine_tuning/jobs/' ``` @@ -523,7 +523,7 @@ const openai = new OpenAI({ ```sh curl -X POST --header 'x-portkey-api-key: ' \ ---header 'x-portkey-provider: ' \ +--header 'x-portkey-provider: @your-vertex-provider' \ 'https://api.portkey.ai/v1/fine_tuning/jobs//cancel' ```