Method: projects.locations.endpoints.countTokens

Perform a token counting.

Endpoint

post https://{service-endpoint}/v1/{endpoint}:countTokens

Where {service-endpoint} is one of the supported service endpoints.

Path parameters

endpoint string

Required. The name of the Endpoint requested to perform token counting. Format: projects/{project}/locations/{location}/endpoints/{endpoint}

Request body

The request body contains data with the following structure:

Fields
model string

Optional. The name of the publisher model requested to serve the prediction. Format: projects/{project}/locations/{location}/publishers/*/models/*

instances[] value (Value format)

Optional. The instances that are the input to token counting call. Schema is identical to the prediction schema of the underlying model.

contents[] object (Content)

Optional. Input content.

tools[] object (Tool)

Optional. A list of Tools the model may use to generate the next response.

A Tool is a piece of code that enables the system to interact with external systems to perform an action, or set of actions, outside of knowledge and scope of the model.

systemInstruction object (Content)

Optional. The user provided system instructions for the model. Note: only text should be used in parts and content in each part will be in a separate paragraph.

Example request

Text

C#


using Google.Cloud.AIPlatform.V1;
using System;
using System.Threading.Tasks;

public class GetTokenCount
{
    public async Task<int> CountTokens(
        string projectId = "your-project-id",
        string location = "us-central1",
        string publisher = "google",
        string model = "gemini-1.5-flash-001"
    )
    {
        var client = new LlmUtilityServiceClientBuilder
        {
            Endpoint = $"{location}-aiplatform.googleapis.com"
        }.Build();

        var request = new CountTokensRequest
        {
            Endpoint = $"projects/{projectId}/locations/{location}/publishers/{publisher}/models/{model}",
            Model = $"projects/{projectId}/locations/{location}/publishers/{publisher}/models/{model}",
            Contents =
            {
                new Content
                {
                    Role = "USER",
                    Parts = { new Part { Text = "Why is the sky blue?" } }
                }
            }
        };

        var response = await client.CountTokensAsync(request);
        int tokenCount = response.TotalTokens;
        Console.WriteLine($"There are {tokenCount} tokens in the prompt.");
        return tokenCount;
    }
}

Go

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/vertexai/genai"
)

// countTokens returns the number of tokens for this prompt.
func countTokens(w io.Writer, prompt, projectID, location, modelName string) error {
	// prompt := "why is the sky blue?"
	// location := "us-central1"
	// modelName := "gemini-1.5-flash-001"

	ctx := context.Background()

	client, err := genai.NewClient(ctx, projectID, location)
	if err != nil {
		return fmt.Errorf("unable to create client: %w", err)
	}
	defer client.Close()

	model := client.GenerativeModel(modelName)

	resp, err := model.CountTokens(ctx, genai.Text(prompt))
	if err != nil {
		return err
	}

	fmt.Fprintf(w, "Number of tokens for the prompt: %d\n", resp.TotalTokens)

	return nil
}

Java

import com.google.cloud.vertexai.VertexAI;
import com.google.cloud.vertexai.api.CountTokensResponse;
import com.google.cloud.vertexai.api.GenerateContentResponse;
import com.google.cloud.vertexai.generativeai.GenerativeModel;
import java.io.IOException;

public class GetTokenCount {
  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-google-cloud-project-id";
    String location = "us-central1";
    String modelName = "gemini-1.5-flash-001";

    getTokenCount(projectId, location, modelName);
  }

  // Gets the number of tokens for the prompt and the model's response.
  public static int getTokenCount(String projectId, String location, String modelName)
      throws IOException {
    // Initialize client that will be used to send requests.
    // This client only needs to be created once, and can be reused for multiple requests.
    try (VertexAI vertexAI = new VertexAI(projectId, location)) {
      GenerativeModel model = new GenerativeModel(modelName, vertexAI);

      String textPrompt = "Why is the sky blue?";
      CountTokensResponse response = model.countTokens(textPrompt);

      int promptTokenCount = response.getTotalTokens();
      int promptCharCount = response.getTotalBillableCharacters();

      System.out.println("Prompt token Count: " + promptTokenCount);
      System.out.println("Prompt billable character count: " + promptCharCount);

      GenerateContentResponse contentResponse = model.generateContent(textPrompt);

      int tokenCount = contentResponse.getUsageMetadata().getPromptTokenCount();
      int candidateTokenCount = contentResponse.getUsageMetadata().getCandidatesTokenCount();
      int totalTokenCount = contentResponse.getUsageMetadata().getTotalTokenCount();

      System.out.println("Prompt token Count: " + tokenCount);
      System.out.println("Candidate Token Count: " + candidateTokenCount);
      System.out.println("Total token Count: " + totalTokenCount);

      return promptTokenCount;
    }
  }
}

Node.js

const {VertexAI} = require('@google-cloud/vertexai');

/**
 * TODO(developer): Update these variables before running the sample.
 */
async function countTokens(
  projectId = 'PROJECT_ID',
  location = 'us-central1',
  model = 'gemini-1.5-flash-001'
) {
  // Initialize Vertex with your Cloud project and location
  const vertexAI = new VertexAI({project: projectId, location: location});

  // Instantiate the model
  const generativeModel = vertexAI.getGenerativeModel({
    model: model,
  });

  const req = {
    contents: [{role: 'user', parts: [{text: 'How are you doing today?'}]}],
  };

  const countTokensResp = await generativeModel.countTokens(req);
  console.log('count tokens response: ', countTokensResp);
}

Python

import vertexai
from vertexai.generative_models import GenerativeModel

# TODO (developer): update project_id
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-001")

prompt = "Why is the sky blue?"
# Prompt tokens count
response = model.count_tokens(prompt)
print(f"Prompt Token Count: {response.total_tokens}")
print(f"Prompt Character Count: {response.total_billable_characters}")

# Send text to Gemini
response = model.generate_content(prompt)

# Response tokens count
usage_metadata = response.usage_metadata
print(f"Prompt Token Count: {usage_metadata.prompt_token_count}")
print(f"Candidates Token Count: {usage_metadata.candidates_token_count}")
print(f"Total Token Count: {usage_metadata.total_token_count}")

Multi-modal

Go

import (
	"context"
	"fmt"
	"io"
	"mime"
	"path/filepath"

	"cloud.google.com/go/vertexai/genai"
)

// countTokensMultimodal finds the number of tokens for a multimodal prompt (video+text), and writes to w. Then,
// it calls the model with the multimodal prompt and writes token counts from the response metadata to w.
//
// video is a Google Cloud Storage path starting with "gs://"
func countTokensMultimodal(w io.Writer, projectID, location, modelName string) error {
	// location := "us-central1"
	// modelName := "gemini-1.5-flash-001"
	prompt := "Provide a description of the video."
	video := "gs://cloud-samples-data/generative-ai/video/pixel8.mp4"

	ctx := context.Background()

	client, err := genai.NewClient(ctx, projectID, location)
	if err != nil {
		return fmt.Errorf("unable to create client: %w", err)
	}
	defer client.Close()

	model := client.GenerativeModel(modelName)

	part1 := genai.Text(prompt)

	// Given a video file URL, prepare video file as genai.Part
	part2 := genai.FileData{
		MIMEType: mime.TypeByExtension(filepath.Ext(video)),
		FileURI:  video,
	}

	// Finds the total number of tokens for the 2 parts (text, video) of the multimodal prompt,
	// before actually calling the model for inference.
	resp, err := model.CountTokens(ctx, part1, part2)
	if err != nil {
		return err
	}

	fmt.Fprintf(w, "Number of tokens for the multimodal video prompt: %d\n", resp.TotalTokens)

	res, err := model.GenerateContent(ctx, part1, part2)
	if err != nil {
		return fmt.Errorf("unable to generate contents: %w", err)
	}

	// The token counts are also provided in the model response metadata, after inference.
	fmt.Fprintln(w, "\nModel response")
	md := res.UsageMetadata
	fmt.Fprintf(w, "Prompt Token Count: %d\n", md.PromptTokenCount)
	fmt.Fprintf(w, "Candidates Token Count: %d\n", md.CandidatesTokenCount)
	fmt.Fprintf(w, "Total Token Count: %d\n", md.TotalTokenCount)

	return nil
}

Python

import vertexai
from vertexai.generative_models import GenerativeModel, Part

# TODO (developer): update project_id
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-001")

contents = [
    Part.from_uri(
        "gs://cloud-samples-data/generative-ai/video/pixel8.mp4",
        mime_type="video/mp4",
    ),
    "Provide a description of the video.",
]

# Prompt tokens count
response = model.count_tokens(contents)
print(f"Prompt Token Count: {response.total_tokens}")
print(f"Prompt Character Count: {response.total_billable_characters}")

# Send text to Gemini
response = model.generate_content(contents)
usage_metadata = response.usage_metadata

# Response tokens count
print(f"Prompt Token Count: {usage_metadata.prompt_token_count}")
print(f"Candidates Token Count: {usage_metadata.candidates_token_count}")
print(f"Total Token Count: {usage_metadata.total_token_count}")

Response body

If successful, the response body contains an instance of CountTokensResponse.