ONNX Runtime generate() C API

Note: this API is in preview and is subject to change.

Overview

This document describes the C API for ONNX Runtime GenAI.
Below are the main functions and types, with code snippets and descriptions for each.


Model API

OgaCreateModel

Creates a model from the given directory. The directory must contain a file called genai_config.json, whose contents follow the genai_config.json configuration specification.

OgaModel* model = NULL; OgaResult* result = OgaCreateModel("path/to/model_dir", &model); 

OgaDestroyModel

Destroys the given model.

OgaDestroyModel(model); 

OgaCreateModelWithRuntimeSettings

Creates a model with runtime settings.

OgaRuntimeSettings* settings = NULL;
OgaCreateRuntimeSettings(&settings);
// ... configure settings ...
OgaModel* model = NULL;
OgaResult* result = OgaCreateModelWithRuntimeSettings("path/to/model_dir", settings, &model);

OgaCreateModelFromConfig

Creates a model from a config object.

OgaConfig* config = NULL; OgaCreateConfig("path/to/model_dir", &config); OgaModel* model = NULL; OgaResult* result = OgaCreateModelFromConfig(config, &model); 

OgaModelGetType

Gets the type of the model.

const char* type = NULL; OgaModelGetType(model, &type); 

OgaModelGetDeviceType

Gets the device type used by the model.

const char* device_type = NULL; OgaModelGetDeviceType(model, &device_type); 

Config API

OgaCreateConfig

Creates a configuration object from a config path.

OgaConfig* config = NULL; OgaResult* result = OgaCreateConfig("path/to/model_dir", &config); 

OgaConfigClearProviders

Clears all providers from the configuration.

OgaConfigClearProviders(config); 

OgaConfigAppendProvider

Appends a provider to the configuration.

OgaConfigAppendProvider(config, "CUDAExecutionProvider"); 

OgaConfigSetProviderOption

Sets a provider option in the configuration.

OgaConfigSetProviderOption(config, "CUDAExecutionProvider", "device_id", "0"); 

OgaConfigOverlay

Overlays a JSON string onto the configuration.

OgaConfigOverlay(config, "{\"option\": \"value\"}"); 

OgaDestroyConfig

Destroys the configuration object.

OgaDestroyConfig(config); 

Runtime Settings API

OgaCreateRuntimeSettings

Creates a runtime settings object.

OgaRuntimeSettings* settings = NULL; OgaCreateRuntimeSettings(&settings); 

OgaRuntimeSettingsSetHandle

Sets a named handle in the runtime settings.

OgaRuntimeSettingsSetHandle(settings, "custom_handle", handle_ptr); 

OgaDestroyRuntimeSettings

Destroys the runtime settings object.

OgaDestroyRuntimeSettings(settings); 

Tokenizer API

OgaCreateTokenizer

Creates a tokenizer for the given model.

OgaTokenizer* tokenizer = NULL; OgaResult* result = OgaCreateTokenizer(model, &tokenizer); 

OgaDestroyTokenizer

Destroys the tokenizer.

OgaDestroyTokenizer(tokenizer); 

OgaTokenizerEncode

Encodes a single string and adds the encoded sequence of tokens to the OgaSequences.

OgaSequences* sequences = NULL; OgaCreateSequences(&sequences); OgaTokenizerEncode(tokenizer, "Hello world", sequences); 

OgaTokenizerEncodeBatch

Encodes a batch of strings.

const char* texts[] = {"Hello", "World"}; OgaTensor* tensor = NULL; OgaTokenizerEncodeBatch(tokenizer, texts, 2, &tensor); 

OgaTokenizerToTokenId

Converts a string to its corresponding token ID.

int32_t token_id = 0; OgaTokenizerToTokenId(tokenizer, "Hello", &token_id); 

OgaTokenizerDecode

Decodes a sequence of tokens into a string.

const char* out_string = NULL;
OgaTokenizerDecode(tokenizer, tokens, token_count, &out_string);
// Use out_string, then release it:
OgaDestroyString(out_string);

OgaTokenizerApplyChatTemplate

Applies a chat template to messages and tools.

const char* result = NULL; OgaTokenizerApplyChatTemplate(tokenizer, "template", "messages", "tools", true, &result); OgaDestroyString(result); 

OgaTokenizerDecodeBatch

Decodes a batch of token sequences.

OgaStringArray* out_strings = NULL;
OgaTokenizerDecodeBatch(tokenizer, tensor, &out_strings);
// Use out_strings, then release it:
OgaDestroyStringArray(out_strings);

OgaCreateTokenizerStream

Creates a tokenizer stream for incremental decoding.

OgaTokenizerStream* stream = NULL; OgaCreateTokenizerStream(tokenizer, &stream); 

OgaDestroyTokenizerStream

Destroys the tokenizer stream.

OgaDestroyTokenizerStream(stream); 

OgaTokenizerStreamDecode

Decodes a single token in the stream.

const char* chunk = NULL;
OgaTokenizerStreamDecode(stream, token, &chunk);
// chunk remains valid until the next call to OgaTokenizerStreamDecode on this stream, or until the stream is destroyed.

Sequences API

OgaCreateSequences

Creates an empty OgaSequences object.

OgaSequences* sequences = NULL; OgaCreateSequences(&sequences); 

OgaDestroySequences

Destroys the given OgaSequences.

OgaDestroySequences(sequences); 

OgaSequencesCount

Returns the number of sequences.

size_t count = OgaSequencesCount(sequences); 

OgaSequencesGetSequenceCount

Returns the number of tokens in the sequence at the given index.

size_t token_count = OgaSequencesGetSequenceCount(sequences, 0); 

OgaSequencesGetSequenceData

Returns a pointer to the token data for the sequence at the given index.

const int32_t* data = OgaSequencesGetSequenceData(sequences, 0); 

Generator Params API

OgaCreateGeneratorParams

Creates generator parameters for the given model.

OgaGeneratorParams* params = NULL; OgaCreateGeneratorParams(model, &params); 

OgaDestroyGeneratorParams

Destroys the given generator params.

OgaDestroyGeneratorParams(params); 

OgaGeneratorParamsSetSearchNumber

Sets a numeric search option.

OgaGeneratorParamsSetSearchNumber(params, "max_length", 128); 

OgaGeneratorParamsSetSearchBool

Sets a boolean search option.

OgaGeneratorParamsSetSearchBool(params, "do_sample", true); 

OgaGeneratorParamsTryGraphCaptureWithMaxBatchSize

Attempts to enable graph capture mode with a maximum batch size.

OgaGeneratorParamsTryGraphCaptureWithMaxBatchSize(params, 8); 

OgaGeneratorParamsSetInputIDs

Sets the input ids for the generator params.

OgaGeneratorParamsSetInputIDs(params, input_ids, input_ids_count, sequence_length, batch_size); 

OgaGeneratorParamsSetInputSequences

Sets the input id sequences for the generator params.

OgaGeneratorParamsSetInputSequences(params, sequences); 

OgaGeneratorParamsSetModelInput

Sets an additional model input.

OgaGeneratorParamsSetModelInput(params, "input_name", tensor); 

OgaGeneratorParamsSetInputs

Sets named tensors as inputs.

OgaGeneratorParamsSetInputs(params, named_tensors); 

OgaGeneratorParamsSetGuidance

Sets guidance data.

OgaGeneratorParamsSetGuidance(params, "type", "data"); 

Generator API

OgaCreateGenerator

Creates a generator from the given model and generator params.

OgaGenerator* generator = NULL; OgaCreateGenerator(model, params, &generator); 

OgaDestroyGenerator

Destroys the given generator.

OgaDestroyGenerator(generator); 

OgaGenerator_IsDone

Checks if generation is complete.

bool done = OgaGenerator_IsDone(generator); 

OgaGenerator_AppendTokenSequences

Appends token sequences to the generator.

OgaGenerator_AppendTokenSequences(generator, sequences); 

OgaGenerator_AppendTokens

Appends tokens to the generator.

OgaGenerator_AppendTokens(generator, input_ids, input_ids_count); 

OgaGenerator_IsSessionTerminated

Checks if the session is terminated.

bool terminated = OgaGenerator_IsSessionTerminated(generator); 

OgaGenerator_GenerateNextToken

Generates the next token.

OgaGenerator_GenerateNextToken(generator); 

OgaGenerator_RewindTo

Rewinds the sequence to a new length.

OgaGenerator_RewindTo(generator, new_length); 

OgaGenerator_SetRuntimeOption

Sets a runtime option.

OgaGenerator_SetRuntimeOption(generator, "terminate_session", "1"); 

OgaGenerator_GetSequenceCount

Returns the number of tokens in the sequence at the given index.

size_t count = OgaGenerator_GetSequenceCount(generator, 0); 

OgaGenerator_GetSequenceData

Returns a pointer to the sequence data at the given index.

const int32_t* data = OgaGenerator_GetSequenceData(generator, 0); 

OgaGenerator_GetOutput

Gets a named output tensor.

OgaTensor* tensor = NULL; OgaGenerator_GetOutput(generator, "output_name", &tensor); 

OgaGenerator_GetLogits

Gets the logits tensor.

OgaTensor* logits = NULL; OgaGenerator_GetLogits(generator, &logits); 

OgaGenerator_SetLogits

Sets the logits tensor.

OgaGenerator_SetLogits(generator, tensor); 

OgaSetActiveAdapter

Sets the active adapter for the generator.

OgaSetActiveAdapter(generator, adapters, "adapter_name"); 

Adapter API

OgaCreateAdapters

Creates the object that manages the adapters.

OgaAdapters* adapters = NULL; OgaCreateAdapters(model, &adapters); 

OgaLoadAdapter

Loads the model adapter from the given adapter file path and adapter name.

OgaLoadAdapter(adapters, "adapter_file_path", "adapter_name"); 

OgaUnloadAdapter

Unloads the adapter with the given identifier.

OgaUnloadAdapter(adapters, "adapter_name"); 

Tensor API

OgaCreateTensorFromBuffer

Creates a tensor from a buffer.

OgaTensor* tensor = NULL; OgaCreateTensorFromBuffer(data, shape_dims, shape_dims_count, element_type, &tensor); 

OgaTensorGetType

Returns the element type of the tensor.

OgaElementType type; OgaTensorGetType(tensor, &type); 

OgaTensorGetShapeRank

Returns the rank (number of dimensions) of the tensor.

size_t rank; OgaTensorGetShapeRank(tensor, &rank); 

OgaTensorGetShape

Returns the shape of the tensor.

int64_t shape[rank]; OgaTensorGetShape(tensor, shape, rank); 

OgaTensorGetData

Returns a pointer to the tensor data.

void* data = NULL; OgaTensorGetData(tensor, &data); 

OgaDestroyTensor

Destroys the tensor.

OgaDestroyTensor(tensor); 

Images and Audios API

OgaLoadImages

Loads images from file paths.

OgaStringArray* image_paths = NULL; OgaCreateStringArrayFromStrings(paths, count, &image_paths); OgaImages* images = NULL; OgaLoadImages(image_paths, &images); OgaDestroyStringArray(image_paths); 

OgaLoadImagesFromBuffers

Loads images from memory buffers.

OgaImages* images = NULL; OgaLoadImagesFromBuffers(image_data, image_sizes, count, &images); 

OgaDestroyImages

Destroys the images object.

OgaDestroyImages(images); 

OgaLoadAudios

Loads audios from file paths.

OgaStringArray* audio_paths = NULL; OgaCreateStringArrayFromStrings(paths, count, &audio_paths); OgaAudios* audios = NULL; OgaLoadAudios(audio_paths, &audios); OgaDestroyStringArray(audio_paths); 

OgaLoadAudiosFromBuffers

Loads audios from memory buffers.

OgaAudios* audios = NULL; OgaLoadAudiosFromBuffers(audio_data, audio_sizes, count, &audios); 

OgaDestroyAudios

Destroys the audios object.

OgaDestroyAudios(audios); 

Named Tensors API

OgaCreateNamedTensors

Creates a named tensors object.

OgaNamedTensors* named_tensors = NULL; OgaCreateNamedTensors(&named_tensors); 

OgaNamedTensorsGet

Gets a tensor by name.

OgaTensor* tensor = NULL; OgaNamedTensorsGet(named_tensors, "input_name", &tensor); 

OgaNamedTensorsSet

Sets a tensor by name.

OgaNamedTensorsSet(named_tensors, "input_name", tensor); 

OgaNamedTensorsDelete

Deletes a tensor by name.

OgaNamedTensorsDelete(named_tensors, "input_name"); 

OgaNamedTensorsCount

Returns the number of named tensors.

size_t count = 0; OgaNamedTensorsCount(named_tensors, &count); 

OgaNamedTensorsGetNames

Gets the names of all tensors.

OgaStringArray* names = NULL; OgaNamedTensorsGetNames(named_tensors, &names); OgaDestroyStringArray(names); 

OgaDestroyNamedTensors

Destroys the named tensors object.

OgaDestroyNamedTensors(named_tensors); 

Utility Functions

OgaSetLogBool

Sets a boolean logging option.

OgaSetLogBool("option_name", true); 

OgaSetLogString

Sets a string logging option.

OgaSetLogString("option_name", "value"); 

OgaSetCurrentGpuDeviceId

Sets the current GPU device ID.

OgaSetCurrentGpuDeviceId(0); 

OgaGetCurrentGpuDeviceId

Gets the current GPU device ID.

int device_id = 0; OgaGetCurrentGpuDeviceId(&device_id); 

OgaResultGetError

Gets the error message from an OgaResult.

const char* error = OgaResultGetError(result); 

OgaDestroyResult

Destroys an OgaResult.

OgaDestroyResult(result); 

OgaDestroyString

Destroys a string returned by the API.

OgaDestroyString(str); 

OgaDestroyBuffer

Destroys a buffer.

OgaDestroyBuffer(buffer); 

OgaBufferGetType

Gets the type of the buffer.

OgaDataType type = OgaBufferGetType(buffer); 

OgaBufferGetDimCount

Gets the number of dimensions of a buffer.

size_t dim_count = OgaBufferGetDimCount(buffer); 

OgaBufferGetDims

Gets the dimensions of a buffer.

size_t dims[dim_count]; OgaBufferGetDims(buffer, dims, dim_count); 

OgaBufferGetData

Gets the data from a buffer.

const void* data = OgaBufferGetData(buffer);