{
  "components": {
    "schemas": {
      "AudioPayload": {
        "properties": {
          "data_b64": {
            "description": "Base64-encoded 16-bit PCM WAV (mono).",
            "title": "Data B64",
            "type": "string"
          },
          "format": {
            "default": "wav",
            "description": "Container/encoding of `data_b64` (16-bit PCM WAV).",
            "title": "Format",
            "type": "string"
          },
          "num_quantizers": {
            "description": "Number of RVQ levels decoded into this audio.",
            "title": "Num Quantizers",
            "type": "integer"
          },
          "sample_rate": {
            "description": "Sample rate of the audio in Hz.",
            "examples": [
              24000
            ],
            "title": "Sample Rate",
            "type": "integer"
          }
        },
        "required": [
          "sample_rate",
          "num_quantizers",
          "data_b64"
        ],
        "title": "AudioPayload",
        "type": "object"
      },
      "ConversationTurnModel": {
        "properties": {
          "audio_wav_b64": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Base64-encoded 16-bit PCM WAV of this turn's audio, if any.",
            "title": "Audio Wav B64"
          },
          "speaker": {
            "default": "0",
            "description": "Role label for this turn (one of the model's `speakers`).",
            "title": "Speaker",
            "type": "string"
          },
          "text": {
            "anyOf": [
              {
                "maxLength": 8000,
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Text spoken in this turn, if known.",
            "title": "Text"
          }
        },
        "title": "ConversationTurnModel",
        "type": "object"
      },
      "ConverseReply": {
        "properties": {
          "audio": {
            "anyOf": [
              {
                "$ref": "#/components/schemas/AudioPayload"
              },
              {
                "type": "null"
              }
            ]
          },
          "speaker": {
            "title": "Speaker",
            "type": "string"
          },
          "text": {
            "title": "Text",
            "type": "string"
          }
        },
        "required": [
          "speaker",
          "text"
        ],
        "title": "ConverseReply",
        "type": "object"
      },
      "ConverseRequest": {
        "description": "Complete the open (final) turn of a conversation.\n\nEvery turn except the last must carry ``text`` and/or ``audio_wav_b64`` (grounded history). The\n**last** turn is the one to generate: it may be ``speaker``-only (model authors text + audio) or\n``speaker`` + ``text`` (render that text as contextual TTS). A last turn already carrying both text\nand audio has nothing to generate and is rejected with ``400``.",
        "properties": {
          "conversation": {
            "description": "The conversation, oldest turn first; the last turn is the open turn to complete.",
            "items": {
              "$ref": "#/components/schemas/ConversationTurnModel"
            },
            "maxItems": 64,
            "minItems": 1,
            "title": "Conversation",
            "type": "array"
          },
          "model": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Public model id (see GET /v1/models). Omit/null for the default model.",
            "examples": [
              "kalpa-conversational-v1"
            ],
            "title": "Model"
          },
          "params": {
            "$ref": "#/components/schemas/GenParamsModel"
          }
        },
        "required": [
          "conversation"
        ],
        "title": "ConverseRequest",
        "type": "object"
      },
      "ConverseResponse": {
        "properties": {
          "meta": {
            "additionalProperties": true,
            "title": "Meta",
            "type": "object"
          },
          "model": {
            "title": "Model",
            "type": "string"
          },
          "reply": {
            "$ref": "#/components/schemas/ConverseReply"
          },
          "request_id": {
            "title": "Request Id",
            "type": "string"
          },
          "usage": {
            "$ref": "#/components/schemas/Usage"
          }
        },
        "required": [
          "request_id",
          "model",
          "reply",
          "usage"
        ],
        "title": "ConverseResponse",
        "type": "object"
      },
      "ErrorBody": {
        "properties": {
          "message": {
            "title": "Message",
            "type": "string"
          },
          "request_id": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "title": "Request Id"
          },
          "type": {
            "title": "Type",
            "type": "string"
          }
        },
        "required": [
          "type",
          "message"
        ],
        "title": "ErrorBody",
        "type": "object"
      },
      "ErrorResponse": {
        "description": "The envelope returned for every 4xx/5xx.",
        "properties": {
          "error": {
            "$ref": "#/components/schemas/ErrorBody"
          }
        },
        "required": [
          "error"
        ],
        "title": "ErrorResponse",
        "type": "object"
      },
      "GenParamsModel": {
        "description": "Sampling knobs. Field names/defaults match ``studio.server.model.GenParams`` and map onto\n``Menka.generate``. Bounds match the studio's PARAM_SCHEMA.",
        "properties": {
          "depth_temperature": {
            "anyOf": [
              {
                "maximum": 1.5,
                "minimum": 0,
                "type": "number"
              },
              {
                "type": "null"
              }
            ],
            "description": "Acoustic temperature; null = follow temperature.",
            "title": "Depth Temperature"
          },
          "max_new_tokens": {
            "default": 512,
            "maximum": 2048,
            "minimum": 16,
            "title": "Max New Tokens",
            "type": "integer"
          },
          "penalty_window": {
            "default": 20,
            "maximum": 80,
            "minimum": 1,
            "title": "Penalty Window",
            "type": "integer"
          },
          "quantizers": {
            "anyOf": [
              {
                "minimum": 1,
                "type": "integer"
              },
              {
                "type": "null"
              }
            ],
            "description": "Decode only the first N RVQ levels; null = full depth.",
            "title": "Quantizers"
          },
          "repetition_penalty": {
            "default": 3,
            "maximum": 6,
            "minimum": 0,
            "title": "Repetition Penalty",
            "type": "number"
          },
          "temperature": {
            "default": 0.7,
            "maximum": 1.5,
            "minimum": 0,
            "title": "Temperature",
            "type": "number"
          },
          "top_k": {
            "anyOf": [
              {
                "minimum": 1,
                "type": "integer"
              },
              {
                "type": "null"
              }
            ],
            "description": "Backbone top-k; null = full vocabulary.",
            "title": "Top K"
          }
        },
        "title": "GenParamsModel",
        "type": "object"
      },
      "HealthResponse": {
        "properties": {
          "backend": {
            "title": "Backend",
            "type": "string"
          },
          "ready": {
            "title": "Ready",
            "type": "boolean"
          },
          "status": {
            "default": "ok",
            "title": "Status",
            "type": "string"
          }
        },
        "required": [
          "backend",
          "ready"
        ],
        "title": "HealthResponse",
        "type": "object"
      },
      "InfoResponse": {
        "properties": {
          "backend": {
            "additionalProperties": true,
            "description": "Active backend description (name, kind, sample_rate, …).",
            "title": "Backend",
            "type": "object"
          },
          "defaults": {
            "additionalProperties": true,
            "description": "Default generation params.",
            "title": "Defaults",
            "type": "object"
          },
          "limits": {
            "additionalProperties": true,
            "description": "Request-validation caps the gateway enforces.",
            "title": "Limits",
            "type": "object"
          },
          "param_schema": {
            "description": "UI metadata for the generation knobs.",
            "items": {},
            "title": "Param Schema",
            "type": "array"
          }
        },
        "required": [
          "backend",
          "defaults",
          "limits",
          "param_schema"
        ],
        "title": "InfoResponse",
        "type": "object"
      },
      "ModelCard": {
        "description": "One public model variant, as listed by ``GET /v1/models``.",
        "properties": {
          "default": {
            "default": false,
            "description": "True for the model used when `model` is omitted or null.",
            "title": "Default",
            "type": "boolean"
          },
          "description": {
            "default": "",
            "description": "What this model is for.",
            "title": "Description",
            "type": "string"
          },
          "display_name": {
            "description": "Human-readable model name.",
            "title": "Display Name",
            "type": "string"
          },
          "id": {
            "description": "Stable public model id used in the `model` request field.",
            "title": "Id",
            "type": "string"
          },
          "modes": {
            "description": "Supported modes: subset of [\"converse\", \"tts\"].",
            "items": {
              "type": "string"
            },
            "title": "Modes",
            "type": "array"
          },
          "speakers": {
            "description": "Valid role labels for a turn's `speaker`, in turn order (e.g. [\"0\", \"1\"]).",
            "items": {
              "type": "string"
            },
            "title": "Speakers",
            "type": "array"
          }
        },
        "required": [
          "id",
          "display_name",
          "modes",
          "speakers"
        ],
        "title": "ModelCard",
        "type": "object"
      },
      "ModelsResponse": {
        "properties": {
          "data": {
            "description": "The available public models.",
            "items": {
              "$ref": "#/components/schemas/ModelCard"
            },
            "title": "Data",
            "type": "array"
          }
        },
        "required": [
          "data"
        ],
        "title": "ModelsResponse",
        "type": "object"
      },
      "TtsRequest": {
        "properties": {
          "model": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "description": "Public model id (see GET /v1/models). Omit/null for the default model.",
            "examples": [
              "kalpa-conversational-v1"
            ],
            "title": "Model"
          },
          "params": {
            "$ref": "#/components/schemas/GenParamsModel"
          },
          "speaker": {
            "default": "0",
            "description": "Speaker role to render the text as (one of the model's `speakers`; see GET /v1/models).",
            "title": "Speaker",
            "type": "string"
          },
          "text": {
            "description": "Text to speak.",
            "examples": [
              "Hey there! How are you doing today?"
            ],
            "maxLength": 8000,
            "minLength": 1,
            "title": "Text",
            "type": "string"
          }
        },
        "required": [
          "text"
        ],
        "title": "TtsRequest",
        "type": "object"
      },
      "TtsResponse": {
        "properties": {
          "audio": {
            "$ref": "#/components/schemas/AudioPayload"
          },
          "meta": {
            "additionalProperties": true,
            "description": "Backend-specific diagnostics (latency, frames, …).",
            "title": "Meta",
            "type": "object"
          },
          "model": {
            "title": "Model",
            "type": "string"
          },
          "request_id": {
            "title": "Request Id",
            "type": "string"
          },
          "text": {
            "description": "The text that was spoken (echoes the request).",
            "title": "Text",
            "type": "string"
          },
          "usage": {
            "$ref": "#/components/schemas/Usage"
          }
        },
        "required": [
          "request_id",
          "model",
          "text",
          "audio",
          "usage"
        ],
        "title": "TtsResponse",
        "type": "object"
      },
      "Usage": {
        "properties": {
          "input_audio_seconds": {
            "default": 0,
            "description": "Seconds of input audio supplied (converse).",
            "title": "Input Audio Seconds",
            "type": "number"
          },
          "input_chars": {
            "default": 0,
            "description": "Characters of input text billed for this request.",
            "title": "Input Chars",
            "type": "integer"
          },
          "output_audio_seconds": {
            "default": 0,
            "description": "Seconds of audio generated.",
            "title": "Output Audio Seconds",
            "type": "number"
          }
        },
        "title": "Usage",
        "type": "object"
      },
      "UsageSummaryResponse": {
        "properties": {
          "input_audio_seconds": {
            "title": "Input Audio Seconds",
            "type": "number"
          },
          "input_chars": {
            "title": "Input Chars",
            "type": "integer"
          },
          "key_id": {
            "title": "Key Id",
            "type": "string"
          },
          "last_request_ts": {
            "anyOf": [
              {
                "type": "number"
              },
              {
                "type": "null"
              }
            ],
            "title": "Last Request Ts"
          },
          "output_audio_seconds": {
            "title": "Output Audio Seconds",
            "type": "number"
          },
          "requests": {
            "title": "Requests",
            "type": "integer"
          }
        },
        "required": [
          "key_id",
          "requests",
          "input_chars",
          "input_audio_seconds",
          "output_audio_seconds"
        ],
        "title": "UsageSummaryResponse",
        "type": "object"
      }
    },
    "securitySchemes": {
      "ApiKeyAuth": {
        "description": "Your Kalpa API key, sent as `Authorization: Bearer <key>` (or the `X-API-Key` header).",
        "scheme": "bearer",
        "type": "http"
      }
    }
  },
  "info": {
    "contact": {
      "name": "Kalpa Labs",
      "url": "https://kalpalabs.ai/"
    },
    "description": "The Kalpa Speech API turns text into natural speech (**TTS**) and continues a multi-speaker\nconversation as the next speaker (**converse**), returning generated text + 24 kHz audio.\n\n**Authentication.** Every `/v1` request needs an API key, sent either as\n`Authorization: Bearer <key>` (preferred) or an `X-API-Key: <key>` header.\n\n**Rate limits.** Requests are metered per key. When you exceed your limit you receive\n`429 Too Many Requests` with a `Retry-After` header; every response carries `X-RateLimit-*` headers.\n\n**Audio.** Audio crosses the wire as base64-encoded 16-bit PCM WAV (mono, 24 kHz).\n\nThis document is the single source of truth for the contract — the docs reference, the studio's\ntyped client, and the SDKs are all generated from it.\n",
    "license": {
      "name": "Proprietary"
    },
    "title": "Kalpa Speech API",
    "version": "0.1.0"
  },
  "openapi": "3.1.0",
  "paths": {
    "/health": {
      "get": {
        "operationId": "health_health_get",
        "responses": {
          "200": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HealthResponse"
                }
              }
            },
            "description": "Successful Response"
          }
        },
        "summary": "Liveness probe",
        "tags": [
          "Meta"
        ]
      }
    },
    "/v1/converse": {
      "post": {
        "description": "Given a conversation, complete its last ('open') turn. A speaker-only open turn is authored (text + audio); an open turn with text is rendered as that speaker, conditioned on the prior turns (contextual TTS).",
        "operationId": "converse_v1_converse_post",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/ConverseRequest"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ConverseResponse"
                }
              }
            },
            "description": "Successful Response"
          },
          "401": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Missing or invalid API key."
          },
          "422": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Request validation failed."
          },
          "429": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Rate limit exceeded."
          },
          "502": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "The inference backend failed."
          }
        },
        "security": [
          {
            "ApiKeyAuth": []
          }
        ],
        "summary": "Complete the open (final) turn of a conversation",
        "tags": [
          "Speech"
        ]
      }
    },
    "/v1/info": {
      "get": {
        "operationId": "info_v1_info_get",
        "responses": {
          "200": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/InfoResponse"
                }
              }
            },
            "description": "Successful Response"
          }
        },
        "security": [
          {
            "ApiKeyAuth": []
          }
        ],
        "summary": "Backend info, default params, and limits",
        "tags": [
          "Meta"
        ]
      }
    },
    "/v1/models": {
      "get": {
        "operationId": "models_v1_models_get",
        "responses": {
          "200": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ModelsResponse"
                }
              }
            },
            "description": "Successful Response"
          }
        },
        "security": [
          {
            "ApiKeyAuth": []
          }
        ],
        "summary": "List available public models",
        "tags": [
          "Meta"
        ]
      }
    },
    "/v1/tts": {
      "post": {
        "description": "Render the given text as speech (24 kHz mono WAV) in the requested speaker's voice.",
        "operationId": "tts_v1_tts_post",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/TtsRequest"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/TtsResponse"
                }
              }
            },
            "description": "Successful Response"
          },
          "401": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Missing or invalid API key."
          },
          "422": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Request validation failed."
          },
          "429": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Rate limit exceeded."
          },
          "502": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "The inference backend failed."
          }
        },
        "security": [
          {
            "ApiKeyAuth": []
          }
        ],
        "summary": "Synthesize speech from text",
        "tags": [
          "Speech"
        ]
      }
    },
    "/v1/usage": {
      "get": {
        "description": "Running totals (requests, input characters, audio seconds) for the calling API key.",
        "operationId": "usage_v1_usage_get",
        "responses": {
          "200": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/UsageSummaryResponse"
                }
              }
            },
            "description": "Successful Response"
          },
          "401": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            },
            "description": "Missing or invalid API key."
          }
        },
        "security": [
          {
            "ApiKeyAuth": []
          }
        ],
        "summary": "Your metered usage",
        "tags": [
          "Usage"
        ]
      }
    }
  },
  "servers": [
    {
      "description": "Production",
      "url": "https://api.kalpalabs.ai"
    },
    {
      "description": "Local development",
      "url": "http://localhost:8080"
    }
  ],
  "tags": [
    {
      "description": "Text-to-speech and conversational generation.",
      "name": "Speech"
    },
    {
      "description": "Per-key usage and metering.",
      "name": "Usage"
    },
    {
      "description": "Health checks and capability discovery.",
      "name": "Meta"
    }
  ]
}
