// go-ollama/generate_response.go

package ollama

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
)

// GenerateResponseRequest is the JSON payload that GenerateResponse posts to
// the generate endpoint.
//
// Only Model is serialized unconditionally. Every other field is tagged
// omitempty, so an empty string, an empty slice, or a nil pointer is left out
// of the request body entirely.
type GenerateResponseRequest struct {
	// Model is always present in the payload under the "model" key.
	Model string `json:"model"`

	// Plain values; each one is dropped from the payload when it is empty.
	Prompt string   `json:"prompt,omitempty"`
	Suffix string   `json:"suffix,omitempty"`
	Images []string `json:"images,omitempty"`
	Format string   `json:"format,omitempty"`
	System string   `json:"system,omitempty"`

	// Pointer flags: nil omits the key, while a pointer to false is still
	// sent as an explicit false.
	Stream *bool `json:"stream,omitempty"`
	Raw    *bool `json:"raw,omitempty"`

	KeepAlive string `json:"keep_alive,omitempty"`

	// Options is serialized as a nested object under "options" when non-nil.
	Options *GenerateResponseRequestOptions `json:"options,omitempty"`

	// Pointers again, so that an explicit false / 0 can be told apart from
	// "not specified".
	Logprobs    *bool `json:"logprobs,omitempty"`
	TopLogprobs *int  `json:"top_logprobs,omitempty"`
}

// GenerateResponseRequestOptions is the object serialized under the "options"
// key of a GenerateResponseRequest.
//
// All fields are tagged omitempty. The scalar settings are pointers, so a nil
// pointer leaves the key out of the JSON while a pointer to a zero value
// (for example a Temperature of 0) is still transmitted. Stop is omitted when
// it is nil or has length zero.
type GenerateResponseRequestOptions struct {
	Seed *int `json:"seed,omitempty"`

	Temperature *float32 `json:"temperature,omitempty"`
	TopK        *int     `json:"top_k,omitempty"`
	TopP        *float32 `json:"top_p,omitempty"`
	MinP        *float32 `json:"min_p,omitempty"`

	Stop []string `json:"stop,omitempty"`

	NumCtx     *int `json:"num_ctx,omitempty"`
	NumPredict *int `json:"num_predict,omitempty"`
}

// GenerateResponseResponse is the shape GenerateResponse decodes the
// generate endpoint's JSON reply into. Keys that are absent from the reply
// leave the corresponding field at its zero value.
//
// NOTE(review): the *Duration fields are presumably nanosecond counts as
// reported by the Ollama API — confirm against the API documentation.
type GenerateResponseResponse struct {
	Model              string `json:"model"`
	CreatedAt          string `json:"created_at"`
	Response           string `json:"response"`
	Thinking           string `json:"thinking"`
	Done               bool   `json:"done"`
	DoneReason         string `json:"done_reason"`
	TotalDuration      int    `json:"total_duration"`
	LoadDuration       int    `json:"load_duration"`
	PromptEvalCount    int    `json:"prompt_eval_count"`
	PromptEvalDuration int    `json:"prompt_eval_duration"`
	EvalCount          int    `json:"eval_count"`
	EvalDuration       int    `json:"eval_duration"`

	// Logprobs holds one entry per "logprobs" array element, each with its
	// own list of "top_logprobs" alternatives. Logprob is a float64 because
	// log probabilities are fractional values (e.g. -0.25); encoding/json
	// refuses to decode a fractional number into an int, which would make
	// decoding the whole response fail.
	Logprobs []struct {
		Token       string  `json:"token"`
		Logprob     float64 `json:"logprob"`
		Bytes       []int   `json:"bytes"`
		TopLogprobs []struct {
			Token   string  `json:"token"`
			Logprob float64 `json:"logprob"`
			Bytes   []int   `json:"bytes"`
		} `json:"top_logprobs"`
	} `json:"logprobs"`
}

// GenerateResponse marshals reqBody to JSON, POSTs it to "<baseUrl>/generate"
// and returns the decoded reply together with the HTTP status code.
//
// The reply body may contain a single JSON object or a sequence of them.
// NOTE(review): the server presumably streams one object per generated
// fragment unless reqBody.Stream points to false — confirm against the Ollama
// API documentation. When several objects arrive they are merged into one
// result: Response and Thinking are concatenated in arrival order, Logprobs
// entries are appended in arrival order, and every other field is taken from
// the last object. A single-object body is returned as decoded.
//
// Return values:
//   - success: the (merged) response, the status code, and a nil error;
//   - non-200 status: a zero response, the actual status code, and an error
//     whose text starts with "status code is not 200" (followed by the body's
//     JSON "error" string when one can be decoded);
//   - any other failure (marshalling, building the request, transport,
//     decoding the body): a zero response, -1, and the underlying error.
//
// The request is sent with http.DefaultClient, which applies no timeout.
func (o Ollama) GenerateResponse(reqBody GenerateResponseRequest) (GenerateResponseResponse, int, error) {
	payload, err := json.Marshal(reqBody)
	if err != nil {
		return GenerateResponseResponse{}, -1, err
	}

	req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("%s/generate", o.baseUrl), bytes.NewReader(payload))
	if err != nil {
		return GenerateResponseResponse{}, -1, err
	}

	// Custom headers go first so the Content-Type set below always wins.
	for key, val := range o.customHeaders {
		req.Header.Set(key, val)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return GenerateResponseResponse{}, -1, err
	}
	defer resp.Body.Close()

	dec := json.NewDecoder(resp.Body)

	if resp.StatusCode != http.StatusOK {
		// Best effort: surface a server-provided message instead of
		// discarding the body. Any decode problem falls back to the
		// generic error.
		var failure struct {
			Error string `json:"error"`
		}
		if decErr := dec.Decode(&failure); decErr == nil && failure.Error != "" {
			return GenerateResponseResponse{}, resp.StatusCode, fmt.Errorf("status code is not 200: %s", failure.Error)
		}
		return GenerateResponseResponse{}, resp.StatusCode, errors.New("status code is not 200")
	}

	// The first object is mandatory; an empty body is reported as the
	// decoder's error.
	var result GenerateResponseResponse
	if err := dec.Decode(&result); err != nil {
		return GenerateResponseResponse{}, -1, err
	}
	if !dec.More() {
		return result, resp.StatusCode, nil
	}

	// More objects follow: accumulate the text and logprobs, and let the
	// most recent object supply the remaining fields.
	var text, thinking bytes.Buffer
	text.WriteString(result.Response)
	thinking.WriteString(result.Thinking)
	logprobs := result.Logprobs
	for dec.More() {
		// A fresh value per object: Decode leaves fields for absent keys
		// untouched, so reusing one variable would leak stale data.
		var chunk GenerateResponseResponse
		if err := dec.Decode(&chunk); err != nil {
			return GenerateResponseResponse{}, -1, err
		}
		text.WriteString(chunk.Response)
		thinking.WriteString(chunk.Thinking)
		logprobs = append(logprobs, chunk.Logprobs...)
		result = chunk
	}
	result.Response = text.String()
	result.Thinking = thinking.String()
	result.Logprobs = logprobs
	return result, resp.StatusCode, nil
}