init snippet text embed

This commit is contained in:
Travis Shears 2026-03-11 14:45:50 +01:00
parent ca13eb6edc
commit 9c0b8bef3c
Signed by: travisshears
GPG key ID: CB9BF1910F3F7469
4 changed files with 81 additions and 6 deletions

View file

@ -7,6 +7,10 @@
;; db
com.datomic/local {:mvn/version "1.0.291"}
;; http client
clj-http/clj-http {:mvn/version "3.13.1"}
cheshire/cheshire {:mvn/version "5.11.0"}
;; logging
com.taoensso/telemere {:mvn/version "1.0.0"}

View file

@ -6,11 +6,6 @@
[snippets.infra.config :as config]
[taoensso.telemere :as t]))
;; Initialize the Datomic Local client
;; :system "dev" groups your databases in the "dev" system
;; In production, you'd set :storage-dir to a persistent path
;; TODO: add save file location for prod
(def datomic-config
  "The :datomic entry of the application config, read once when this namespace loads."
  (-> (config/get-config)
      :datomic))
(def client (d/client (merge {:server-type :datomic-local
@ -117,6 +112,16 @@
(throw (ex-info "Invalid snippet entity" {:entities entities})))))
;; read
(defn slug-to-db-id
  "Look up the entity id of the snippet whose :snippet/slug equals `slug`.

  Runs the query against the current database value of the connection
  returned by `get-conn` and returns the first value of the first result
  row (nil when the query yields no rows)."
  [slug]
  (let [current-db (d/db (get-conn))
        by-slug    '[:find ?e
                     :in $ ?slug
                     :where [?e :snippet/slug ?slug]]]
    (-> (d/q by-slug current-db slug)
        ffirst)))
(defn- get-snippet-by-slug-from-db
"Get a single snippet by its slug."
[slug]

View file

@ -0,0 +1,64 @@
(ns snippets.infra.text-embed
(:require [clj-http.client :as http]
[clojure.string :as str]
[snippets.infra.db :as db]
[snippets.infra.config]
[taoensso.telemere :as t]))
(defn config
  "Collect the settings this namespace needs from the application config.

  Returns a map with :qdrant-host, :qdrant-api-key and :mistral-api-key,
  taken from the :qdrant and :mistral sections of the config. A missing
  section or key yields nil for the corresponding entry."
  []
  (let [{:keys [qdrant mistral]} (snippets.infra.config/get-config)]
    {:qdrant-host     (:host qdrant)
     :qdrant-api-key  (:api-key qdrant)
     :mistral-api-key (:api-key mistral)}))
(defn- parse
  "Build the vector of strings to embed for a snippet.

  The result has four entries, in order: title, slug, the tags joined
  with a comma, and the markdown body. Missing fields come through as
  nil (an absent tag list becomes the empty string)."
  [snippet]
  (let [{:keys [title slug tags markdown]} snippet]
    [title
     slug
     (str/join "," tags)
     markdown]))
(defn- gen-embed
  "Request embeddings from the Mistral embeddings endpoint.

  Args:
    input - a string, or a vector of strings; a lone string is wrapped
            in a one-element vector before being sent.

  Returns:
    The clj-http response map, with the body parsed as JSON.

  The bearer token is read from :mistral-api-key of `config`, so
  mistral.api-key must be configured."
  [input]
  (let [token   (:mistral-api-key (config))
        inputs  (cond-> input
                  (string? input) vector)
        payload {:model            "codestral-embed"
                 :output_dimension 1536
                 :output_dtype     "float"
                 :input            inputs}
        request {:headers      {"Authorization" (str "Bearer " token)}
                 :content-type :json
                 :form-params  payload
                 :cookie-store false
                 :as           :json}]
    (http/post "https://api.mistral.ai/v1/embeddings" request)))
(defn get-embed
  "Fetch the embedding vector for a snippet.

  Logs the snippet slug, sends the strings produced by `parse` to
  `gen-embed`, and returns the :embedding found at index 0 of the
  response body's :data.

  Throws ex-info (with the offending value under :embed) when that
  embedding does not contain exactly 1536 elements.

  NOTE(review): `parse` yields four strings but only the entry at index 0
  is read here - presumably the embedding of the title; confirm this is
  intended."
  [snippet]
  (t/log! {:level :info :data {:slug (:slug snippet)}} "Getting embedding for snippet")
  (let [response  (gen-embed (parse snippet))
        embedding (-> response :body :data (get 0) :embedding)]
    (when-not (= 1536 (count embedding))
      (throw (ex-info "Unexpected embedding size" {:embed embedding})))
    embedding))
(defn save-embed
  "Save an embedding to Qdrant.

  Args:
    snippet - map with at least :slug; the slug is resolved to a database
              id via `db/slug-to-db-id` and that id is used as the point id.
    embed   - the embedding vector to store.

  Sends a PUT to <qdrant-host>/collections/snippets-dev/points with a
  single point whose payload carries the slug, authenticating with the
  api-key header. Returns the clj-http response map, with the body parsed
  as JSON."
  [snippet embed]
  (let [{:keys [qdrant-host qdrant-api-key]} (config)
        slug (:slug snippet)
        id   (db/slug-to-db-id slug)]
    ;; The API key is a credential and must not be written to the logs;
    ;; only the slug and the resolved id are logged.
    (t/log! {:level :info :data {:slug slug :id id}} "Saving embedding for snippet")
    (http/put (str qdrant-host "/collections/snippets-dev/points")
              {:headers {"api-key" qdrant-api-key}
               :content-type :json
               :form-params {:points [{:id id :vector embed :payload {:slug slug}}]}
               ;; :cookie-store false
               :as :json})))

View file

@ -6,4 +6,6 @@
(defn create-snippet [{:keys [title slug markdown tags]}]
(let [pub-date (java.util.Date.)]
(t/log! {:level :info, :data {:title title :slug slug}} "Creating snippet")
(db/create-snippets [{:title title :slug slug :markdown markdown :tags tags :pub-date pub-date}])))
(db/create-snippets [{:title title :slug slug :markdown markdown :tags tags :pub-date pub-date}])
;; TODO: calculate text embed vector
))