Reviewed patch. Line breaks and context-line whitespace were reconstructed
from a whitespace-collapsed copy, so apply with `git apply --ignore-whitespace`
and confirm the context lines against the target files.

diff --git a/deps.edn b/deps.edn
index ba45ae7..6897257 100644
--- a/deps.edn
+++ b/deps.edn
@@ -7,6 +7,10 @@
   ;; db
   com.datomic/local {:mvn/version "1.0.291"}
 
+  ;; http client
+  clj-http/clj-http {:mvn/version "3.13.1"}
+  cheshire/cheshire {:mvn/version "5.11.0"}
+
   ;; logging
   com.taoensso/telemere {:mvn/version "1.0.0"}
 
diff --git a/src/snippets/infra/db.clj b/src/snippets/infra/db.clj
index e89dc52..260fbae 100644
--- a/src/snippets/infra/db.clj
+++ b/src/snippets/infra/db.clj
@@ -6,11 +6,6 @@
             [snippets.infra.config :as config]
             [taoensso.telemere :as t]))
 
-;; Initialize the Datomic Local client
-;; :system "dev" groups your databases in the "dev" system
-;; In production, you'd set :storage-dir to a persistent path
-;; TODO: add save file location for prod
-
 (def datomic-config (:datomic (config/get-config)))
 
 (def client (d/client (merge {:server-type :datomic-local
@@ -117,6 +112,16 @@
       (throw (ex-info "Invalid snippet entity" {:entities entities})))))
 
 ;; read
+(defn slug-to-db-id
+  "Get the database ID of a snippet given a slug."
+  [slug]
+  (let [conn (get-conn)
+        db (d/db conn)
+        query '[:find ?e
+                :in $ ?slug
+                :where [?e :snippet/slug ?slug]]]
+    (ffirst (d/q query db slug))))
+
 (defn- get-snippet-by-slug-from-db
   "Get a single snippet by its slug."
   [slug]
diff --git a/src/snippets/infra/text_embed.clj b/src/snippets/infra/text_embed.clj
new file mode 100644
index 0000000..6be3143
--- /dev/null
+++ b/src/snippets/infra/text_embed.clj
@@ -0,0 +1,72 @@
+(ns snippets.infra.text-embed
+  (:require [clj-http.client :as http]
+            [clojure.string :as str]
+            [snippets.infra.db :as db]
+            [snippets.infra.config]
+            [taoensso.telemere :as t]))
+
+(defn config []
+  (let [c (snippets.infra.config/get-config)]
+    (hash-map
+     :qdrant-host (get-in c [:qdrant :host])
+     :qdrant-api-key (get-in c [:qdrant :api-key]),
+     :mistral-api-key (get-in c [:mistral :api-key]))))
+
+(defn- parse
+  "Turn a snippet into a vector of strings to embed."
+  [snippet]
+  (vector
+   (:title snippet)
+   (:slug snippet)
+   (str/join "," (:tags snippet))
+   (:markdown snippet)))
+
+(defn- gen-embed
+  "Call Mistral API to generate embeddings for the given input.
+   Args:
+     input - string or vector of strings to embed
+   Returns:
+     Embedding response from Mistral API
+   Requires mistral.api-key to be configured."
+  [input]
+  (let [api-key (:mistral-api-key (config))
+        url "https://api.mistral.ai/v1/embeddings"
+        body {:model "codestral-embed"
+              :output_dimension 1536
+              :output_dtype "float"
+              :input (if (string? input) [input] input)}]
+    (http/post url
+               {:headers {"Authorization" (str "Bearer " api-key)}
+                :content-type :json
+                :form-params body
+                :cookie-store false
+                :as :json})))
+
+(defn get-embed
+  "Return the 1536-entry embedding vector for `snippet`.
+   The title, slug, tags and markdown are joined into one input string so
+   that the single embedding read from the response covers every field.
+   Throws ex-info when the returned vector does not have 1536 entries."
+  [snippet]
+  (t/log! {:level :info :data {:slug (:slug snippet)}} "Getting embedding for snippet")
+  ;; gen-embed passes a vector through as the request's :input array, but only
+  ;; entry 0 of the response's :data is read below, so join the fields first.
+  (let [res (gen-embed (str/join "\n" (parse snippet)))
+        embed (get-in res [:body :data 0 :embedding])]
+    (if (not= (count embed) 1536)
+      (throw (ex-info "Unexpected embedding size" {:embed embed}))
+      embed)))
+
+(defn save-embed
+  "Save an embedding to Qdrant, using the snippet's database ID as the point id."
+  [snippet embed]
+  (let [api-key (:qdrant-api-key (config))
+        id (db/slug-to-db-id (:slug snippet))]
+    ;; The API key is deliberately kept out of the log data.
+    (t/log! {:level :info :data {:slug (:slug snippet) :id id}} "Saving embedding for snippet")
+    (http/put (str (:qdrant-host (config)) "/collections/snippets-dev/points")
+              {:headers {"api-key" api-key}
+               :content-type :json
+               :form-params {:points [{:id id :vector embed :payload {:slug (:slug snippet)}}]}
+               ;; :cookie-store false
+               :as :json})))
diff --git a/src/snippets/use_cases/create.clj b/src/snippets/use_cases/create.clj
index 6f0b494..33a41ba 100644
--- a/src/snippets/use_cases/create.clj
+++ b/src/snippets/use_cases/create.clj
@@ -6,4 +6,6 @@
 (defn create-snippet [{:keys [title slug markdown tags]}]
   (let [pub-date (java.util.Date.)]
     (t/log! {:level :info, :data {:title title :slug slug}} "Creating snippet")
-    (db/create-snippets [{:title title :slug slug :markdown markdown :tags tags :pub-date pub-date}])))
+    (db/create-snippets [{:title title :slug slug :markdown markdown :tags tags :pub-date pub-date}])
+    ;; TODO: calculate text embed vector
+    ))