init snippet text embed
This commit is contained in:
parent
ca13eb6edc
commit
9c0b8bef3c
4 changed files with 81 additions and 6 deletions
4
deps.edn
4
deps.edn
|
|
@ -7,6 +7,10 @@
|
||||||
;; db
|
;; db
|
||||||
com.datomic/local {:mvn/version "1.0.291"}
|
com.datomic/local {:mvn/version "1.0.291"}
|
||||||
|
|
||||||
|
;; http client
|
||||||
|
clj-http/clj-http {:mvn/version "3.13.1"}
|
||||||
|
cheshire/cheshire {:mvn/version "5.11.0"}
|
||||||
|
|
||||||
;; logging
|
;; logging
|
||||||
com.taoensso/telemere {:mvn/version "1.0.0"}
|
com.taoensso/telemere {:mvn/version "1.0.0"}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,11 +6,6 @@
|
||||||
[snippets.infra.config :as config]
|
[snippets.infra.config :as config]
|
||||||
[taoensso.telemere :as t]))
|
[taoensso.telemere :as t]))
|
||||||
|
|
||||||
;; Initialize the Datomic Local client
|
|
||||||
;; :system "dev" groups your databases in the "dev" system
|
|
||||||
;; In production, you'd set :storage-dir to a persistent path
|
|
||||||
;; TODO: add save file location for prod
|
|
||||||
|
|
||||||
(def datomic-config (:datomic (config/get-config)))
|
(def datomic-config (:datomic (config/get-config)))
|
||||||
|
|
||||||
(def client (d/client (merge {:server-type :datomic-local
|
(def client (d/client (merge {:server-type :datomic-local
|
||||||
|
|
@ -117,6 +112,16 @@
|
||||||
(throw (ex-info "Invalid snippet entity" {:entities entities})))))
|
(throw (ex-info "Invalid snippet entity" {:entities entities})))))
|
||||||
|
|
||||||
;; read
|
;; read
|
||||||
|
(defn slug-to-db-id
  "Look up the entity id of the snippet whose :snippet/slug equals `slug`.

  Runs the query against the current database value of the connection
  returned by `get-conn`. Returns the first element of the first result
  tuple, or nil when the query yields no rows."
  [slug]
  (-> '[:find ?e
        :in $ ?slug
        :where [?e :snippet/slug ?slug]]
      (d/q (d/db (get-conn)) slug)
      ffirst))
|
||||||
|
|
||||||
(defn- get-snippet-by-slug-from-db
|
(defn- get-snippet-by-slug-from-db
|
||||||
"Get a single snippet by its slug."
|
"Get a single snippet by its slug."
|
||||||
[slug]
|
[slug]
|
||||||
|
|
|
||||||
64
src/snippets/infra/text_embed.clj
Normal file
64
src/snippets/infra/text_embed.clj
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
(ns snippets.infra.text-embed
|
||||||
|
(:require [clj-http.client :as http]
|
||||||
|
[clojure.string :as str]
|
||||||
|
[snippets.infra.db :as db]
|
||||||
|
[snippets.infra.config]
|
||||||
|
[taoensso.telemere :as t]))
|
||||||
|
|
||||||
|
(defn config
  "Flatten the settings this namespace needs out of the application config.

  Returns a map with :qdrant-host, :qdrant-api-key and :mistral-api-key,
  read from [:qdrant :host], [:qdrant :api-key] and [:mistral :api-key]
  respectively. Keys missing from the application config come back as nil."
  []
  (let [app-config (snippets.infra.config/get-config)
        qdrant     (get app-config :qdrant)
        mistral    (get app-config :mistral)]
    (hash-map
     :qdrant-host     (get qdrant :host)
     :qdrant-api-key  (get qdrant :api-key)
     :mistral-api-key (get mistral :api-key))))
|
||||||
|
|
||||||
|
(defn- parse
  "Build the vector of strings to embed for a snippet.

  The result is, in order: the title, the slug, the tags joined with
  commas, and the markdown body. Absent keys yield nil entries (or an
  empty string for the tags)."
  [snippet]
  (let [tag-line (str/join "," (get snippet :tags))]
    [(get snippet :title)
     (get snippet :slug)
     tag-line
     (get snippet :markdown)]))
|
||||||
|
|
||||||
|
(defn- gen-embed
  "Request embeddings from the Mistral embeddings endpoint.

  Args:
    input - a single string, or a vector of strings; a lone string is
            wrapped in a vector before being sent

  Returns:
    The clj-http response of the POST, requested with `:as :json`.

  The request uses the `codestral-embed` model with an output dimension of
  1536 and float output, and authenticates with the :mistral-api-key value
  from `config` as a bearer token."
  [input]
  (let [inputs  (if (string? input) [input] input)
        token   (:mistral-api-key (config))
        payload {:model "codestral-embed"
                 :output_dimension 1536
                 :output_dtype "float"
                 :input inputs}]
    (http/post "https://api.mistral.ai/v1/embeddings"
               {:headers {"Authorization" (str "Bearer " token)}
                :content-type :json
                :form-params payload
                :cookie-store false
                :as :json})))
|
||||||
|
|
||||||
|
(defn get-embed
  "Fetch the embedding vector for a snippet.

  Logs the snippet's slug, sends the strings produced by `parse` to
  `gen-embed`, and returns the embedding found at [:body :data 0 :embedding]
  of the response.

  Throws ex-info \"Unexpected embedding size\" when that value does not
  contain exactly 1536 elements (including when it is missing)."
  [snippet]
  (t/log! {:level :info :data {:slug (:slug snippet)}} "Getting embedding for snippet")
  ;; NOTE(review): `parse` yields several strings but only the entry at
  ;; index 0 of the response data is used -- confirm this is intended.
  (let [response  (gen-embed (parse snippet))
        embedding (get-in response [:body :data 0 :embedding])]
    (when-not (= (count embedding) 1536)
      (throw (ex-info "Unexpected embedding size" {:embed embedding})))
    embedding))
|
||||||
|
|
||||||
|
(defn save-embed
  "Save an embedding to Qdrant.

  Resolves the snippet's slug to its database id with `db/slug-to-db-id`
  and PUTs a single point (id, vector, and a payload carrying the slug) to
  the `snippets-dev` collection on the configured Qdrant host.

  Args:
    snippet - map containing a :slug key
    embed   - the embedding vector to store

  Returns:
    The clj-http response of the PUT, requested with `:as :json`.

  Throws:
    ex-info when no database id can be found for the snippet's slug."
  [snippet embed]
  (let [{:keys [qdrant-host qdrant-api-key]} (config)
        slug (:slug snippet)
        id   (db/slug-to-db-id slug)]
    ;; Fail fast with a clear error instead of sending a point with a null id.
    (when (nil? id)
      (throw (ex-info "No database id found for snippet slug" {:slug slug})))
    ;; The API key is a secret, so it is deliberately kept out of the log data.
    (t/log! {:level :info :data {:slug slug :id id}} "Saving embedding for snippet")
    (http/put (str qdrant-host "/collections/snippets-dev/points")
              {:headers {"api-key" qdrant-api-key}
               :content-type :json
               :form-params {:points [{:id id :vector embed :payload {:slug slug}}]}
               ;; :cookie-store false
               :as :json})))
|
||||||
|
|
@ -6,4 +6,6 @@
|
||||||
(defn create-snippet [{:keys [title slug markdown tags]}]
|
(defn create-snippet [{:keys [title slug markdown tags]}]
|
||||||
(let [pub-date (java.util.Date.)]
|
(let [pub-date (java.util.Date.)]
|
||||||
(t/log! {:level :info, :data {:title title :slug slug}} "Creating snippet")
|
(t/log! {:level :info, :data {:title title :slug slug}} "Creating snippet")
|
||||||
(db/create-snippets [{:title title :slug slug :markdown markdown :tags tags :pub-date pub-date}])))
|
(db/create-snippets [{:title title :slug slug :markdown markdown :tags tags :pub-date pub-date}])
|
||||||
|
;; TODO: calculate text embed vector
|
||||||
|
))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue