Fix Mastodon scrape when no image description is set

This commit is contained in:
Travis Shears 2025-08-18 10:05:16 +02:00
parent 57b4a13a7c
commit 3a0588dd65
3 changed files with 16 additions and 14 deletions

View file

@ -17,7 +17,7 @@
[:media_attachments [:vector [:map [:media_attachments [:vector [:map
[:url :string] [:url :string]
[:type [:= "image"]] [:type [:= "image"]]
[:description :string]]]]]]) [:description [:maybe :string]]]]]]])
(defn get-posts-until-id [id] (defn get-posts-until-id [id]
(let [limit 10 (let [limit 10
@ -50,7 +50,7 @@
:remoteId (:id raw-post) :remoteId (:id raw-post)
:authorId (get-in raw-post [:account :id]) :authorId (get-in raw-post [:account :id])
:tags (map :name (:tags raw-post)) :tags (map :name (:tags raw-post))
:images (map (fn [img] [(:url img) (:description img)]) (:media_attachments raw-post)) :images (map (fn [img] [(:url img) (or (:description img) "")]) (:media_attachments raw-post))
:posted (:created_at raw-post))) :posted (:created_at raw-post)))
(defn save-post [post] (defn save-post [post]

View file

@ -44,6 +44,10 @@
(defn valid-source? [source] (defn valid-source? [source]
(m/validate source-enum source)) (m/validate source-enum source))
;; Schema describing a post map: it must carry string values under both
;; :id and :remoteId. Reused as the element schema of the :items vector in
;; the response schema built by get-latest-post-by-source.
;; NOTE(review): the vector-style [:map ...] syntax looks like Malli (this
;; file validates with m/validate) — confirm against the ns requires.
(def post-schema [:map
[:id :string]
[:remoteId :string]])
(defn get-all-posts-by-source (defn get-all-posts-by-source
([source] (get-all-posts-by-source source [] 1)) ([source] (get-all-posts-by-source source [] 1))
([source carry page] ([source carry page]
@ -68,14 +72,11 @@
(concat carry rows) (concat carry rows)
(get-all-posts-by-source source (concat carry rows) (inc page)))))) (get-all-posts-by-source source (concat carry rows) (inc page))))))
(defn get-latest-post-remote-id-by-source [source] (defn get-latest-post-by-source [source]
(let [res-schema (let [res-schema
[:map [:map
[:items [:items
[:vector [:vector post-schema]]]]
[:map
[:id string?]
[:remoteId string?]]]]]]
(when (not (valid-source? source)) (when (not (valid-source? source))
(throw (ex-info "Invalid source" {:source source}))) (throw (ex-info "Invalid source" {:source source})))
(as-> (as->
@ -85,17 +86,17 @@
"perPage" 1 "perPage" 1
:sort "-posted" :sort "-posted"
:filter (str "source = '" (name source) "'") :filter (str "source = '" (name source) "'")
:fields (str/join "," ["remoteId" "id"]) ;; :fields (str/join "," ["remoteId" "id"])
"skipTotal" true} "skipTotal" true}
:content-type :json :content-type :json
:as :json}) x :as :json}) x
(:body x) (:body x)
(if (m/validate res-schema x) (utils/validate-with-throw x res-schema)
x (-> x :items first))))
(do
(m/explain res-schema x) (defn get-latest-post-remote-id-by-source [source]
(throw (ex-info "Res does not follow schema" {:res x})))) (tel/log! {:level :info :data {:source source}} "Fetching latest post remote ID for source")
(-> x :items first :remoteId)))) (:remoteId (get-latest-post-by-source source)))
(defn post-with-remote-id-already-saved? [remote-id] (defn post-with-remote-id-already-saved? [remote-id]
(-> (->

View file

@ -4,6 +4,7 @@
[malli.core :as m])) [malli.core :as m]))
(defn validate-with-throw [value schema] (defn validate-with-throw [value schema]
(tel/log! {:level :info :data {:value value :schema schema}} "Validating value")
(if (m/validate schema value) (if (m/validate schema value)
value value
(do (do