From 57b4a13a7ce5934cf02096dcd1e3b3ba58a7f979 Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Fri, 15 Aug 2025 21:46:03 +0200 Subject: [PATCH 01/12] change isTech from bool to "yes_ai" --- deploy.sh | 5 +++++ src/micro_blog/is_tech.clj | 7 ++----- src/micro_blog/pocket_base.clj | 26 ++++++++++++++++++-------- 3 files changed, 25 insertions(+), 13 deletions(-) create mode 100755 deploy.sh diff --git a/deploy.sh b/deploy.sh new file mode 100755 index 0000000..e6e200a --- /dev/null +++ b/deploy.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +export AWS_PROFILE=personal + +docker buildx build --platform linux/amd64,linux/arm64 -t 853019563312.dkr.ecr.eu-central-1.amazonaws.com/micro-blog-fetchers-homelabstack:latest --push . \ No newline at end of file diff --git a/src/micro_blog/is_tech.clj b/src/micro_blog/is_tech.clj index 03cd950..6a33047 100644 --- a/src/micro_blog/is_tech.clj +++ b/src/micro_blog/is_tech.clj @@ -4,7 +4,7 @@ [taoensso.telemere :as tel] [clj-http.client :as client])) -(defn call-mistral-api [post-text] +(defn is-tech? [post-text] (let [url (str (:mistral-host @config) "/v1/conversations") headers {"Content-Type" "application/json" "Accept" "application/json" @@ -22,7 +22,4 @@ :outputs first :content - (#(if (= "1" %) true false))))) - -(defn is-tech? [post-text] - (call-mistral-api post-text)) + (#(if (= "1" %) :yes_ai :no))))) diff --git a/src/micro_blog/pocket_base.clj b/src/micro_blog/pocket_base.clj index 05056a0..a4dc520 100644 --- a/src/micro_blog/pocket_base.clj +++ b/src/micro_blog/pocket_base.clj @@ -40,6 +40,7 @@ new-token)))) (def source-enum [:enum :pleroma :blue_sky :mastodon :pixelfed :nostr]) +(def is-tech-enum [:enum :yes_ai :no :yes_human]) (defn valid-source? [source] (m/validate source-enum source)) @@ -178,7 +179,7 @@ (let [save-post-schema [:map [:source source-enum] [:fullPost :any] - [:isTech :boolean] + [:isTech is-tech-enum] [:tags [:sequential :string]] [:images [:sequential [:tuple :string :string]]] [:remoteId :string] @@ -189,13 +190,22 @@ (tel/log! {:level :info :data {:remoteId (:remoteId post)}} "Post passed save validation") (if (post-with-remote-id-already-saved? (:remoteId post)) (println "post already saved") - (http-client/post (str (@config :pocket-base-host) "/api/collections/micro_blog_posts/records") - {:headers {"Authorization" (get-login-token-with-cache)} - :form-params (assoc post - :tags (map get-tag-id (:tags post)) - :images (map #(apply get-image-id %) (:images post))) - :content-type :json - :as :json})))) + (try + (http-client/post (str (@config :pocket-base-host) "/api/collections/micro_blog_posts/records") + {:headers {"Authorization" (get-login-token-with-cache)} + :form-params (assoc post + :tags (map get-tag-id (:tags post)) + :images (map #(apply get-image-id %) (:images post))) + :content-type :json + :as :json}) + (catch Exception e + (tel/log! {:level :error + :id :pocket-base/save-post-error + :data {:remoteId (:remoteId post) + :error (.getMessage e) + :exception e}} + "Error saving post to pocketbase") + (throw e)))))) (defn set-is-tech [post-id is-tech] (tel/log! {:level :info :data {:post-id post-id}} "Setting post.is-tech to yes_ai") From 3a0588dd65fdcf6702202c76484a7dec5c4fc782 Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Mon, 18 Aug 2025 10:05:16 +0200 Subject: [PATCH 02/12] Fix mastodon scrape when no img discription is set --- src/micro_blog/mastodon.clj | 4 ++-- src/micro_blog/pocket_base.clj | 25 +++++++++++++------------ src/micro_blog/utils.clj | 1 + 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/micro_blog/mastodon.clj b/src/micro_blog/mastodon.clj index 8b7f027..24b7d0d 100644 --- a/src/micro_blog/mastodon.clj +++ b/src/micro_blog/mastodon.clj @@ -17,7 +17,7 @@ [:media_attachments [:vector [:map [:url :string] [:type [:= "image"]] - [:description :string]]]]]]) + [:description [:maybe :string]]]]]]]) (defn get-posts-until-id [id] (let [limit 10 @@ -50,7 +50,7 @@ :remoteId (:id raw-post) :authorId (get-in raw-post [:account :id]) :tags (map :name (:tags raw-post)) - :images (map (fn [img] [(:url img) (:description img)]) (:media_attachments raw-post)) + :images (map (fn [img] [(:url img) (or (:description img) "")]) (:media_attachments raw-post)) :posted (:created_at raw-post))) (defn save-post [post] diff --git a/src/micro_blog/pocket_base.clj b/src/micro_blog/pocket_base.clj index a4dc520..f74e8c4 100644 --- a/src/micro_blog/pocket_base.clj +++ b/src/micro_blog/pocket_base.clj @@ -44,6 +44,10 @@ (defn valid-source? [source] (m/validate source-enum source)) +(def post-schema [:map + [:id :string] + [:remoteId :string]]) + (defn get-all-posts-by-source ([source] (get-all-posts-by-source source [] 1)) ([source carry page] @@ -68,14 +72,11 @@ (concat carry rows) (get-all-posts-by-source source (concat carry rows) (inc page)))))) -(defn get-latest-post-remote-id-by-source [source] +(defn get-latest-post-by-source [source] (let [res-schema [:map [:items - [:vector - [:map - [:id string?] - [:remoteId string?]]]]]] + [:vector post-schema]]]] (when (not (valid-source? source)) (throw (ex-info "Invalid source" {:source source}))) (as-> @@ -85,17 +86,17 @@ "perPage" 1 :sort "-posted" :filter (str "source = '" (name source) "'") - :fields (str/join "," ["remoteId" "id"]) + ;; :fields (str/join "," ["remoteId" "id"]) "skipTotal" true} :content-type :json :as :json}) x (:body x) - (if (m/validate res-schema x) - x - (do - (m/explain res-schema x) - (throw (ex-info "Res does not follow schema" {:res x})))) - (-> x :items first :remoteId)))) + (utils/validate-with-throw x res-schema) + (-> x :items first)))) + +(defn get-latest-post-remote-id-by-source [source] + (tel/log! {:level :info :data {:source source}} "Fetching latest post remote ID for source") + (:remoteId (get-latest-post-by-source source))) (defn post-with-remote-id-already-saved? [remote-id] (-> diff --git a/src/micro_blog/utils.clj b/src/micro_blog/utils.clj index be84ba5..a675c4c 100644 --- a/src/micro_blog/utils.clj +++ b/src/micro_blog/utils.clj @@ -4,6 +4,7 @@ [malli.core :as m])) (defn validate-with-throw [value schema] + (tel/log! {:level :info :data {:value value :schema schema}} "Validating value") (if (m/validate schema value) value (do From 2c3d3cef2ff64f3a9b1db8347fa3886e8075913b Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Mon, 18 Aug 2025 10:05:16 +0200 Subject: [PATCH 03/12] add debug logging to validate-with-throw --- src/micro_blog/utils.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/micro_blog/utils.clj b/src/micro_blog/utils.clj index a675c4c..931028d 100644 --- a/src/micro_blog/utils.clj +++ b/src/micro_blog/utils.clj @@ -4,7 +4,7 @@ [malli.core :as m])) (defn validate-with-throw [value schema] - (tel/log! {:level :info :data {:value value :schema schema}} "Validating value") + (tel/log! {:level :debug :data {:value value :schema schema}} "Validating value") (if (m/validate schema value) value (do From 911f2f03307c9aed9aa7f6cfb0ab176b53c73f92 Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Mon, 18 Aug 2025 09:39:23 +0200 Subject: [PATCH 04/12] init nostr with gniazdo --- deps.edn | 6 +++++- src/micro_blog/config.clj | 6 +++++- src/micro_blog/nostr.clj | 28 ++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 src/micro_blog/nostr.clj diff --git a/deps.edn b/deps.edn index 0a35e3d..bbb74f0 100644 --- a/deps.edn +++ b/deps.edn @@ -15,7 +15,11 @@ ;; json cheshire/cheshire {:mvn/version "6.0.0"} ;; metosin/muuntaja {:mvn/version "0.6.11"} - org.clojure/clojure {:mvn/version "1.12.1"}} + org.clojure/clojure {:mvn/version "1.12.1"} + ;; websockets + stylefruits/gniazdo {:mvn/version "1.2.2"} + ;; hato/hato {:mvn/version "1.0.0"} + } :aliases {;; Run with clj -T:build function-in-build diff --git a/src/micro_blog/config.clj b/src/micro_blog/config.clj index c12eaef..1a5354a 100644 --- a/src/micro_blog/config.clj +++ b/src/micro_blog/config.clj @@ -8,7 +8,11 @@ :mastodon-host "MASTODON_BASE_URL" :mastodon-account-id "MASTODON_ACCOUNT_ID" :api-host "API_HOST" - :api-port "API_PORT"}) + :api-port "API_PORT" + :nostr-fetcher-npub "NOSTR_FETCHER_NPUB" + :nostr-id "NOSTR_ID" + :nostr-relay "NOSTR_RELAY" + }) (defn- load-config [] (merge (read-string (slurp "config.edn")) diff --git a/src/micro_blog/nostr.clj b/src/micro_blog/nostr.clj new file mode 100644 index 0000000..1d952b8 --- /dev/null +++ b/src/micro_blog/nostr.clj @@ -0,0 +1,28 @@ +(ns micro-blog.nostr + (:require + [gniazdo.core :as ws] + [cheshire.core :as json] + [micro-blog.config :refer [config]])) + +;; :nostr-fetcher-npub "NOSTR_FETCHER_NPUB" +;; :nostr-id "NOSTR_ID" +;; :nostr-relay "NOSTR_RELAY" +(def socket (atom nil)) + +(defn connect [] + (reset! socket + (ws/connect + (@config :nostr-relay) + :on-receive #(prn 'received %)))) + +(defn subscribe-to-author [pubkey] + (let [sub-id "sub-1" + filter {:kinds [1] :authors [pubkey] :limit 10} + msg (json/generate-string ["REQ" sub-id filter])] + (send-msg msg))) + +(defn send-msg [msg] + (ws/send-msg @socket msg)) + +(defn close [] + (ws/close @socket)) From 33a6ccd33b8c1b04dbf6ffc7bffd983c2ad88407 Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Mon, 18 Aug 2025 10:05:16 +0200 Subject: [PATCH 05/12] switch to hato for websockets --- deps.edn | 4 +--- src/micro_blog/config.clj | 6 +++--- src/micro_blog/nostr.clj | 45 +++++++++++++++++++++++++-------------- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/deps.edn b/deps.edn index bbb74f0..47e2f36 100644 --- a/deps.edn +++ b/deps.edn @@ -17,9 +17,7 @@ ;; metosin/muuntaja {:mvn/version "0.6.11"} org.clojure/clojure {:mvn/version "1.12.1"} ;; websockets - stylefruits/gniazdo {:mvn/version "1.2.2"} - ;; hato/hato {:mvn/version "1.0.0"} - } + hato/hato {:mvn/version "1.0.0"}} :aliases {;; Run with clj -T:build function-in-build diff --git a/src/micro_blog/config.clj b/src/micro_blog/config.clj index 1a5354a..ad9372b 100644 --- a/src/micro_blog/config.clj +++ b/src/micro_blog/config.clj @@ -9,10 +9,10 @@ :mastodon-account-id "MASTODON_ACCOUNT_ID" :api-host "API_HOST" :api-port "API_PORT" - :nostr-fetcher-npub "NOSTR_FETCHER_NPUB" + ;; :nostr-fetcher-npub "NOSTR_FETCHER_NPUB" + :nostr-fetcher-id "NOSTR_FETCHER_PUB_HEX" :nostr-id "NOSTR_ID" - :nostr-relay "NOSTR_RELAY" - }) + :nostr-relay "NOSTR_RELAY"}) (defn- load-config [] (merge (read-string (slurp "config.edn")) diff --git a/src/micro_blog/nostr.clj b/src/micro_blog/nostr.clj index 1d952b8..ed9adbb 100644 --- a/src/micro_blog/nostr.clj +++ b/src/micro_blog/nostr.clj @@ -1,28 +1,41 @@ (ns micro-blog.nostr (:require - [gniazdo.core :as ws] - [cheshire.core :as json] - [micro-blog.config :refer [config]])) + [micro-blog.pocket-base :as pb] + [hato.websocket :as ws] + [cheshire.core :as json] + [clojure.string :as str] + [micro-blog.config :refer [config]]) + (:import + [java.time Instant OffsetDateTime ZoneOffset] + [java.time.format DateTimeFormatter])) + +(defn pb-date-to-unix-timestamp-seconds [date-str] + (-> date-str + (str/replace " " "T") + (Instant/parse) + (.getEpochSecond))) + +(defn last-post-timestamp [] + (pb-date-to-unix-timestamp-seconds (:posted (pb/get-latest-post-by-source :nostr)))) ;; :nostr-fetcher-npub "NOSTR_FETCHER_NPUB" ;; :nostr-id "NOSTR_ID" ;; :nostr-relay "NOSTR_RELAY" (def socket (atom nil)) - (defn connect [] - (reset! socket - (ws/connect - (@config :nostr-relay) - :on-receive #(prn 'received %)))) + (reset! socket @(ws/websocket (@config :nostr-relay) + {:on-message (fn [_ws msg _last?] + (println "Received message:" msg)) + :on-close (fn [_ws _status _reason] + (println "WebSocket closed!"))}))) -(defn subscribe-to-author [pubkey] - (let [sub-id "sub-1" - filter {:kinds [1] :authors [pubkey] :limit 10} +;; (last-post-timestamp []) + +(defn subscribe-to-author [pubkey since] + (let [sub-id (@config :nostr-fetcher-id) + filter {:kinds [1] :authors [pubkey] :since since} msg (json/generate-string ["REQ" sub-id filter])] - (send-msg msg))) - -(defn send-msg [msg] - (ws/send-msg @socket msg)) + (.get (ws/send! @socket msg)))) (defn close [] - (ws/close @socket)) + (ws/close! @socket)) From 0b89cb40abb540ad6ef4b37bc57545b52d5c3422 Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Thu, 21 Aug 2025 13:04:24 +0200 Subject: [PATCH 06/12] start nostr websockets on main app start --- src/micro_blog/main.clj | 3 +++ src/micro_blog/nostr.clj | 46 +++++++++++++++++++++++++++++----- src/micro_blog/pocket_base.clj | 1 - 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/micro_blog/main.clj b/src/micro_blog/main.clj index cb84b05..c4ec6b9 100644 --- a/src/micro_blog/main.clj +++ b/src/micro_blog/main.clj @@ -5,6 +5,7 @@ [taoensso.telemere :as tel] [micro-blog.logging.main :as logging] micro-blog.api + [micro-blog.nostr :as nostr] [micro-blog.blue-sky :as blue-sky] [micro-blog.mastodon :as masto])) @@ -15,6 +16,8 @@ (logging/setup-logging) (tel/log! :info "Setting up API") (micro-blog.api/start) + (tel/log! :info "Setting up nostr scraper") + (nostr/start) (tel/log! :info "Setting up crons") (doseq [[i cron] (map-indexed vector crons)] (let [start (.plus (Instant/now) (Duration/ofMinutes (* i 5)))] diff --git a/src/micro_blog/nostr.clj b/src/micro_blog/nostr.clj index ed9adbb..35afe68 100644 --- a/src/micro_blog/nostr.clj +++ b/src/micro_blog/nostr.clj @@ -1,9 +1,12 @@ (ns micro-blog.nostr (:require [micro-blog.pocket-base :as pb] + [micro-blog.is-tech] + [taoensso.telemere :as tel] [hato.websocket :as ws] [cheshire.core :as json] [clojure.string :as str] + [clojure.pprint :refer [pprint]] [micro-blog.config :refer [config]]) (:import [java.time Instant OffsetDateTime ZoneOffset] @@ -15,22 +18,49 @@ (Instant/parse) (.getEpochSecond))) +(defn nostr-date-to-pb [ts] + (let [instant (java.time.Instant/ofEpochSecond ts) + formatter (java.time.format.DateTimeFormatter/ofPattern "yyyy-MM-dd HH:mm:ss.SSSX") + zoned (.atZone instant java.time.ZoneOffset/UTC)] + (.format formatter zoned))) + (defn last-post-timestamp [] (pb-date-to-unix-timestamp-seconds (:posted (pb/get-latest-post-by-source :nostr)))) -;; :nostr-fetcher-npub "NOSTR_FETCHER_NPUB" -;; :nostr-id "NOSTR_ID" -;; :nostr-relay "NOSTR_RELAY" +(defn transform-post [post] + (tel/log! {:level :info :data {:post post}} "Transforming nostr post") + (hash-map :source :nostr + :fullPost post + :remoteId (get post "id") + :isTech (micro-blog.is-tech/is-tech? (get post "content")) + :authorId (get post "pubkey") + :tags (reduce (fn [acc tag] + (let [tag-type (first tag) + tag-value (second tag)] + (if (= tag-type "t") + (conj acc tag-value) + acc))) [] (get post "tags")) + :images [] + :posted (nostr-date-to-pb (get post "created_at")))) + +(defn process-msg [raw-msg] + (let [msg (json/parse-string (.toString raw-msg))] + (tel/log! {:level :info :data {:msg msg}} "Processing nostr message") + (let [msg-type (first msg) ;; ex: EVENT + event (nth msg 2)] + (when (and (= (get event "kind") 1) (= msg-type "EVENT")) + (-> event + transform-post + pb/save-post))))) + (def socket (atom nil)) (defn connect [] (reset! socket @(ws/websocket (@config :nostr-relay) {:on-message (fn [_ws msg _last?] - (println "Received message:" msg)) + (process-msg msg)) :on-close (fn [_ws _status _reason] (println "WebSocket closed!"))}))) -;; (last-post-timestamp []) - (defn subscribe-to-author [pubkey since] (let [sub-id (@config :nostr-fetcher-id) filter {:kinds [1] :authors [pubkey] :since since} @@ -39,3 +69,7 @@ (defn close [] (ws/close! @socket)) + +(defn start [] + (connect) + (subscribe-to-author (@config :nostr-id) (last-post-timestamp))) diff --git a/src/micro_blog/pocket_base.clj b/src/micro_blog/pocket_base.clj index f74e8c4..bd43e02 100644 --- a/src/micro_blog/pocket_base.clj +++ b/src/micro_blog/pocket_base.clj @@ -1,6 +1,5 @@ (ns micro-blog.pocket-base (:require - [clojure.pprint :refer [pprint]] [clojure.string :as str] [clj-http.client :as http-client] [malli.core :as m] From 7ccd356e8ca83038f7e3eef15460bd7b5ee926a2 Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Thu, 21 Aug 2025 14:57:06 +0200 Subject: [PATCH 07/12] fix blue sky is tech --- src/micro_blog/blue_sky.clj | 4 +++- src/micro_blog/is_tech.clj | 3 ++- src/micro_blog/pocket_base.clj | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/micro_blog/blue_sky.clj b/src/micro_blog/blue_sky.clj index 1c9579c..d491dac 100644 --- a/src/micro_blog/blue_sky.clj +++ b/src/micro_blog/blue_sky.clj @@ -3,6 +3,7 @@ [clj-http.client :as http-client] [micro-blog.pocket-base :as pb] [micro-blog.utils :as utils] + [cheshire.core :as json] [micro-blog.is-tech] [taoensso.telemere :as tel] [micro-blog.config :refer [config]])) @@ -82,10 +83,11 @@ (map #(vector (:fullsize %) (:alt %)) images))) (defn transform-post [post] + (tel/log! {:level :info :data {:post post}} "Transforming post") (hash-map :source :blue_sky :fullPost post :remoteId (:cid post) - :isTech (micro-blog.is-tech/is-tech? (:record post)) + :isTech (micro-blog.is-tech/is-tech? (json/generate-string (:record post))) :authorId (get-in post [:author :handle]) :tags (extract-tags post) :images (extract-images post) diff --git a/src/micro_blog/is_tech.clj b/src/micro_blog/is_tech.clj index 6a33047..49eb22d 100644 --- a/src/micro_blog/is_tech.clj +++ b/src/micro_blog/is_tech.clj @@ -5,6 +5,7 @@ [clj-http.client :as client])) (defn is-tech? [post-text] + (when (not (string? post-text)) (throw (ex-info "Post text must be a string" {:post-text post-text}))) (let [url (str (:mistral-host @config) "/v1/conversations") headers {"Content-Type" "application/json" "Accept" "application/json" @@ -12,7 +13,7 @@ body {:inputs post-text :stream false :agent_id (@config :mistral-agent-id)}] - (tel/log! {:level :info :data {:url url :agent_id (:agent_id body)}} "making request to mistral agent") + (tel/log! {:level :info :data {:url url :agent_id (:agent_id body) :post-text post-text}} "making request to mistral agent") (-> (client/post url {:headers headers :form-params body diff --git a/src/micro_blog/pocket_base.clj b/src/micro_blog/pocket_base.clj index bd43e02..ab3f3ae 100644 --- a/src/micro_blog/pocket_base.clj +++ b/src/micro_blog/pocket_base.clj @@ -185,7 +185,6 @@ [:remoteId :string] [:authorId :string] [:posted :string]]] - (utils/validate-with-throw post save-post-schema) (tel/log! {:level :info :data {:remoteId (:remoteId post)}} "Post passed save validation") (if (post-with-remote-id-already-saved? (:remoteId post)) From 7fc8cd9dcdf1b480d462335cc6bd30b7d035abfa Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Fri, 22 Aug 2025 15:18:22 +0200 Subject: [PATCH 08/12] add better websocker related logging --- src/micro_blog/nostr.clj | 9 +++++---- src/micro_blog/pocket_base.clj | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/micro_blog/nostr.clj b/src/micro_blog/nostr.clj index 35afe68..1aecdc5 100644 --- a/src/micro_blog/nostr.clj +++ b/src/micro_blog/nostr.clj @@ -6,7 +6,6 @@ [hato.websocket :as ws] [cheshire.core :as json] [clojure.string :as str] - [clojure.pprint :refer [pprint]] [micro-blog.config :refer [config]]) (:import [java.time Instant OffsetDateTime ZoneOffset] @@ -55,11 +54,13 @@ (def socket (atom nil)) (defn connect [] + (tel/log! :info "Opening websocket connection to nostr relay") (reset! socket @(ws/websocket (@config :nostr-relay) - {:on-message (fn [_ws msg _last?] + {:headers {"User-Agent" "micro-blog-fetcher"} + :on-message (fn [_ws msg _last?] (process-msg msg)) - :on-close (fn [_ws _status _reason] - (println "WebSocket closed!"))}))) + :on-close (fn [_ws status reason] + (tel/log! {:level :warn :data {:status status :reason reason}} "WebSocket connection closed"))}))) (defn subscribe-to-author [pubkey since] (let [sub-id (@config :nostr-fetcher-id) diff --git a/src/micro_blog/pocket_base.clj b/src/micro_blog/pocket_base.clj index ab3f3ae..515ac85 100644 --- a/src/micro_blog/pocket_base.clj +++ b/src/micro_blog/pocket_base.clj @@ -188,7 +188,7 @@ (utils/validate-with-throw post save-post-schema) (tel/log! {:level :info :data {:remoteId (:remoteId post)}} "Post passed save validation") (if (post-with-remote-id-already-saved? (:remoteId post)) - (println "post already saved") + (tel/log! {:level :warn :data {:remoteId (:remoteId post)}} "Post already saved, skipping") (try (http-client/post (str (@config :pocket-base-host) "/api/collections/micro_blog_posts/records") {:headers {"Authorization" (get-login-token-with-cache)} From bea3e72c10c9a9bdbfe8d3247052943761449bcc Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Fri, 22 Aug 2025 23:00:05 +0200 Subject: [PATCH 09/12] add timeout to websocket reconnect --- deploy.sh | 6 +++++- src/micro_blog/nostr.clj | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/deploy.sh b/deploy.sh index e6e200a..a5f2a26 100755 --- a/deploy.sh +++ b/deploy.sh @@ -1,5 +1,9 @@ #!/bin/sh +set -e + export AWS_PROFILE=personal -docker buildx build --platform linux/amd64,linux/arm64 -t 853019563312.dkr.ecr.eu-central-1.amazonaws.com/micro-blog-fetchers-homelabstack:latest --push . \ No newline at end of file +docker buildx build --platform linux/amd64,linux/arm64 -t 853019563312.dkr.ecr.eu-central-1.amazonaws.com/micro-blog-fetchers-homelabstack:latest --push . + +echo "Image pushed to ECR" diff --git a/src/micro_blog/nostr.clj b/src/micro_blog/nostr.clj index 1aecdc5..3e751fa 100644 --- a/src/micro_blog/nostr.clj +++ b/src/micro_blog/nostr.clj @@ -53,6 +53,7 @@ pb/save-post))))) (def socket (atom nil)) +(declare start) (defn connect [] (tel/log! :info "Opening websocket connection to nostr relay") (reset! socket @(ws/websocket (@config :nostr-relay) @@ -60,7 +61,11 @@ :on-message (fn [_ws msg _last?] (process-msg msg)) :on-close (fn [_ws status reason] - (tel/log! {:level :warn :data {:status status :reason reason}} "WebSocket connection closed"))}))) + (tel/log! {:level :warn :data {:status status :reason reason}} "WebSocket connection closed") + (future + (Thread/sleep (* 5 60 1000)) + (tel/log! :info "Reconnecting WebSocket") + (start)))}))) (defn subscribe-to-author [pubkey since] (let [sub-id (@config :nostr-fetcher-id) From 7f6911b73eba4a8183a89e36ab048f3912965099 Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Wed, 3 Sep 2025 10:19:32 +0200 Subject: [PATCH 10/12] add api endpoint to restart nostr --- deploy.sh => build.sh | 0 src/micro_blog/api.clj | 22 ++++++++++++++++------ src/micro_blog/nostr.clj | 1 + 3 files changed, 17 insertions(+), 6 deletions(-) rename deploy.sh => build.sh (100%) diff --git a/deploy.sh b/build.sh similarity index 100% rename from deploy.sh rename to build.sh diff --git a/src/micro_blog/api.clj b/src/micro_blog/api.clj index 38262e1..b200cb6 100644 --- a/src/micro_blog/api.clj +++ b/src/micro_blog/api.clj @@ -4,10 +4,11 @@ [micro-blog.config :refer [config]] micro-blog.mastodon micro-blog.blue-sky + micro-blog.nostr [taoensso.telemere :as tel])) (defn mastodon-proc-handler [_request] - (let [msg "Proceding Mastodon Scrape"] + (let [msg "Procding Mastodon Scrape"] (tel/log! :info msg) (micro-blog.mastodon/run) {:status 200 @@ -15,14 +16,23 @@ (defn blue-sky-proc-handler [_request] (let [msg "Procing BlueSky Scrape"] - (tel/log! :info msg) - (micro-blog.mastodon/run) - {:status 200 - :body msg})) + (tel/log! :info msg) + (micro-blog.mastodon/run) + {:status 200 + :body msg})) + +(defn nostr-proc-handler [_request] + (let [msg "Restarting Nostr scraper"] + (tel/log! :info msg) + (micro-blog.nostr/close) + (micro-blog.nostr/start) + {:status 200 + :body msg})) (def routes #{["/bluesky" :get blue-sky-proc-handler :route-name :blue-sky] - ["/mastodon" :get mastodon-proc-handler :route-name :mastodon]}) + ["/mastodon" :get mastodon-proc-handler :route-name :mastodon] + ["/nostr" :get nostr-proc-handler :route-name :nostr]}) (defn create-connector [] (-> (conn/default-connector-map (:api-host @config) (Integer/parseInt (str (:api-port @config)))) diff --git a/src/micro_blog/nostr.clj b/src/micro_blog/nostr.clj index 3e751fa..9a5a3de 100644 --- a/src/micro_blog/nostr.clj +++ b/src/micro_blog/nostr.clj @@ -74,6 +74,7 @@ (.get (ws/send! @socket msg)))) (defn close [] + (tel/log! :info "Closing nostr socket") (ws/close! @socket)) (defn start [] From d0a08a5e3ffc7615eba7bd4663e0a8b22a13accc Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Thu, 25 Sep 2025 20:53:15 +0200 Subject: [PATCH 11/12] fix bluesky proc in api --- src/micro_blog/api.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/micro_blog/api.clj b/src/micro_blog/api.clj index b200cb6..f28b078 100644 --- a/src/micro_blog/api.clj +++ b/src/micro_blog/api.clj @@ -17,7 +17,7 @@ (defn blue-sky-proc-handler [_request] (let [msg "Procing BlueSky Scrape"] (tel/log! :info msg) - (micro-blog.mastodon/run) + (micro-blog.blue-sky/run) {:status 200 :body msg})) From a165321a8e217242d31c92e747d334e31e104ec6 Mon Sep 17 00:00:00 2001 From: Travis Shears Date: Mon, 1 Dec 2025 17:37:07 +0100 Subject: [PATCH 12/12] remove old node code and try to fix bluesky bug --- README.md | 15 ++- build.sh | 11 +- old_node_src/bluesky.ts | 155 ----------------------- old_node_src/mastodon.ts | 103 --------------- old_node_src/nostr.ts | 106 ---------------- old_node_src/pixelfed.ts | 107 ---------------- old_node_src/pocketbase.ts | 223 --------------------------------- src/micro_blog/blue_sky.clj | 2 +- src/micro_blog/pocket_base.clj | 20 +-- 9 files changed, 36 insertions(+), 706 deletions(-) delete mode 100644 old_node_src/bluesky.ts delete mode 100644 old_node_src/mastodon.ts delete mode 100644 old_node_src/nostr.ts delete mode 100644 old_node_src/pixelfed.ts delete mode 100644 old_node_src/pocketbase.ts diff --git a/README.md b/README.md index d523f33..ee89b7d 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,21 @@ instance. - [x] BlueSky - [ ] Pixelfed - [x] Mastodon -- [ ] Nostr +- [x] Nostr +## Dev + +Run the app locally: + +```shell +$ clj -M -m micro-blog.main +``` + +or just run parts via repl: + +```shell +$ clj +``` ## Deployment diff --git a/build.sh b/build.sh index a5f2a26..6330d68 100755 --- a/build.sh +++ b/build.sh @@ -3,7 +3,14 @@ set -e export AWS_PROFILE=personal +export AWS_REGION=eu-central-1 -docker buildx build --platform linux/amd64,linux/arm64 -t 853019563312.dkr.ecr.eu-central-1.amazonaws.com/micro-blog-fetchers-homelabstack:latest --push . +REPO_NAME="micro-blog-fetchers-homelabstack" -echo "Image pushed to ECR" +if ! aws ecr describe-repositories --repository-names "$REPO_NAME" >/dev/null 2>&1; then + aws ecr create-repository --repository-name "$REPO_NAME" +fi + +docker buildx build --platform linux/amd64,linux/arm64 -t "853019563312.dkr.ecr.eu-central-1.amazonaws.com/$REPO_NAME:latest" --push . + +echo "Docker image built and pushed to AWS ECR" diff --git a/old_node_src/bluesky.ts b/old_node_src/bluesky.ts deleted file mode 100644 index 98e62a3..0000000 --- a/old_node_src/bluesky.ts +++ /dev/null @@ -1,155 +0,0 @@ -import pino from "pino"; -import { MicroBlogBackend } from "./pocketbase"; - -// logger -const logger = pino(); - -// pocketbase -const pb = new MicroBlogBackend(logger); - -type Session = { - did: string; - accessJwt: string; - refreshJwt: string; -}; - -type BlueSkyPost = { - cid: string; - embed?: { - images: { - fullsize: string; - alt: string; - }[]; - }; - record: { - createdAt: string; // '2024-06-25T05:32:06.269Z', - // embed: { '$type': 'app.bsky.embed.images', images: [Array] }, - // facets: [ [Object] ], - text: string; - facets: { - features: { - $type: string; //"app.bsky.richtext.facet#tag",\n' + - tag?: string; // "cooking"\n' + - }[]; - }[]; - }; -}; - -const createSession = async (): Promise => { - const identifier = process.env.BLUE_SKY_USERNAME; - const apiKey = process.env.BLUE_SKY_API_KEY; - const body = JSON.stringify({ identifier, password: apiKey }); - const url = "https://bsky.social/xrpc/com.atproto.server.createSession"; - const res = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: body, - }); - const data = (await res.json()) as Session; - return data; -}; - -const limit = 10; -const getPostsUntilID = async ( - session: Session, - id: string, - cursor: string | null = null, - oldFeed: BlueSkyPost[] = [], -): Promise => { - const params = new URLSearchParams(); - params.append("actor", session.did); - params.append("limit", limit.toString()); - if (cursor) { - params.append("cursor", cursor); - } - - const urlWithParams = new URL( - "https://bsky.social/xrpc/app.bsky.feed.getAuthorFeed", - ); - urlWithParams.search = params.toString(); - - const res = await fetch(urlWithParams, { - headers: { - Accept: "application/json", - Authorization: `Bearer ${session.accessJwt}`, - }, - }); - const rawData = (await res.json()) as { - feed: { post: BlueSkyPost }[]; - cursor: string | null; - }; - const rawFeed = rawData.feed; - const feed = rawFeed.map((item) => item.post); - cursor = rawData?.cursor; - const filteredFeed = []; - for (const post of feed) { - if (post.cid === id) { - break; - } - filteredFeed.push(post); - } - - // the post id we are searching until is in the res so return - if (filteredFeed.length !== feed.length) { - return filteredFeed.concat(oldFeed); - } - // there are more posts to add before the id - if (feed.length === limit) { - return getPostsUntilID(session, id, cursor, oldFeed.concat(feed)); - } - - // the id was not found in the feed, return everything - return oldFeed.concat(feed); -}; - -const savePost = async (post: BlueSkyPost) => { - const postData = { - remoteId: post.cid, - posted: post.record.createdAt, - source: "blue_sky" as const, - fullPost: post, - authorId: "travisshears.bsky.social", - }; - return await pb.savePost(postData); -}; - -const saveTags = async (post: BlueSkyPost, postId: string) => { - for (const facet of post.record.facets) { - for (const feature of facet.features) { - if (feature.$type === "app.bsky.richtext.facet#tag") { - const tag = feature.tag; - if (tag) { - await pb.setTag(tag, postId); - } - } - } - } -}; - -const saveImages = async (post: BlueSkyPost, postId: string) => { - const images = post.embed?.images ?? []; - for (const image of images) { - await pb.saveAndSetImage( - { remoteURL: image.fullsize, alt: image.alt }, - postId, - ); - } -}; - -(async () => { - const session = await createSession(); - const lastSavedPostId = await pb.getLatestPostRemoteIDBySource("blue_sky"); - const posts = await getPostsUntilID(session, lastSavedPostId ?? ""); - posts.reverse(); // save the oldest post first so if we fail posts are not lost on the next run - if (posts.length === 0) { - logger.info("No new posts to save"); - } - for (const post of posts) { - logger.info({ post }, "saving post"); - const savedNewPost = await savePost(post); - await saveTags(post, savedNewPost.id); - await saveImages(post, savedNewPost.id); - } -})(); diff --git a/old_node_src/mastodon.ts b/old_node_src/mastodon.ts deleted file mode 100644 index ef14cc6..0000000 --- a/old_node_src/mastodon.ts +++ /dev/null @@ -1,103 +0,0 @@ -import pino from "pino"; -import { MicroBlogBackend } from "./pocketbase"; - -// custom destination formatter -const logger = pino(); - -const pb = new MicroBlogBackend(logger); - -const baseURL = process.env.MASTODON_BASE_URL; -const accountId = process.env.MASTODON_ACCOUNT_ID; - -type MastodonPost = { - media_attachments: { - type: string; //'image', - url: string; - description: string; // 'Blurry gate', - }[]; - id: string; - content: string; - account: { - id: string; - }; - created_at: string; - tags: { name: string }[]; -}; - -const getPostUntilId = async ({ - lastSavedId, - maxId, - carryPosts = [], -}: { - lastSavedId?: string; - maxId?: string; - carryPosts?: MastodonPost[]; -}): Promise => { - const params = new URLSearchParams(); - params.append("limit", "10"); - const urlWithParams = new URL( - `${baseURL}/api/v1/accounts/${accountId}/statuses`, - ); - urlWithParams.search = params.toString(); - - const res = await fetch(urlWithParams); - const posts = (await res.json()) as MastodonPost[]; - const containsId = posts.some((post) => post.id === lastSavedId); - - if (!containsId && posts.length >= 5) { - return getPostUntilId({ - lastSavedId, - carryPosts: carryPosts?.concat(posts), - maxId: posts[posts.length - 1]?.id, - }); - } - - const allPosts = carryPosts?.concat(posts).reverse(); - if (lastSavedId) { - const index = allPosts.findIndex((post) => post.id === lastSavedId); - return allPosts.slice(index + 1); - } - return allPosts; -}; - -const savePost = async (post: MastodonPost) => { - const postData = { - remoteId: post.id, - authorId: post.account.id, - posted: post.created_at, - source: "mastodon" as const, - fullPost: post, - }; - return await pb.savePost(postData); -}; - -const saveTags = async (post: MastodonPost, postId: string) => { - logger.info({ tags: post.tags }, "saving tags"); - for (const tag of post.tags) { - await pb.setTag(tag.name, postId); - } -}; - -const saveImages = async (post: MastodonPost, postId: string) => { - logger.info({ images: post.media_attachments }, "saving images"); - for (const image of post.media_attachments) { - await pb.saveAndSetImage( - { remoteURL: image.url, alt: image.description }, - postId, - ); - } -}; - -(async () => { - const lastSavedPostId = await pb.getLatestPostRemoteIDBySource("mastodon"); - console.log({ lastSavedPostId }); - const posts = await getPostUntilId({ lastSavedId: lastSavedPostId }); - console.log({ posts }); - - for (const post of posts) { - logger.info({ post }, "saving post"); - const savedNewPost = await savePost(post); - await saveTags(post, savedNewPost.id); - await saveImages(post, savedNewPost.id); - } -})(); diff --git a/old_node_src/nostr.ts b/old_node_src/nostr.ts deleted file mode 100644 index fea4ec3..0000000 --- a/old_node_src/nostr.ts +++ /dev/null @@ -1,106 +0,0 @@ -import pino from "pino"; -import WebSocket from "ws"; -import { MicroBlogBackend } from "./pocketbase"; - -const logger = pino(); -const pb = new MicroBlogBackend(logger); - -const fetcherNpub = process.env.NOSTR_FETCHER_NPUB; -const myNpub = process.env.NOSTR_ID; - -if (!fetcherNpub) { - throw new Error("NOSTR_FETCHER_NPUB is not set"); -} - -if (!myNpub) { - throw new Error("NOSTR_ID is not set"); -} - -type NostrTag = ["t" | "r" | "imeta", string]; -type NostrEvent = [ - "EVENT" | "EOSE", - string, - { - id: string; - pubkey: string; - created_at: number; - kind: number; // 1 - tags: NostrTag[]; - content: string; - sig: string; - }, -]; - -(async () => { - logger.info("Starting Nostr Fetcher"); - - // figure out when the last post of wasved - const lastSavedPost = await pb.getLatestPostBySource("nostr"); - if (!lastSavedPost) { - throw new Error("No last saved nostr post found"); - } - let since: number | undefined; - since = new Date(lastSavedPost.posted).getTime() / 1000; - logger.info( - { lastSavedPostId: lastSavedPost.id, since }, - "lastSavedPost nostr post", - ); - // listen for new events for 30 seconds - logger.info("trying to connecting to nostr relay"); - const relay = process.env.NOSTR_RELAY; - if (!relay) { - throw new Error("No NOSTR_RELAY environment variable found"); - } - - const events: NostrEvent[] = []; - const ws = new WebSocket(relay); - ws.on("error", logger.error); - ws.on("message", function message(data: Buffer) { - const decodedData = JSON.parse( - Buffer.from(data).toString("utf8"), - ) as NostrEvent; - logger.info({ decodedData }, "recived a message from nostr relay"); - if (decodedData[0] === "EVENT") { - events.push(decodedData); - } - }); - ws.on("open", function open() { - logger.info("connection established"); - ws.send( - JSON.stringify([ - "REQ", - fetcherNpub, - { kinds: [1], authors: [myNpub], ...(since ? { since } : {}) }, - ]), - ); - }); - await new Promise((resolve) => setTimeout(resolve, 30000)); - logger.info("closing connection to nostr relay"); - ws.close(); - logger.info({ count: events.length }, "saving nostr posts"); - for (const event of events) { - const post = await pb.savePost({ - remoteId: event[2].id, - fullPost: event[2], - posted: new Date(event[2].created_at * 1000).toISOString(), - source: "nostr", - authorId: event[1], - }); - - for (const tag of event[2].tags) { - if (tag[0] === "t") { - await pb.setTag(tag[1], post.id); - } else if (tag[0] === "imeta") { - const value = tag[1]; - // remove "url " from the start of the string - const url = value.slice(4); - await pb.saveAndSetImage( - { - remoteURL: url, - }, - post.id, - ); - } - } - } -})(); diff --git a/old_node_src/pixelfed.ts b/old_node_src/pixelfed.ts deleted file mode 100644 index a2ce146..0000000 --- a/old_node_src/pixelfed.ts +++ /dev/null @@ -1,107 +0,0 @@ -import pino from "pino"; -import { lambdaRequestTracker, pinoLambdaDestination } from "pino-lambda"; -import { MicroBlogBackend } from "./pocketbase"; - -// custom destination formatter -const destination = pinoLambdaDestination(); -const logger = pino({}, destination); -const withRequest = lambdaRequestTracker(); - -const baseURL = process.env.PIXELFED_BASE_URL; -const accountID = process.env.PIXELFED_ACCOUNT_ID; - -const pb = new MicroBlogBackend(logger); - -type PixelFedPost = { - media_attachments: { - type: string; //'image', - url: string; // 'https://gram.social/storage/m/_v2/703621281309160235/530d83cd3-f15549/FR41GdSiUQY0/bNHMrzuQkuhXKKfR1zG4HHcjFTe6G2YF02SOr2zi.jpg', - description: string; // 'Blurry gate', - }[]; - id: string; - content: string; - account: { - id: string; - }; - created_at: string; - tags: { name: string }[]; -}; - -const getPostUntilId = async ({ - lastSavedId, - maxId, - carryPosts = [], -}: { - lastSavedId?: string; - maxId?: string; - carryPosts?: PixelFedPost[]; -}): Promise => { - const params = new URLSearchParams(); - params.append("limit", "5"); - params.append("only_media", "true"); - if (maxId) { - params.append("max_id", maxId); - } - - const urlWithParams = new URL(`${baseURL}${accountID}/statuses`); - urlWithParams.search = params.toString(); - - const res = await fetch(urlWithParams.toString()); - const posts = (await res.json()) as PixelFedPost[]; - const containsId = posts.some((post) => post.id === lastSavedId); - - if (!containsId && posts.length >= 5) { - return getPostUntilId({ - lastSavedId, - carryPosts: carryPosts?.concat(posts), - maxId: posts[posts.length - 1]?.id, - }); - } - - const allPosts = carryPosts?.concat(posts).reverse(); - if (lastSavedId) { - const index = allPosts.findIndex((post) => post.id === lastSavedId); - return allPosts.slice(index + 1); - } - return allPosts; -}; - -const savePost = async (post: PixelFedPost) => { - const postData = { - remoteId: post.id, - authorId: post.account.id, - posted: post.created_at, - source: "pixelfed" as const, - fullPost: post, - }; - return await pb.savePost(postData); -}; - -const saveTags = async (post: PixelFedPost, postId: string) => { - logger.info({ tags: post.tags }, "saving tags"); - for (const tag of post.tags) { - await pb.setTag(tag.name, postId); - } -}; - -const saveImages = async (post: PixelFedPost, postId: string) => { - logger.info({ images: post.media_attachments }, "saving images"); - for (const image of post.media_attachments) { - await pb.saveAndSetImage( - { remoteURL: image.url, alt: image.description }, - postId - ); - } -}; - -exports.run = async (event: any, context: any) => { - withRequest(event, context); - const lastSavedPostId = await pb.getLatestPostRemoteIDBySource("pixelfed"); - const posts = await getPostUntilId({ lastSavedId: lastSavedPostId }); - for (const post of posts) { - logger.info({ post }, "saving post"); - const savedNewPost = await savePost(post); - await saveTags(post, savedNewPost.id); - await saveImages(post, savedNewPost.id); - } -}; diff --git a/old_node_src/pocketbase.ts b/old_node_src/pocketbase.ts deleted file mode 100644 index fed568a..0000000 --- a/old_node_src/pocketbase.ts +++ /dev/null @@ -1,223 +0,0 @@ -import type { Logger } from "pino"; -import PocketBase from "pocketbase"; - -export type MicroBlogPostImage = { - id: string; - collectionId: string; - image: string; - alt?: string; - remoteURL: string; -}; - -export type MicroBlogPostTag = { - id: string; - tag: string; -}; - -export type MicroBlogPostSource = - | "blue_sky" - | "mastodon" - | "pleroma" - | "pixelfed" - | "nostr"; - -export type MicroBlogPost = { - source: MicroBlogPostSource; - fullPost: any; - remoteId: string; - authorId: string; - id: string; - posted: string; - expand: { - images?: MicroBlogPostImage[]; - tags?: { - id: string; - }[]; - }; -}; - -export class MicroBlogBackend { - private pb: PocketBase; - private clientSetTime?: Date; - constructor(private logger: Logger) { - this.pb = new PocketBase(process.env.POCKET_BASE_HOST); - } - - private async login() { - const pw = process.env.POCKET_BASE_PW; - const userName = process.env.POCKET_BASE_USER; - if (!pw) { - this.logger.error("POCKET_BASE_PW env var not set"); - throw new Error("POCKET_BASE_PW env var not set"); - } - if (!userName) { - this.logger.error("POCKET_BASE_USER env var not set"); - throw new Error("POCKET_BASE_USER env var not set"); - } - this.logger.info({ userName }, "Logging in to pocketbase"); - await this.pb.collection("users").authWithPassword(userName, pw); - this.clientSetTime = new Date(); - } - - private async checkLogin() { - if (!this.clientSetTime) { - await this.login(); - return; - } - const now = new Date(); - const diff = now.getTime() - this.clientSetTime.getTime(); - const day = 86_400_000; - - if (diff > day) { - await this.login(); - return; - } - } - - public async getLatestPostBySource( - postSource: MicroBlogPostSource, - ): Promise { - await this.checkLogin(); - try { - const post = await this.pb - .collection("micro_blog_posts") - .getFirstListItem(`source = '${postSource}'`, { sort: "-posted" }); - return post; - } catch (error: any) { - if (error.status === 404) { - return undefined; - } - throw error; - } - } - - public async getLatestPostRemoteIDBySource(postSource: MicroBlogPostSource) { - await this.checkLogin(); - const post = await this.getLatestPostBySource(postSource); - return post?.remoteId; - } - - async getTag(tag: string): Promise { - await this.checkLogin(); - try { - const remoteTag = await this.pb - .collection("micro_blog_tags") - .getFirstListItem(`tag = '${tag}'`); - return remoteTag; - } catch (e: any) { - if (e.status === 404) { - return undefined; - } - throw e; - } - } - - async getImageByRemoteURL( - remoteURL: string, - ): Promise { - await this.checkLogin(); - try { - const remoteImage = await this.pb - .collection("micro_blog_images") - .getFirstListItem(`remoteURL = '${remoteURL}'`); - return remoteImage; - } catch (e: any) { - if (e.status === 404) { - return undefined; - } - throw e; - } - } - - private async checkForPost( - remoteId: string, - ): Promise { - await this.checkLogin(); - try { - return await this.pb - .collection("micro_blog_posts") - .getFirstListItem(`remoteId = '${remoteId}'`); - } catch (e: any) { - if (e.status === 404) { - return undefined; - } - throw e; - } - } - - public async savePost( - post: Omit, - ): Promise { - await this.checkLogin(); - const existingPost = await this.checkForPost(post.remoteId); - if (!existingPost) { - return await this.pb - .collection("micro_blog_posts") - .create(post); - } - this.logger.info({ existingPost }, "Found existing post"); - return existingPost; - } - - public async setTag(rawTag: string, postId: string) { - await this.checkLogin(); - let tag = await this.getTag(rawTag); - if (!tag) { - tag = await this.pb - .collection("micro_blog_tags") - .create({ tag: rawTag }); - } - if (!tag) { - throw new Error("Failed to create tag"); - } - await this.pb.collection("micro_blog_posts").update(postId, { - "tags+": tag.id, - }); - } - - public async saveAndSetImage( - imageToSave: Omit, - postId: string, - ) { - await this.checkLogin(); - let image = await this.getImageByRemoteURL(imageToSave.remoteURL); - if (!image) { - const imageResponse = await fetch(imageToSave.remoteURL); - if (!imageResponse.ok) { - throw new Error("Failed to download image"); - } - const imageBlob = await imageResponse.blob(); - const imageFile = new File([imageBlob], "image.jpg", { - type: imageBlob.type, - }); - const data = { - ...imageToSave, - image: imageFile, - }; - image = await this.pb - .collection("micro_blog_images") - .create(data); - this.logger.info({ image }, "Created image"); - } - if (!image) { - throw new Error("Failed to create image"); - } - const res = await this.pb.collection("micro_blog_posts").update(postId, { - "images+": image.id, - }); - this.logger.info({ res }, "Updated post with image"); - } - - public async getPosts(page: number, limit = 20) { - await this.checkLogin(); - const resultList = await this.pb - .collection("micro_blog_posts") - .getList(page, limit, { - sort: "-posted", - expand: "images,tags", - filter: `(source = "blue_sky" || source = "pleroma")`, - // filter: 'source = "pleroma"', - }); - return resultList; - } -} diff --git a/src/micro_blog/blue_sky.clj b/src/micro_blog/blue_sky.clj index d491dac..08ff276 100644 --- a/src/micro_blog/blue_sky.clj +++ b/src/micro_blog/blue_sky.clj @@ -47,7 +47,7 @@ (defn get-posts-until-id ([session id] (get-posts-until-id session id nil [])) ([session id cursor prev-posts] - (tel/log! {:level :info :data {:postId (:remoteId id)}} "Getting posts until id") + (tel/log! {:level :info :data {:id id, :cursor cursor, :prev-posts (count prev-posts)}} "Getting posts until id") (let [limit 5 body (-> (http-client/get (str (@config :blue-sky-host) "/app.bsky.feed.getAuthorFeed") diff --git a/src/micro_blog/pocket_base.clj b/src/micro_blog/pocket_base.clj index 515ac85..ec4bd3b 100644 --- a/src/micro_blog/pocket_base.clj +++ b/src/micro_blog/pocket_base.clj @@ -190,17 +190,21 @@ (if (post-with-remote-id-already-saved? (:remoteId post)) (tel/log! {:level :warn :data {:remoteId (:remoteId post)}} "Post already saved, skipping") (try - (http-client/post (str (@config :pocket-base-host) "/api/collections/micro_blog_posts/records") - {:headers {"Authorization" (get-login-token-with-cache)} - :form-params (assoc post - :tags (map get-tag-id (:tags post)) - :images (map #(apply get-image-id %) (:images post))) - :content-type :json - :as :json}) + (let [tag-ids (doall (map get-tag-id (:tags post))) + image-ids (doall (map #(apply get-image-id %) (:images post)))] + (http-client/post (str (@config :pocket-base-host) "/api/collections/micro_blog_posts/records") + {:headers {"Authorization" (get-login-token-with-cache)} + :form-params (assoc post + :source (name (:source post)) + :isTech (name (:isTech post)) + :tags tag-ids + :images image-ids) + :content-type :json + :as :json})) (catch Exception e (tel/log! {:level :error :id :pocket-base/save-post-error - :data {:remoteId (:remoteId post) + :data {:post post :error (.getMessage e) :exception e}} "Error saving post to pocketbase")