init blue sky scraping
This commit is contained in:
parent
d72eb668c8
commit
c247774256
4 changed files with 171 additions and 1 deletions
|
|
@ -1,3 +1,10 @@
|
||||||
{:mistral-api-key "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
{:mistral-api-key "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||||
:mistral-agent-id "ag:xxxxxxxx:xxxxxxxx:xxxxxxxxxxxxxxxxxxx:xxxxxxxx"
|
:mistral-agent-id "ag:xxxxxxxx:xxxxxxxx:xxxxxxxxxxxxxxxxxxx:xxxxxxxx"
|
||||||
:mistral-host "https://api.mistral.ai"}
|
:mistral-host "https://api.mistral.ai"
|
||||||
|
|
||||||
|
:pocket-base-pw "xxxxxxxx"
|
||||||
|
:pocket-base-user "xxxxxxxxxxxxxxxxxxx"
|
||||||
|
:pocket-base-host "xxxxxxxxxxxxx"
|
||||||
|
|
||||||
|
:blue-sky-api-key "xxxxxxxxxxxxxxxxxxx"
|
||||||
|
:blue-sky-username "coolguy.bsky.social"}
|
||||||
|
|
|
||||||
83
src/micro_blog/blue_sky.clj
Normal file
83
src/micro_blog/blue_sky.clj
Normal file
|
|
@ -0,0 +1,83 @@
|
||||||
|
(ns micro-blog.blue-sky
|
||||||
|
(:require
|
||||||
|
[clj-http.client :as http-client]
|
||||||
|
[micro-blog.pocket-base :as pb]
|
||||||
|
[micro-blog.utils :as utils]
|
||||||
|
[malli.core :as m]
|
||||||
|
[clojure.pprint :refer [pprint]]
|
||||||
|
[micro-blog.config :refer [config]]))
|
||||||
|
|
||||||
|
(defn create-session
  "Logs in to Bluesky and returns a session map.

  Reads `:blue-sky-host`, `:blue-sky-username` and `:blue-sky-api-key` from
  `config`, POSTs the credentials as JSON to
  `<host>/com.atproto.server.createSession` and validates that the response
  body contains string `:did` and `:accessJwt` entries.

  Returns `{:did <string> :access-jwt <string>}`.

  Throws `ex-info` when `:blue-sky-host` is not configured, or (via
  `utils/validate-with-throw`) when the response does not match the schema."
  []
  (let [host (@config :blue-sky-host)
        ;; (str nil \"/path\") silently yields \"/path\"; fail with a clear
        ;; message instead of an opaque HTTP-client error on a host-less URL.
        _ (when-not host
            (throw (ex-info "Missing :blue-sky-host in config"
                            {:key :blue-sky-host})))
        url (str host "/com.atproto.server.createSession")
        body {:identifier (@config :blue-sky-username)
              :password (@config :blue-sky-api-key)}
        res-schema [:map
                    [:did string?]
                    [:accessJwt string?]]
        res (-> (http-client/post url
                                  {:form-params body
                                   :content-type :json
                                   :as :json})
                :body
                (utils/validate-with-throw res-schema))]
    ;; Expose the JWT under a kebab-case key; both entries are guaranteed
    ;; present because the body passed schema validation above.
    {:did (:did res)
     :access-jwt (:accessJwt res)}))
|
||||||
|
|
||||||
|
(def post-res-schema
  "Malli schema for the parts of an `app.bsky.feed.getAuthorFeed` response
  body that this namespace reads: the pagination `:cursor` and, for every feed
  item, the post's `:cid`, author `:handle` and record `:createdAt`.

  `:cursor` is optional (and may be nil) so that a response without a cursor
  still validates instead of being rejected for a missing key."
  [:map
   [:cursor {:optional true} [:maybe :string]]
   [:feed [:vector
           [:map
            [:post [:map
                    [:cid :string]
                    [:author [:map
                              [:handle :string]]]
                    [:record [:map
                              [:createdAt :string]]]]]]]]])
|
||||||
|
|
||||||
|
(defn get-posts-until-id
  "Pages through the session user's author feed and collects posts until the
  post whose `:cid` equals `id` is reached.

  Arguments:
    session    - map with `:did` and `:access-jwt` (see `create-session`).
    id         - `:cid` of the post to stop at (exclusive). When no post
                 matches, every page is collected.
    cursor     - pagination cursor for the page to fetch, or nil for the
                 first page.
    prev-posts - posts collected from earlier pages.

  Returns a vector of post maps in the order the API returned them, with
  earlier pages first, not including the post identified by `id`.

  Paging stops when the target post is found, when a page holds fewer posts
  than the page size, or when the response carries no cursor for a next page.

  Throws (via `utils/validate-with-throw`) when a response does not match
  `post-res-schema`."
  ([session id] (get-posts-until-id session id nil []))
  ([session id cursor prev-posts]
   (let [limit 5
         body (-> (http-client/get (str (@config :blue-sky-host) "/app.bsky.feed.getAuthorFeed")
                                   {:headers {"Authorization" (str "Bearer " (session :access-jwt))}
                                    :query-params (cond-> {:actor (:did session)
                                                           :limit limit}
                                                    cursor (assoc :cursor cursor))
                                    :content-type :json
                                    :as :json})
                  :body
                  (utils/validate-with-throw post-res-schema))
         posts (map :post (:feed body))
         new-cursor (:cursor body)
         new-posts (take-while #(not= (:cid %) id) posts)
         ;; Append this page after the earlier ones so the result keeps one
         ;; consistent order across pages. A vector is used so that many
         ;; pages do not build a deeply nested lazy `concat`.
         collected (into (vec prev-posts) new-posts)]
     (cond
       ;; end of posts: a short page means there is nothing left to fetch
       (not= (count posts) limit) collected
       ;; found post
       (some #(= id (:cid %)) posts) collected
       ;; no next page: recurring with a nil cursor would refetch page one
       ;; forever
       (nil? new-cursor) collected
       ;; recur
       :else (recur session id new-cursor collected)))))
|
||||||
|
|
||||||
|
;; TODO: create post
|
||||||
|
;; const data = {
|
||||||
|
;; "remoteId": "test",
|
||||||
|
;; "authorId": "test",
|
||||||
|
;; "posted": "2022-01-01 10:00:00.123Z",
|
||||||
|
;; "source": "pleroma",
|
||||||
|
;; "tags": [
|
||||||
|
;; "RELATION_RECORD_ID"
|
||||||
|
;; ],
|
||||||
|
;; "fullPost": "JSON",
|
||||||
|
;; "images": [
|
||||||
|
;; "RELATION_RECORD_ID"
|
||||||
|
;; ]
|
||||||
|
;; };
|
||||||
|
|
||||||
|
(defn run
  "Entry point for the Bluesky scrape.

  Creates a Bluesky session, asks PocketBase for the remote id of the most
  recently stored `:blue_sky` post, and fetches every feed post up to that id.

  Returns a map with `:session`, `:last-saved-id` and `:new-posts`."
  []
  (let [bsky-session (create-session)
        latest-remote-id (pb/get-latest-post-remote-id-by-source :blue_sky)
        fetched (get-posts-until-id bsky-session latest-remote-id)]
    {:session bsky-session
     :last-saved-id latest-remote-id
     :new-posts fetched}))
|
||||||
71
src/micro_blog/pocket_base.clj
Normal file
71
src/micro_blog/pocket_base.clj
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
(ns micro-blog.pocket-base
|
||||||
|
(:require
|
||||||
|
[clojure.pprint :refer [pprint]]
|
||||||
|
[clojure.string :as str]
|
||||||
|
[clj-http.client :as http-client]
|
||||||
|
[malli.core :as m]
|
||||||
|
[micro-blog.config :refer [config]]))
|
||||||
|
|
||||||
|
;; Cache for the PocketBase login token: `:token` holds the token and
;; `:fetched-at` the instant it was stored. `^:private` must sit on the var
;; name; placed on the init form it only annotates that form and leaves the
;; var public.
(defonce ^:private token-cache (atom {:token nil :fetched-at nil}))
|
||||||
|
(defn- now
  "Returns the current time as a `java.time.Instant`."
  []
  (java.time.Instant/now))

(defn older-then-a-day?
  "Returns true when `fetched-at` (a `java.time.Instant`) lies at least 23
  hours in the past, false when it is more recent, and nil when `fetched-at`
  is nil.

  The threshold is 23 hours rather than 24 so that a cached value is treated
  as stale an hour before a full day has elapsed."
  [^java.time.Instant fetched-at]
  (when fetched-at
    (let [^java.time.Duration duration (java.time.Duration/between fetched-at (now))]
      ;; `.toHours` truncates, so `(> hours 23)` would only become true after
      ;; a full 24 hours; `>=` gives the intended one-hour safety margin.
      (>= (.toHours duration) 23))))
|
||||||
|
|
||||||
|
(defn get-login-token
  "Authenticates against PocketBase and returns the `:token` value from the
  response body.

  Reads `:pocket-base-host`, `:pocket-base-user` and `:pocket-base-pw` from
  `config` and POSTs them as a JSON `identity`/`password` pair to the users
  collection's `auth-with-password` endpoint."
  []
  (let [endpoint (str (@config :pocket-base-host) "/api/collections/users/auth-with-password")
        credentials {:identity (@config :pocket-base-user)
                     :password (@config :pocket-base-pw)}
        response (http-client/post endpoint
                                   {:form-params credentials
                                    :content-type :json
                                    :as :json})]
    (get-in response [:body :token])))
|
||||||
|
|
||||||
|
(defn get-login-token-with-cache
  "Returns the PocketBase login token, reusing the one in `token-cache` while
  it is present and `older-then-a-day?` does not report it as stale.

  Otherwise fetches a new token with `get-login-token`, prints a notice,
  stores the token together with the current instant in `token-cache`, and
  returns it."
  []
  (let [cached @token-cache
        cached-token (:token cached)
        usable? (and cached-token
                     (not (older-then-a-day? (:fetched-at cached))))]
    (if-not usable?
      (let [fresh-token (get-login-token)]
        (println "Getting new login token")
        (reset! token-cache {:token fresh-token :fetched-at (now)})
        fresh-token)
      cached-token)))
|
||||||
|
|
||||||
|
;; The set of source keywords that `valid-source?` accepts.
(def sources
  #{:blue_sky
    :mastodon
    :nostr
    :pixelfed
    :pleroma})

(defn valid-source?
  "Returns true when `source` is one of the keywords in `sources`, false
  otherwise."
  [source]
  ;; `sources` holds only keywords, so a set lookup returns a non-nil value
  ;; exactly when `source` is a member.
  (some? (sources source)))
|
||||||
|
|
||||||
|
(defn get-latest-post-remote-id-by-source
  "Returns the `remoteId` of the most recently posted `micro_blog_posts`
  record for `source`, or nil when the query returns no records.

  `source` must be one of the keywords in `sources`. The request asks for a
  single record sorted by `-posted`, filtered on `source`, and sends the
  cached login token as the `Authorization` header.

  Throws `ex-info`:
    - \"Invalid source\" when `source` is not in `sources` (checked before
      any request is made);
    - \"Res does not follow schema\" when the response body does not match the
      expected shape; the ex-data holds the body under `:res` and the malli
      explanation under `:explain`."
  [source]
  (when-not (valid-source? source)
    (throw (ex-info "Invalid source" {:source source})))
  (let [res-schema [:map
                    [:items
                     [:vector
                      [:map
                       [:id string?]
                       [:remoteId string?]]]]]
        body (:body
              (http-client/get (str (@config :pocket-base-host) "/api/collections/micro_blog_posts/records")
                               {:headers {"Authorization" (get-login-token-with-cache)}
                                :query-params {:page 1
                                               "perPage" 1
                                               :sort "-posted"
                                               ;; `source` was checked against the fixed `sources`
                                               ;; set above, so it is safe to splice into the filter.
                                               :filter (str "source = '" (name source) "'")
                                               :fields (str/join "," ["remoteId" "id"])
                                               "skipTotal" true}
                                :content-type :json
                                :as :json}))]
    (when-not (m/validate res-schema body)
      ;; Attach the explanation to the exception; computing it and dropping
      ;; the result leaves no clue as to why validation failed.
      (throw (ex-info "Res does not follow schema"
                      {:res body
                       :explain (m/explain res-schema body)})))
    (-> body :items first :remoteId)))
|
||||||
9
src/micro_blog/utils.clj
Normal file
9
src/micro_blog/utils.clj
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
(ns micro-blog.utils
|
||||||
|
(:require [malli.core :as m]))
|
||||||
|
|
||||||
|
(defn validate-with-throw
  "Returns `value` unchanged when it satisfies the malli `schema`.

  Otherwise throws `ex-info` \"Res does not follow schema\" whose ex-data holds
  the offending `:value` and the malli explanation under `:explain`.

  `value` comes first so the function can be used inside a `->` pipeline."
  [value schema]
  (when-not (m/validate schema value)
    ;; Keep the explanation on the exception; computing it and discarding the
    ;; result gives the caller nothing to diagnose the mismatch with.
    (throw (ex-info "Res does not follow schema"
                    {:value value
                     :explain (m/explain schema value)})))
  value)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue