init blue sky scraping
This commit is contained in:
parent
d72eb668c8
commit
c247774256
4 changed files with 171 additions and 1 deletions
83
src/micro_blog/blue_sky.clj
Normal file
83
src/micro_blog/blue_sky.clj
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
(ns micro-blog.blue-sky
|
||||
(:require
|
||||
[clj-http.client :as http-client]
|
||||
[micro-blog.pocket-base :as pb]
|
||||
[micro-blog.utils :as utils]
|
||||
[malli.core :as m]
|
||||
[clojure.pprint :refer [pprint]]
|
||||
[micro-blog.config :refer [config]]))
|
||||
|
||||
(defn create-session
  "Opens a session against the configured Blue Sky host.

  POSTs the configured `:blue-sky-username` / `:blue-sky-api-key` as a JSON
  body to `/com.atproto.server.createSession`, checks the response body with
  `utils/validate-with-throw` (string `:did` and `:accessJwt` expected;
  presumably throws on mismatch -- confirm in micro-blog.utils) and returns a
  map holding only `:did` and `:access-jwt`."
  []
  (let [credentials {:identifier (@config :blue-sky-username)
                     :password   (@config :blue-sky-api-key)}
        endpoint    (str (@config :blue-sky-host) "/com.atproto.server.createSession")
        schema      [:map
                     [:did string?]
                     [:accessJwt string?]]
        response    (http-client/post endpoint
                                      {:form-params  credentials
                                       :content-type :json
                                       :as           :json})
        session     (utils/validate-with-throw (:body response) schema)]
    ;; Expose the token under a kebab-case key and drop everything else.
    (-> session
        (assoc :access-jwt (:accessJwt session))
        (select-keys [:did :access-jwt]))))
|
||||
|
||||
(def post-res-schema
  "Malli schema for the parts of an `app.bsky.feed.getAuthorFeed` response
  body that this namespace reads.

  `:cursor` is marked optional: `[:maybe :string]` alone only permits a nil
  value, while the key itself would still be required, so a response that
  omits the cursor would fail validation."
  [:map
   [:cursor {:optional true} [:maybe :string]]
   [:feed [:vector
           [:map
            [:post [:map
                    [:cid :string]
                    [:author [:map
                              [:handle :string]]]
                    [:record [:map
                              [:createdAt :string]]]]]]]]])
|
||||
|
||||
(defn get-posts-until-id
  "Pages through the author feed of `session`'s `:did` and collects every post
  that appears before the post whose `:cid` equals `id`.

  Arguments:
    session    - map with `:did` and `:access-jwt` (as built by `create-session`)
    id         - `:cid` to stop at; when it never matches, paging continues
                 until the feed runs out
    cursor     - paging cursor for the next request, nil for the first page
    prev-posts - posts accumulated from earlier pages

  Returns a vector of post maps in the order the feed returned them
  (earlier pages first). Each page is validated against `post-res-schema`
  via `utils/validate-with-throw`.

  Paging stops when the target post is on the current page, when the page
  holds a different number of posts than the requested limit, or when the
  response carries no cursor. The last guard matters: recurring with a nil
  cursor drops the `:cursor` query param and would fetch the first page again
  without end."
  ([session id] (get-posts-until-id session id nil []))
  ([session id cursor prev-posts]
   (let [limit 5
         body
         (-> (http-client/get (str (@config :blue-sky-host) "/app.bsky.feed.getAuthorFeed")
                              {:headers {"Authorization" (str "Bearer " (session :access-jwt))}
                               :query-params (cond-> {:actor (:did session)
                                                      :limit limit}
                                               ;; only send a cursor once we have one
                                               cursor (assoc :cursor cursor))
                               :content-type :json
                               :as :json})
             :body
             (utils/validate-with-throw post-res-schema))
         posts (map :post (:feed body))
         new-cursor (:cursor body)
         ;; everything on this page that precedes the target post
         new-posts (take-while #(not= (:cid %) id) posts)
         ;; Append (not prepend) so pages stay in feed order; a vector avoids
         ;; building a chain of nested lazy concats across many pages.
         collected (into (vec prev-posts) new-posts)
         found? (some #(= id (:cid %)) posts)
         short-page? (not= (count posts) limit)]
     (if (or found? short-page? (nil? new-cursor))
       collected
       (recur session id new-cursor collected)))))
|
||||
|
||||
;; TODO: create post
|
||||
;; const data = {
|
||||
;; "remoteId": "test",
|
||||
;; "authorId": "test",
|
||||
;; "posted": "2022-01-01 10:00:00.123Z",
|
||||
;; "source": "pleroma",
|
||||
;; "tags": [
|
||||
;; "RELATION_RECORD_ID"
|
||||
;; ],
|
||||
;; "fullPost": "JSON",
|
||||
;; "images": [
|
||||
;; "RELATION_RECORD_ID"
|
||||
;; ]
|
||||
;; };
|
||||
|
||||
(defn run
  "Creates a Blue Sky session, looks up the remote id of the latest stored
  `:blue_sky` post through `pb/get-latest-post-remote-id-by-source`, and
  fetches the feed posts that precede that id.

  Returns a map with `:session`, `:last-saved-id` and `:new-posts`."
  []
  (let [session   (create-session)
        latest-id (pb/get-latest-post-remote-id-by-source :blue_sky)]
    {:session       session
     :last-saved-id latest-id
     :new-posts     (get-posts-until-id session latest-id)}))
|
||||
Loading…
Add table
Add a link
Reference in a new issue