micro_blog_repo_fetchers/old_node_src/bluesky.ts

155 lines
4 KiB
TypeScript

import pino from "pino";
import { MicroBlogBackend } from "./pocketbase";
// Module-wide pino logger, created with pino's default options.
const logger = pino();
// Shared MicroBlogBackend instance (imported from ./pocketbase), constructed with the logger.
// The save helpers and the entry point in this file call its methods to persist data.
// NOTE(review): presumably a PocketBase client wrapper — confirm in ./pocketbase.
const pb = new MicroBlogBackend(logger);
// Shape this file assumes for the JSON body returned by the createSession request.
// `did` is sent as the `actor` of the feed request and `accessJwt` as its Bearer token;
// `refreshJwt` is not read anywhere in the visible code.
type Session = {
did: string;
accessJwt: string;
refreshJwt: string;
};
// Subset of a post object from the getAuthorFeed response that this file reads.
type BlueSkyPost = {
// Compared against the last saved remote id and stored as the post's `remoteId`.
cid: string;
// Image attachments; saveImages treats a missing embed (or missing images) as "no images".
embed?: {
images: {
fullsize: string; // stored as the image's `remoteURL`
alt: string;
}[];
};
record: {
createdAt: string; // timestamp string, e.g. "2024-06-25T05:32:06.269Z"; stored as `posted`
// The raw record also carries its own `embed` (e.g. $type "app.bsky.embed.images"),
// which is not modelled in this type.
text: string;
// NOTE(review): typed as always present — confirm whether the API omits `facets`
// for posts that contain no rich text.
facets: {
features: {
$type: string; // e.g. "app.bsky.richtext.facet#tag"
tag?: string; // e.g. "cooking"; only read for features whose $type is the "#tag" facet
}[];
}[];
};
};
/**
 * Logs in to Bluesky and returns the session used to authenticate later requests.
 *
 * Credentials come from the `BLUE_SKY_USERNAME` and `BLUE_SKY_API_KEY`
 * environment variables and are POSTed as `{ identifier, password }` to
 * `com.atproto.server.createSession`.
 *
 * @returns the parsed JSON body of the response, typed as a `Session`.
 * @throws Error when either environment variable is unset or empty, or when
 *   the server answers with a non-2xx status.
 */
const createSession = async (): Promise<Session> => {
  const identifier = process.env.BLUE_SKY_USERNAME;
  const apiKey = process.env.BLUE_SKY_API_KEY;
  // JSON.stringify silently drops undefined values, so a missing variable
  // would otherwise only surface as an opaque server-side error.
  if (!identifier || !apiKey) {
    throw new Error(
      "BLUE_SKY_USERNAME and BLUE_SKY_API_KEY must be set to create a Bluesky session",
    );
  }

  const url = "https://bsky.social/xrpc/com.atproto.server.createSession";
  const res = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ identifier, password: apiKey }),
  });
  // Without this check an error payload would be returned as if it were a Session.
  if (!res.ok) {
    throw new Error(
      `Bluesky createSession failed: ${res.status} ${res.statusText}`,
    );
  }
  return (await res.json()) as Session;
};
// Page size requested from getAuthorFeed on every call.
const limit = 10;

/**
 * Collects the author's posts, in the order the API returns them, up to but
 * not including the post whose `cid` equals `id`.
 *
 * Pages through `app.bsky.feed.getAuthorFeed` (`limit` posts per request),
 * following the response cursor until the id is found or the feed runs out.
 * If the id never appears, every fetched post is returned.
 *
 * @param session supplies the `did` used as `actor` and the `accessJwt`
 *   used as Bearer token.
 * @param id cid of the post to stop at; pass `""` to fetch the whole feed.
 * @param cursor optional cursor to resume paging from.
 * @param oldFeed posts already collected from earlier pages; they are kept
 *   in front of everything fetched by this call.
 * @returns `oldFeed` followed by the newly fetched posts, in API order.
 * @throws Error when the server answers with a non-2xx status.
 */
const getPostsUntilID = async (
  session: Session,
  id: string,
  cursor: string | null = null,
  oldFeed: BlueSkyPost[] = [],
): Promise<BlueSkyPost[]> => {
  // Copy so the caller's array is never mutated.
  const collected: BlueSkyPost[] = [...oldFeed];
  let pageCursor: string | null = cursor;

  for (;;) {
    const url = new URL(
      "https://bsky.social/xrpc/app.bsky.feed.getAuthorFeed",
    );
    url.searchParams.set("actor", session.did);
    url.searchParams.set("limit", limit.toString());
    if (pageCursor) {
      url.searchParams.set("cursor", pageCursor);
    }

    const res = await fetch(url, {
      headers: {
        Accept: "application/json",
        Authorization: `Bearer ${session.accessJwt}`,
      },
    });
    if (!res.ok) {
      throw new Error(
        `Bluesky getAuthorFeed failed: ${res.status} ${res.statusText}`,
      );
    }

    const rawData = (await res.json()) as {
      feed?: { post: BlueSkyPost }[];
      cursor?: string | null;
    };
    const page = (rawData.feed ?? []).map((item) => item.post);

    // The id we are searching for is on this page: keep only what precedes
    // it, appended AFTER the earlier pages so the overall order is preserved.
    const matchIndex = page.findIndex((post) => post.cid === id);
    if (matchIndex !== -1) {
      collected.push(...page.slice(0, matchIndex));
      return collected;
    }
    collected.push(...page);

    // The cursor, not the page size, says whether more pages exist. Also stop
    // on an empty page or a cursor that did not advance, so a misbehaving
    // response can never make this loop forever.
    const nextCursor = rawData.cursor ?? null;
    if (page.length === 0 || !nextCursor || nextCursor === pageCursor) {
      return collected;
    }
    pageCursor = nextCursor;
  }
};
/**
 * Persists one Bluesky post through the shared backend.
 *
 * The post's `cid` becomes the stored `remoteId`, `record.createdAt` becomes
 * `posted`, and the complete post object is kept under `fullPost`.
 *
 * @param post the post to store.
 * @returns whatever `pb.savePost` resolves to.
 */
const savePost = async (post: BlueSkyPost) => {
  const { cid, record } = post;
  const payload = {
    remoteId: cid,
    posted: record.createdAt,
    source: "blue_sky" as const,
    fullPost: post,
    authorId: "travisshears.bsky.social",
  };
  const saved = await pb.savePost(payload);
  return saved;
};
/**
 * Attaches every hashtag found in a post's rich-text facets to the saved post.
 *
 * Only features whose `$type` is `app.bsky.richtext.facet#tag` and that carry
 * a non-empty `tag` are forwarded to `pb.setTag`, one call at a time in
 * facet order.
 *
 * @param post the Bluesky post whose facets are scanned.
 * @param postId id of the already-saved post the tags are attached to.
 */
const saveTags = async (post: BlueSkyPost, postId: string) => {
  // A record without any rich text may carry no `facets` at all; treat that
  // as "no tags" (the same way saveImages treats a missing embed) instead of
  // throwing while iterating undefined.
  const facets = post.record.facets ?? [];
  for (const facet of facets) {
    for (const feature of facet.features) {
      if (feature.$type !== "app.bsky.richtext.facet#tag") {
        continue;
      }
      const tag = feature.tag;
      if (tag) {
        await pb.setTag(tag, postId);
      }
    }
  }
};
/**
 * Stores every image embedded in a post and links it to the saved post.
 *
 * Posts without an embed, or whose embed has no `images`, are a no-op.
 * Images are handed to `pb.saveAndSetImage` one at a time, in embed order.
 *
 * @param post the Bluesky post whose embedded images are saved.
 * @param postId id of the already-saved post the images belong to.
 */
const saveImages = async (post: BlueSkyPost, postId: string) => {
  const attachments = post.embed?.images;
  if (attachments == null) {
    return;
  }
  for (const { fullsize, alt } of attachments) {
    await pb.saveAndSetImage({ remoteURL: fullsize, alt }, postId);
  }
};
/**
 * Entry point: saves every Bluesky post that is newer than the latest one
 * already stored for the "blue_sky" source, together with its tags and images.
 *
 * Any failure is logged through the module logger and marks the process as
 * failed via `process.exitCode`, rather than escaping as an unhandled
 * promise rejection.
 */
(async () => {
  const session = await createSession();
  const lastSavedPostId = await pb.getLatestPostRemoteIDBySource("blue_sky");
  // With no previously saved post, "" is used as the stop id so the whole feed is fetched.
  const posts = await getPostsUntilID(session, lastSavedPostId ?? "");
  if (posts.length === 0) {
    logger.info("No new posts to save");
    return;
  }
  posts.reverse(); // save the oldest post first so if we fail posts are not lost on the next run
  for (const post of posts) {
    logger.info({ post }, "saving post");
    const savedNewPost = await savePost(post);
    await saveTags(post, savedNewPost.id);
    await saveImages(post, savedNewPost.id);
  }
})().catch((err: unknown) => {
  logger.error({ err }, "failed to sync Bluesky posts");
  // Set the exit code instead of calling process.exit so pending log output is not cut off.
  process.exitCode = 1;
});