Files
browser-input/src/modules/facebook.ts
2025-10-17 00:14:13 +07:00

1026 lines
42 KiB
TypeScript

// facebook.ts
import { postgres } from "bun";
import * as model from "../model";
import { NewFact } from "../model";
import { turndownService } from "../utils/turndown";
// CSS selectors used by FacebookModule to locate elements in the page.
// Each group is consumed by one extractor; selectors inside a group are
// usually evaluated relative to that group's container/section element, not
// the whole document. Most of them mirror the rendered DOM nesting level by
// level, so a layout change on the site will silently make them match nothing.
const ELEMENT_SELECTORS = {
// Profile header fields — read by saveProfile().
profile: {
displayName: 'span[dir="auto"] > h1[class^="html-h1"]',
// Matches an SVG <image>; saveProfile() reads its `xlink:href` attribute.
profilePicture: '[role="main"] > div > div:first-of-type image',
followerCount: 'a[href*="followers"] > strong',
followingCount: 'a[href*="following"] > strong',
bio: 'div[role="main"] > div:last-child > div:last-child > div > div > div:nth-child(2) > div > div > div > div > div > div > div:last-child'
},
// Feed containers — read by saveAllPost().
post: {
// Used on /search/ pages: every match is treated as one post element.
searchPageContainer: 'div[role="main"] > div:last-child > div:last-child > div > div:last-child > div:last-child > div',
// Used on other pages: its direct <div> children are the post elements.
container:
'div[role="main"] > div:last-child > div:last-child > div > div:last-child > div:last-child',
// NOTE(review): not referenced in the part of the file visible here.
postURL: 'a[href*="/posts/"], a[href*="/reels/"], a[href*="/videos/"]',
},
// Regular post inside a feed — read by extractPostFromProfile();
// seeMoreBtn is also used by expandAllContent().
postFromProfile: {
postUrl: 'span[dir="ltr"] > div > span a[attributionsrc]',
profileName: 'a[role="link"] > b[class*="html-b"]',
profileUrl: 'h3 a[role="link"]',
caption: 'div[data-ad-rendering-role="story_message"]',
photoUrl: 'a[attributionsrc][href*="/photo/"] img',
// like/comment/share below are resolved relative to this element.
containerInteractions:
'div[data-visualcompletion="ignore-dynamic"] > div > div > div > div:first-child',
like: 'div > div > div:first-child span[class*="html-span"] span[aria-hidden="true"]',
comment: 'div > div > div:nth-of-type(2) > div:nth-of-type(2) span[class*="html-span"] span[dir="auto"]',
share: 'div > div > div:nth-of-type(2) > div:nth-of-type(3) span[class*="html-span"] span[dir="auto"]',
seeMoreBtn: 'div[data-ad-comet-preview="message"] div[dir="auto"] div[role="button"]',
},
// Reel inside a feed — read by extractReelsFromProfile();
// seeMoreBtn is also used by expandAllContent().
// The trailing space in [class*="html-div "] is part of the match string.
reelFromProfile: {
postUrl: 'div[class*="html-div "] a[attributionsrc]',
profileName: 'h3[dir="auto"][class*="html-h3"] a[role="link"][attributionsrc]',
profileUrl: 'h3[dir="auto"][class*="html-h3"] a[role="link"]',
caption: 'div[class*="html-div "] a[attributionsrc] span[dir="auto"]',
containerInteractions: 'div[class*="html-div "] a[attributionsrc] > div:nth-of-type(2)',
interactionList: 'div > div > div > div > div',
seeMoreBtn: 'div[class*="html-div "] a[attributionsrc] span[dir="auto"] > div > object > div[role="button"]',
date: 'span[dir="auto"] > span > span:not([class]) > span:nth-of-type(2):not([aria-hidden])'
},
// Post detail dialog — read by saveDetailPost(); `container` is also the
// scope that saveData() hands to saveCommentPosts().
detailPost: {
container: 'div[role="dialog"]:nth-of-type(2)',
displayName: 'a[attributionsrc][role="link"] > b[class*="html-b"]',
profile: 'a[attributionsrc][role="link"]',
caption: 'div[data-ad-preview="message"]',
photoUrls: 'a[attributionsrc][href*="/photo/"] img',
// likeCount/commentCount/shareCount are resolved relative to this element.
reactionContainer: 'div[data-visualcompletion="ignore-dynamic"] > div > div > div > div:first-child',
likeCount: 'div > div > div:first-child span[class*="html-span"] span[aria-hidden="true"]',
commentCount: 'div > div > div:nth-of-type(2) > div:nth-of-type(2) span[class*="html-span"] span[dir="auto"]',
shareCount: 'div > div > div:nth-of-type(2) > div:nth-of-type(3) span[class*="html-span"] span[dir="auto"]',
},
// Comments under a post detail — read by saveCommentPosts().
// `section` matches one comment; the remaining selectors run inside it.
commentDetailPost: {
container: 'div > div > div > div > div div[data-virtualized="false"] > div > div > div',
section: 'div[data-virtualized="false"]',
// Also read for its text, which is stored as the comment's duration string.
commentURL: 'a[attributionsrc][role="link"][href*="/posts/"]',
profileURL: 'a[attributionsrc]',
displayName: 'a[attributionsrc] > span > span[dir="auto"]',
profileImageUrl: 'a[attributionsrc] image',
content: 'span[dir="auto"][lang]',
likeCount: 'span[class^="html-span"] > div[role="button"][aria-label*=";"]',
replyCount: 'span[class^="html-span"] > span[dir="auto"]',
},
// Comments next to a reel — read by saveCommentReels(); `container` is the
// scope that saveData() passes in.
commentDetailReels: {
container: 'div[role="complementary"] div:not([class]) > div:last-of-type > div > div',
section: ':scope > div[data-virtualized="false"]',
seeMoreBtn: 'div[dir="auto"] > div[role="button"]',
displayName: 'div[role="article"] a[href][tabindex="0"]',
content: 'span[dir="auto"][lang]',
likeCount: 'div[role="button"][tabindex="0"] > div[class^="html-div"] > span',
replyCount: 'div[role=button] > span > span[dir="auto"]',
// Anchor whose href is the comment permalink and whose text is the age label.
displayDate: 'li > span > div > a',
},
// Reel detail page — read by saveDetailReels() / extractReelsFromDom().
detailReels: {
container: 'div[role="main"]',
displayName: 'h2[dir="auto"] a',
caption: 'div > span[dir="auto"] > div',
seeMoreBtn: 'div[role="button"]',
// extractReelsFromDom() reads children[1..3] as like/comment/share counts.
reactionContainer: 'div > div > div > div:nth-child(2) > div:nth-child(2) > div > div > div',
},
// Video detail page — read by saveDetailVideo().
detailVideos: {
// container: '[role="main"]',
videoSection: '[role="main"]',
// Key name is spelled "conten" (sic); saveDetailVideo() references it by
// this exact name, so renaming requires a coordinated change.
contenVideoSection: '[role="complementary"]',
// The same anchor supplies both the profile href and the display name.
profileUrl: '[data-ad-rendering-role="profile_name"] a',
caption: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(1) > div:nth-child(3) > div:nth-child(1)',
title: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(1) > div:nth-child(2) > div > div > div > span',
seeMoreBtn: '[dir="auto"] > [role="button"]',
likeCount: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(2) > div:nth-child(1) > div > div:nth-child(1) > div > span > div > span:nth-child(2) > span > span',
commentCount: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(2) > div:nth-child(1) > div > div:nth-child(2) > div:nth-child(1) > span > div > div > div:nth-child(1) > span > span',
viewCount: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(2) > div:nth-child(1) > div > div:nth-child(2) > div:nth-child(2) > span > span > div > div:nth-child(1) > span'
},
// Comments next to a video — read by saveCommentVideo().
commentDetailVideo: {
container: '[role="complementary"] div[style="height: auto;"]',
section: 'div[data-virtualized="false"]',
commentUrl: 'a[href*="comment_id"]',
displayName: 'a[href*="comment_id"][aria-hidden="false"]',
content: '[dir="auto"][lang]',
likeCount: 'span > [role="button"][aria-label][tabindex="0"] > div > span',
// The space after "[" is inside the selector string; CSS permits whitespace there.
commentCount: '[role="button"][ tabindex="0"] > span > span[dir="auto"]',
}
};
export default class FacebookModule {
// URL passed as the `context` argument of most emitted facts. It starts as the
// page URL at construction time, but several extractors in this class reassign
// it (to a post permalink, a comment permalink, or the current page URL), so
// its value depends on which extractor ran last.
context = window.location.href;
/**
 * Flags an element as already harvested by adding the `saved` CSS class.
 * Does nothing when `el` is null or undefined.
 */
private markSaved(el?: Element | null) {
  if (el) {
    el.classList.add("saved");
  }
}
/**
 * Reports whether an element carries the `saved` marker class.
 * A missing element and `document.body` are always reported as not saved.
 */
private isSaved(el?: Element | null) {
  return !!el && el !== document.body && el.classList.contains("saved");
}
/** Currently a no-op: the method body is empty. */
tagElement(): void {}
/**
 * Entry point: picks the extractor(s) matching the current URL and returns the
 * fact collections they produced.
 *
 * Routing, first match wins:
 *  - `/search/` → feed posts of a search result page
 *  - `/posts`   → post detail plus its comments
 *  - `/reel`    → reel detail plus its comments
 *  - `/videos`  → video detail plus its comments
 *  - otherwise  → profile header plus feed posts
 *
 * `/search/` is tested first because search URLs such as `/search/posts/?q=…`
 * or `/search/videos/?q=…` also contain `/posts` / `/videos` and would
 * otherwise be routed to the detail extractors.
 *
 * @returns The collected fact collections; empty when nothing new was found.
 */
async saveData(): Promise<model.FactCollection[]> {
  const out: model.FactCollection[] = [];
  const href = window.location.href;

  if (href.includes("/search/")) {
    const posts = await this.saveAllPost();
    if (posts && posts.facts && posts.facts.length > 0) out.push(posts);
  } else if (href.includes("/posts")) {
    const post = this.saveDetailPost();
    if (post) out.push(post);
    const comments = this.saveCommentPosts(
      document.querySelector<HTMLElement>(ELEMENT_SELECTORS.detailPost.container) || undefined
    );
    if (comments.length) out.push(...comments);
  } else if (href.includes("/reel")) {
    const reel = await this.saveDetailReels();
    if (reel) out.push(reel);
    const c = await this.saveCommentReels(
      document.querySelector<HTMLElement>(ELEMENT_SELECTORS.commentDetailReels.container) || undefined
    );
    if (c.length) out.push(...c);
  } else if (href.includes("/videos")) {
    const post = this.saveDetailVideo();
    if (post) out.push(post);
    const comments = this.saveCommentVideo();
    if (comments.length) out.push(...comments);
  } else {
    const profile = this.saveProfile();
    if (profile) out.push(profile);
    const posts = await this.saveAllPost();
    if (posts && posts.facts && posts.facts.length > 0) out.push(posts);
  }
  return out;
}
/**
 * Builds the "post" fact collection for a video detail page.
 *
 * Everything except the source URL is read from the `[role="complementary"]`
 * panel. Text that cannot be found falls back to "" (or "0" for the counters).
 * The current page URL is used both as the post URL and as the fact context.
 *
 * @returns The collection; it is returned even when the panel was not found.
 */
saveDetailVideo(): model.FactCollection {
  const collection: model.FactCollection = model.NewFactCollection('post');
  const selectors = ELEMENT_SELECTORS.detailVideos;
  const pageUrl = window.location.href;
  const panel = document.querySelector<HTMLElement>(selectors.contenVideoSection);

  // Text of the first match inside the panel, or the fallback when missing/empty.
  const readText = (selector: string, fallback: string) =>
    panel?.querySelector<HTMLElement>(selector)?.textContent || fallback;

  // One anchor carries both the profile link and the author's display name.
  const authorLink = panel?.querySelector<HTMLElement>(selectors.profileUrl);
  const profileUrl = authorLink?.getAttribute('href') || '';
  const displayName = authorLink?.textContent || '';

  const caption = readText(selectors.caption, '');
  const title = readText(selectors.title, '');
  const content = turndownService.turndown(`${title} \n ${caption}`);
  const seeMoreLabel = readText(selectors.seeMoreBtn, '');

  // Reactions
  const likes = readText(selectors.likeCount, "0");
  const commentsTotal = readText(selectors.commentCount, "0");
  const views = readText(selectors.viewCount, "0");

  collection.facts.push(
    NewFact('fb_post_source_url', 'url', pageUrl, pageUrl),
    NewFact('fb_post_profile_url', 'url', profileUrl, pageUrl),
    NewFact('fb_post_display_name', 'string', displayName, pageUrl),
    NewFact('fb_post_content', 'string', content, pageUrl),
    NewFact('fb_post_see_more_btn', 'string', seeMoreLabel, pageUrl),
    NewFact('fb_post_like_count', 'string', this.parseCount(likes), pageUrl),
    NewFact('fb_post_comment_count', 'string', this.parseCount(commentsTotal), pageUrl),
    NewFact('fb_post_view_count', 'string', this.parseCount(views), pageUrl),
  );
  return collection;
}
/**
 * Collects the comments shown next to a video into one "comment" collection.
 *
 * Each not-yet-saved comment section contributes six facts (URL, profile URL,
 * display name, content, like count, reply count) and is then marked as saved
 * so that a later call does not emit it again.
 *
 * @returns `[collection]` when at least one fact was gathered, otherwise `[]`.
 */
saveCommentVideo(): model.FactCollection[] {
  const selectors = ELEMENT_SELECTORS.commentDetailVideo;
  const container = document.querySelector<HTMLElement>(selectors.container);
  if (!container) return [];

  const comments = model.NewFactCollection("comment");
  const pageUrl = window.location.href;

  // Trimmed text of the first match inside `root`, or the fallback when absent.
  const readText = (root: HTMLElement, selector: string, fallback: string): string =>
    root.querySelector<HTMLElement>(selector)?.textContent?.trim() ?? fallback;

  container.querySelectorAll<HTMLElement>(selectors.section).forEach((section) => {
    // Skip sections that were harvested by an earlier call.
    if (this.isSaved(section)) return;

    // Comment permalink without the tracking suffix; "" when no link exists.
    const commentUrl = section
      .querySelector<HTMLAnchorElement>(selectors.commentUrl)
      ?.getAttribute("href")
      ?.split("&__tn_")[0] ?? "";
    // The permalink with its comment_id parameter stripped.
    // NOTE(review): this is stored as the commenter's profile URL, but it is
    // derived from the comment link — confirm it really points at a profile.
    const profileUrl = commentUrl
      .split("?comment_id=")[0]
      .split("&comment_id=")[0];
    const displayName = readText(section, selectors.displayName, "");
    const content = readText(section, selectors.content, "");
    const likeCount = readText(section, selectors.likeCount, "0");
    const commentCount = readText(section, selectors.commentCount, "0");

    // NOTE(review): the other comment extractors in this class tag the count
    // facts as "integer"; the "string" tags are kept here as they were.
    comments.facts.push(
      NewFact("fb_comment_source_url", "url", commentUrl, pageUrl),
      NewFact("fb_comment_profile_url", "url", profileUrl, pageUrl),
      NewFact("fb_comment_display_name", "string", displayName, pageUrl),
      NewFact("fb_comment_content", "string", content, pageUrl),
      NewFact("fb_comment_like_count", "string", this.parseCount(likeCount), pageUrl),
      NewFact("fb_comment_comment_count", "string", this.parseCount(commentCount), pageUrl),
    );

    // Without this marker every call would re-emit the same comments.
    this.markSaved(section);
  });

  // An empty collection would otherwise be pushed by the caller, which only
  // checks the array length.
  return comments.facts.length ? [comments] : [];
}
/**
 * Collects the comments of a post detail view into one "comment" collection.
 *
 * @param scope Element to search in; when omitted nothing is collected.
 * @returns `[collection]` when at least one fact was gathered, otherwise `[]`.
 *
 * Side effects: every processed section is marked as saved, and `this.context`
 * is overwritten with the permalink of each comment that has one.
 */
saveCommentPosts(scope?: HTMLElement): model.FactCollection[] {
  const selectors = ELEMENT_SELECTORS.commentDetailPost;
  const container = scope?.querySelector<HTMLElement>(selectors.container);
  if (!container) return [];

  const collection = model.NewFactCollection("comment");
  const commentSections = Array.from(container.querySelectorAll<HTMLElement>(selectors.section));

  for (const section of commentSections) {
    // Already harvested by an earlier call.
    if (this.isSaved(section)) continue;

    // The permalink anchor supplies the comment URL and, further down, the age label.
    const permalinkEl = section.querySelector<HTMLAnchorElement>(selectors.commentURL);
    const permalinkHref = permalinkEl?.href;
    const permalink = permalinkHref ? permalinkHref.split("&__cft__[0]")[0] : undefined;
    // Context for this comment's facts: its own permalink, else whatever
    // context is current, else the page URL.
    const contextUrl = permalink || this.context || window.location.href;
    if (permalink) {
      collection.facts.push(NewFact("fb_comment_source_url", "url", permalink, window.location.href));
      this.context = permalink;
    }

    const author = section.querySelector<HTMLAnchorElement>(selectors.profileURL);
    if (author) {
      const authorName = author.textContent?.trim() || "";
      const authorUrl = author.href?.split("?")[0] || "";
      if (authorName) collection.facts.push(NewFact("fb_comment_display_name", "name", authorName, contextUrl));
      if (authorUrl) collection.facts.push(NewFact("fb_comment_profile_url", "url", authorUrl, contextUrl));
    }

    const contentEl = section.querySelector<HTMLElement>(selectors.content);
    if (contentEl) {
      const markdown = turndownService.turndown(contentEl.innerHTML || "");
      if (markdown) collection.facts.push(NewFact("fb_comment_content_md", "md", markdown, contextUrl));
    }

    const likesEl = section.querySelector<HTMLElement>(selectors.likeCount);
    if (likesEl) {
      collection.facts.push(
        NewFact("fb_comment_like_count", "integer", this.parseCount(likesEl.textContent || ""), contextUrl)
      );
    }

    const repliesEl = section.querySelector<HTMLElement>(selectors.replyCount);
    if (repliesEl) {
      const replyText = (repliesEl.textContent || "").trim();
      collection.facts.push(
        NewFact("fb_comment_comment_count", "integer", this.parseCount(replyText), contextUrl)
      );
    }

    // The permalink anchor's text is stored as the comment's duration string.
    const ageLabel = permalinkEl?.textContent;
    if (ageLabel) {
      collection.facts.push(NewFact("fb_comment_duration_string", "string", ageLabel.trim(), contextUrl));
    }

    this.markSaved(section);
  }

  return collection.facts.length > 0 ? [collection] : [];
}
/**
 * Collects the comments shown next to a reel into one "comment" collection.
 *
 * All "see more" buttons inside `scope` are clicked first, followed by a fixed
 * 800 ms pause, before the comment sections are read.
 *
 * @param scope Element to search in; when omitted nothing is collected.
 * @returns `[collection]` when at least one fact was gathered, otherwise `[]`.
 *
 * Side effects: every processed section is marked as saved, and `this.context`
 * is overwritten with the permalink of each comment that has one.
 */
async saveCommentReels(scope?: HTMLElement): Promise<model.FactCollection[]> {
  if (!scope) return [];
  const selectors = ELEMENT_SELECTORS.commentDetailReels;

  // Expand truncated comments, then give the page time to re-render.
  for (const expander of Array.from(scope.querySelectorAll<HTMLElement>(selectors.seeMoreBtn))) {
    expander.click();
  }
  await new Promise<void>((done) => setTimeout(done, 800));

  const commentSections = Array.from(scope.querySelectorAll<HTMLElement>(selectors.section));
  if (commentSections.length === 0) return [];

  const collection = model.NewFactCollection("comment");
  for (const section of commentSections) {
    // Already harvested by an earlier call.
    if (this.isSaved(section)) continue;

    // The date anchor supplies the comment permalink and the age label.
    const dateLink = section.querySelector<HTMLAnchorElement>(selectors.displayDate);
    const dateHref = dateLink?.href;
    const permalink = dateHref ? dateHref.replace("&__tn__=R", "") : undefined;
    // Context for this comment's facts: its own permalink, else whatever
    // context is current, else the page URL.
    const contextUrl = permalink || this.context || window.location.href;
    if (permalink) {
      collection.facts.push(NewFact("fb_comment_source_url", "url", permalink, window.location.href));
      this.context = permalink;
    }

    const author = section.querySelector<HTMLAnchorElement>(selectors.displayName);
    if (author) {
      const authorName = author.textContent?.trim() || "";
      const authorUrl = author.href?.split("?")[0] || "";
      if (authorName) collection.facts.push(NewFact("fb_comment_display_name", "name", authorName, contextUrl));
      if (authorUrl) collection.facts.push(NewFact("fb_comment_profile_url", "url", authorUrl, contextUrl));
    }

    const contentEl = section.querySelector<HTMLElement>(selectors.content);
    if (contentEl) {
      const markdown = turndownService.turndown(contentEl.innerHTML || "");
      if (markdown) collection.facts.push(NewFact("fb_comment_content_md", "md", markdown, contextUrl));
    }

    const likesEl = section.querySelector<HTMLElement>(selectors.likeCount);
    if (likesEl) {
      const likeText = (likesEl.textContent || "").trim();
      collection.facts.push(NewFact("fb_comment_like_count", "integer", this.parseCount(likeText), contextUrl));
    }

    const repliesEl = section.querySelector<HTMLElement>(selectors.replyCount);
    if (repliesEl) {
      const replyText = (repliesEl.textContent || "").trim();
      collection.facts.push(NewFact("fb_comment_comment_count", "integer", this.parseCount(replyText), contextUrl));
    }

    const ageLabel = dateLink?.textContent;
    if (ageLabel) {
      collection.facts.push(NewFact("fb_comment_duration_string", "string", ageLabel.trim(), contextUrl));
    }

    this.markSaved(section);
  }

  return collection.facts.length > 0 ? [collection] : [];
}
/**
 * Collects the profile header (name, picture, bio, follower and following
 * counts) into a "profile" collection.
 *
 * Each source element is marked as saved once its fact has been emitted, so a
 * repeated call only reports elements it has not seen before.
 *
 * All facts use the current page URL as context. `this.context` is not used
 * here because other extractors overwrite it with post or comment permalinks.
 *
 * @returns The collection, or `null` when no new fact was found.
 */
saveProfile(): model.FactCollection | null {
  const selectors = ELEMENT_SELECTORS.profile;
  const pageUrl = window.location.href;
  const facts = model.NewFactCollection("profile");

  const displayName = document.querySelector<HTMLElement>(selectors.displayName);
  if (displayName && !this.isSaved(displayName)) {
    facts.facts.push(NewFact("fb_profile_source_url", "url", pageUrl, pageUrl));
    // Only the heading's last child node is used as the name.
    const name = displayName.lastChild?.textContent || "";
    if (name) facts.facts.push(NewFact("fb_profile_display_name", "name", name, pageUrl));
    this.markSaved(displayName);
  }

  const profilePicture = document.querySelector<HTMLElement>(selectors.profilePicture);
  if (profilePicture && !this.isSaved(profilePicture)) {
    // The picture is an SVG <image>; its URL lives in the xlink:href attribute.
    const pic = profilePicture.getAttribute("xlink:href") || "";
    if (pic) facts.facts.push(NewFact("fb_profile_profileimage_url", "url", pic, pageUrl));
    this.markSaved(profilePicture);
  }

  const bioEl = document.querySelector<HTMLElement>(selectors.bio);
  if (bioEl && !this.isSaved(bioEl)) {
    const bioContent = turndownService.turndown(bioEl.innerHTML);
    // Emit and mark only a non-empty bio: an empty one carries no information,
    // and leaving it unmarked lets a later call pick it up once it has content.
    if (bioContent) {
      facts.facts.push(NewFact("fb_profile_bio_md", "md", bioContent, pageUrl));
      this.markSaved(bioEl);
    }
  }

  const followerCount = document.querySelector<HTMLElement>(selectors.followerCount);
  if (followerCount && !this.isSaved(followerCount)) {
    const count = this.parseCount(followerCount.textContent);
    facts.facts.push(NewFact("fb_profile_follower_count", "integer", count, pageUrl));
    this.markSaved(followerCount);
  }

  const followingCount = document.querySelector<HTMLElement>(selectors.followingCount);
  if (followingCount && !this.isSaved(followingCount)) {
    const count = this.parseCount(followingCount.textContent);
    facts.facts.push(NewFact("fb_profile_following_count", "integer", count, pageUrl));
    this.markSaved(followingCount);
  }

  return facts.facts.length ? facts : null;
}
/**
 * Collects every not-yet-saved post or reel of the current feed into a single
 * collection ("search" on `/search/` pages, "post" elsewhere).
 *
 * Truncated captions are expanded first; each post element is then handed to
 * the reel or regular-post extractor and marked as saved once it produced
 * facts. A failure in one post is logged and does not abort the others.
 *
 * @returns The merged collection, or `null` when nothing new was found.
 */
async saveAllPost(): Promise<model.FactCollection | null> {
  // Evaluated once so section lookup and collection kind cannot disagree.
  const onSearchPage = window.location.href.includes("/search/");

  let sections: NodeListOf<HTMLElement> | undefined;
  if (onSearchPage) {
    sections = document.querySelectorAll<HTMLElement>(ELEMENT_SELECTORS.post.searchPageContainer);
  } else {
    const container = document.querySelector<HTMLElement>(ELEMENT_SELECTORS.post.container);
    sections = container?.querySelectorAll<HTMLElement>(":scope > div");
  }
  if (!sections?.length) return null;
  const postElements = Array.from(sections);

  // First, expand all content before processing
  await this.expandAllContent(sections);

  const facts = onSearchPage ? model.NewFactCollection("search") : model.NewFactCollection("post");

  const outcomes = await Promise.allSettled(
    postElements.map(async (postElement) => {
      if (this.isSaved(postElement)) return;
      // A feed entry counts as a reel when its first attribution link is labelled "Reels".
      const isReels =
        postElement.querySelector('a[attributionsrc]')?.getAttribute("aria-label")?.includes("Reels") ?? false;
      const data = isReels
        ? await this.extractReelsFromProfile(postElement)
        : await this.extractPostFromProfile(postElement);
      if (data && data.facts.length) {
        facts.facts.push(...data.facts);
        // mark CONTAINER post
        this.markSaved(postElement);
      }
    })
  );

  // allSettled never rejects; surface individual failures instead of dropping them.
  for (const outcome of outcomes) {
    if (outcome.status === "rejected") {
      console.warn("Failed to extract post:", outcome.reason);
    }
  }

  return facts.facts.length ? facts : null;
}
/**
 * Clicks the "see more" buttons of every not-yet-saved post so the full
 * captions are in the DOM before extraction.
 *
 * Expansions run in batches of three with a 300 ms pause between batches.
 * Each expansion is queued as a function and only started when its batch
 * begins. Calling `expandSingleContent` while collecting would start every
 * click immediately and make the batching ineffective.
 *
 * @param sections Post elements of the current feed.
 */
private async expandAllContent(sections: NodeListOf<HTMLElement>): Promise<void> {
  const expandJobs: Array<() => Promise<void>> = [];

  sections.forEach((postElement) => {
    if (this.isSaved(postElement)) return;
    // A feed entry counts as a reel when its first attribution link is labelled "Reels".
    const isReels =
      postElement.querySelector('a[attributionsrc]')?.getAttribute("aria-label")?.includes("Reels") ?? false;
    const seeMoreSelector = isReels
      ? ELEMENT_SELECTORS.reelFromProfile.seeMoreBtn
      : ELEMENT_SELECTORS.postFromProfile.seeMoreBtn;

    // Queue one deferred job per expandable region in this post.
    postElement.querySelectorAll<HTMLElement>(seeMoreSelector).forEach((btn) => {
      expandJobs.push(() => this.expandSingleContent(btn, postElement, seeMoreSelector));
    });
  });

  const batchSize = 3; // at most 3 expansions in flight, to avoid overwhelming the page
  for (let i = 0; i < expandJobs.length; i += batchSize) {
    const batch = expandJobs.slice(i, i + batchSize).map((startJob) => startJob());
    await Promise.allSettled(batch);
    // Small delay between batches (not after the last one).
    if (i + batchSize < expandJobs.length) {
      await new Promise<void>((resolve) => setTimeout(resolve, 300));
    }
  }
}
/**
 * Expands one truncated caption: scrolls its button into view, clicks it,
 * waits (up to 2 s) for the button to disappear and then pauses another
 * 100 ms. Any error is logged as a warning and swallowed.
 *
 * @param button          The "see more" button to click.
 * @param postElement     Post element that contains the button.
 * @param seeMoreSelector Selector used to detect that the button is gone.
 */
private async expandSingleContent(
  button: HTMLElement,
  postElement: HTMLElement,
  seeMoreSelector: string
): Promise<void> {
  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
  try {
    // Bring the button into view before interacting with it.
    button.scrollIntoView({ block: "nearest", inline: "nearest" });
    button.click();
    // Give the page time to swap in the full text, then let the DOM settle.
    await this.waitForContentExpansion(postElement, seeMoreSelector, 2000);
    await pause(100);
  } catch (error) {
    console.warn("Failed to expand content:", error);
  }
}
/**
 * Resolves once no element matching `seeMoreSelector` is left inside
 * `container`, or once `timeoutMs` milliseconds have elapsed, whichever comes
 * first. The check runs immediately and then every 50 ms.
 *
 * @param container       Element to search in.
 * @param seeMoreSelector Selector of the button expected to disappear.
 * @param timeoutMs       Maximum time to wait, in milliseconds.
 */
private async waitForContentExpansion(
  container: HTMLElement,
  seeMoreSelector: string,
  timeoutMs: number
): Promise<void> {
  const startedAt = Date.now();
  const timedOut = () => Date.now() - startedAt >= timeoutMs;
  // Keep polling while the button is still present and time remains.
  while (container.querySelector(seeMoreSelector) !== null && !timedOut()) {
    await new Promise<void>((wake) => setTimeout(wake, 50));
  }
}
/**
 * Collects the reel shown on a reel detail page.
 *
 * The last element matching the reel container selector is scraped from the
 * DOM and marked as saved once it yielded facts.
 *
 * @returns The reel's collection, or `null` when nothing new was extracted.
 */
async saveDetailReels(): Promise<model.FactCollection | null> {
  // NOTE(review): when the page contains a JSON-LD script nothing is extracted
  // at all. Confirm whether a JSON-LD parser was meant to run in that case.
  if (document.querySelector<HTMLElement>('script[type="application/ld+json"]')) {
    return null;
  }

  const mains = document.querySelectorAll<HTMLElement>(ELEMENT_SELECTORS.detailReels.container);
  const target = mains[mains.length - 1];
  if (!target || this.isSaved(target)) return null;

  const data = await this.extractReelsFromDom(target);
  if (data.facts.length === 0) return null;

  // mark CONTAINER reels detail
  this.markSaved(target);
  return data;
}
/**
 * Scrapes a reel detail view into a "post" collection: source URL, author
 * name, caption (as markdown) and like/comment/share counts.
 *
 * `this.context` is reset to the current page URL before any fact is created.
 *
 * @param reelsElement Root element of the reel detail view.
 * @returns The collection; it always holds at least the source URL fact.
 */
async extractReelsFromDom(reelsElement: HTMLElement): Promise<model.FactCollection> {
  const selectors = ELEMENT_SELECTORS.detailReels;
  const collection = model.NewFactCollection("post");

  const pageUrl = window.location.href;
  this.context = pageUrl;
  collection.facts.push(NewFact("fb_post_source_url", "url", pageUrl, this.context));

  // Author name with runs of whitespace collapsed to single spaces.
  const authorEl = reelsElement.querySelector<HTMLElement>(selectors.displayName);
  const authorName = authorEl ? authorEl.textContent?.replace(/\s+/g, " ").trim() || "" : "";
  if (authorName) {
    collection.facts.push(NewFact("fb_post_display_name", "name", authorName, this.context));
  }

  // The caption is read from the parent of the matched caption node.
  const captionHost = reelsElement.querySelector<HTMLElement>(selectors.caption)?.parentElement;
  if (captionHost) {
    await this.expandCaptionIfNeeded(captionHost, selectors.seeMoreBtn, {
      maxClicks: 3,
      timeoutMs: 2000,
    });
    // Convert a copy with buttons, <object> and <a> elements removed, so the
    // live DOM is left untouched.
    const captionCopy = captionHost.cloneNode(true) as HTMLElement;
    for (const unwanted of Array.from(captionCopy.querySelectorAll("button, [role='button'], object, a"))) {
      unwanted.remove();
    }
    const captionMd = turndownService.turndown(captionCopy);
    if (captionMd) {
      collection.facts.push(NewFact("fb_post_content_md", "md", captionMd, this.context));
    }
  }

  const reactions = reelsElement.querySelector<HTMLElement>(selectors.reactionContainer);
  if (reactions) {
    // Children 1, 2 and 3 are read as like, comment and share; child 0 is skipped.
    const [, likeEl, commentEl, shareEl] = Array.from(reactions.children);
    const likeText = likeEl?.textContent?.trim() || "";
    const commentText = commentEl?.textContent?.trim() || "";
    const shareText = shareEl?.textContent?.trim() || "";
    collection.facts.push(NewFact("fb_post_like_count", "integer", this.parseCount(likeText), this.context));
    collection.facts.push(NewFact("fb_post_comment_count", "integer", this.parseCount(commentText), this.context));
    collection.facts.push(NewFact("fb_post_share_count", "integer", this.parseCount(shareText), this.context));
  }

  // Add savedat_datetime
  // collection.facts.push(NewFact("savedat_datetime", "datetime", new Date().toISOString(), this.context));
  return collection;
}
/**
 * Scrapes the post detail dialog into a "post" collection: source URL, author
 * name, content (photo links followed by the caption, as markdown) and
 * like/comment/share counts.
 *
 * `this.context` is reset to the current page URL first, as
 * `extractReelsFromDom` does. It may otherwise still hold a feed-post or
 * comment permalink left behind by a previous extraction.
 *
 * @returns The collection, or `null` when the dialog is missing or already saved.
 */
saveDetailPost(): model.FactCollection | null {
  const selectors = ELEMENT_SELECTORS.detailPost;
  const container = document.querySelector<HTMLElement>(selectors.container);
  if (!container || this.isSaved(container)) return null;

  this.context = window.location.href;
  const out = model.NewFactCollection("post");

  // Post URL
  out.facts.push(NewFact("fb_post_source_url", "url", window.location.href, this.context));

  // Display name, with runs of whitespace collapsed to single spaces
  const displayNameEl = container.querySelector<HTMLElement>(selectors.displayName);
  if (displayNameEl) {
    const displayName = displayNameEl.textContent?.replace(/\s+/g, " ").trim() || "";
    if (displayName) out.facts.push(NewFact("fb_post_display_name", "name", displayName, this.context));
  }

  // Photos
  const photoImgs = Array.from(container.querySelectorAll<HTMLImageElement>(selectors.photoUrls))
    .map((img) => img.src)
    .filter(Boolean);

  // Caption -> markdown
  let captionMd = "";
  const captionEl = container.querySelector<HTMLElement>(selectors.caption);
  if (captionEl) {
    captionMd = (turndownService.turndown(captionEl.innerHTML || "") || "").trim();
  }

  // content_md: "[photo](u1) [photo-2](u2) ...\n\n<caption md>"
  const photoLine = photoImgs
    .map((u, i) => (i === 0 ? `[photo](${u})` : `[photo-${i + 1}](${u})`))
    .join(" ");
  const contentMd = [photoLine, captionMd].filter(Boolean).join("\n\n").trim();
  if (contentMd) out.facts.push(NewFact("fb_post_content_md", "md", contentMd, this.context));

  // // (optional) separate photo fields, if still used downstream
  // if (photoImgs.length === 1) {
  //   out.facts.push(NewFact("photoUrl", "url", photoImgs[0], this.context));
  // } else if (photoImgs.length > 1) {
  //   out.facts.push(NewFact("photoUrls", "string", JSON.stringify(photoImgs), this.context));
  //   out.facts.push(NewFact("photoUrl", "url", photoImgs[0], this.context));
  // }

  // Reactions: the three counters are looked up inside the reaction box.
  const reactBox = container.querySelector<HTMLElement>(selectors.reactionContainer);
  if (reactBox) {
    const likeCount = reactBox.querySelector<HTMLElement>(selectors.likeCount);
    if (likeCount) {
      const count = this.parseCount(likeCount.textContent?.trim());
      out.facts.push(NewFact("fb_post_like_count", "integer", count, this.context));
    }
    const commentCount = reactBox.querySelector<HTMLElement>(selectors.commentCount);
    if (commentCount) {
      const count = this.parseCount(commentCount.textContent?.trim());
      out.facts.push(NewFact("fb_post_comment_count", "integer", count, this.context));
    }
    const shareCount = reactBox.querySelector<HTMLElement>(selectors.shareCount);
    if (shareCount) {
      const count = this.parseCount(shareCount.textContent?.trim());
      out.facts.push(NewFact("fb_post_share_count", "integer", count, this.context));
    }
  }

  // The source URL fact is always present, so the collection is never empty here.
  this.markSaved(container);
  return out;
}
async extractPostFromProfile(postElement: HTMLElement): Promise<model.FactCollection | null> {
const captionEl = postElement.querySelector<HTMLElement>(
ELEMENT_SELECTORS.postFromProfile.caption
);
if (!postElement || this.isSaved(postElement) || !captionEl) return null;
const out = model.NewFactCollection("post");
// 1) URL post
const postUrlEl = postElement.querySelector<HTMLAnchorElement>(
ELEMENT_SELECTORS.postFromProfile.postUrl
);
// Href bisa relatif; normalisasi jadi absolute FB URL
const rawHref = postUrlEl?.getAttribute("href") || "";
const fullPostUrl = rawHref
? (rawHref.startsWith("http")
? rawHref.split("?")[0]
: `https://www.facebook.com${rawHref.split("?")[0]}`)
: "";
// this.context = window.location.href;
if (fullPostUrl) {
this.context = fullPostUrl
out.facts.push(NewFact("fb_post_source_url", "url", fullPostUrl, this.context));
}
// 2) Profile URL & Display Name
const profileUrlEl = postElement.querySelector<HTMLAnchorElement>(
ELEMENT_SELECTORS.postFromProfile.profileUrl
);
const profileNameEl = postElement.querySelector<HTMLElement>(
ELEMENT_SELECTORS.postFromProfile.profileName
);
const profileUrl = profileUrlEl?.getAttribute("href")?.split("?")[0] || "";
const displayName = profileNameEl?.textContent?.replace(/\s+/g, " ").trim() || "";
if (profileUrl) out.facts.push(NewFact("fb_post_profile_url", "url", profileUrl.startsWith("https://www.facebook.com/") ? profileUrl : `https://www.facebook.com${profileUrl}`, this.context));
if (displayName) out.facts.push(NewFact("fb_post_display_name", "name", displayName, this.context));
const mediaExternal = postElement.querySelectorAll<HTMLElement>('[data-ad-rendering-role="image"]');
const photos = postElement.querySelectorAll<HTMLElement>(ELEMENT_SELECTORS.postFromProfile.photoUrl);
const videos = postElement.querySelectorAll<HTMLElement>('a[attributionsrc][href*="/videos/"]');
// Extract URLs from different media types
const mediaExternalUrls = Array.from(mediaExternal)
.map((img) => turndownService.turndown(img.innerHTML) || "")
.filter(Boolean);
const photoUrls = Array.from(photos)
.map((img) => {
const src = (img as HTMLImageElement).src || "";
return src.split("?__cft__")[0]; // Remove __cft__ parameter
})
.filter(Boolean);
const videoUrls = Array.from(videos)
.map((video) => {
const href = video.getAttribute("href") || "";
return href.split("?__cft__")[0]; // Remove __cft__ parameter
})
.filter(Boolean);
// Combine all media URLs
const allMediaUrls = [...mediaExternalUrls, ...photoUrls, ...videoUrls];
const captionMd = captionEl
? (turndownService.turndown(captionEl.innerHTML || "") || "").trim()
: "";
// 4) Create content markdown with all media types
if (allMediaUrls.length || captionMd) {
const mediaLines: string[] = [];
// Add external media
mediaExternalUrls.forEach((url, i) => {
mediaLines.push(i === 0 && mediaExternalUrls.length === 1 ? `${url}` : `[](${url})`);
});
// Add regular photos
photoUrls.forEach((url, i) => {
const index = mediaExternalUrls.length + i + 1;
mediaLines.push(index === 1 && allMediaUrls.length === 1 ? `[photo](${url})` : `[photo](${url})`);
});
// Add videos
videoUrls.forEach((url, i) => {
// const index = i + 1;
mediaLines.push(videoUrls.length === 1 ? `[video](${url})` : `[video](${url})`);
});
const mediaLine = mediaLines.join(" ");
const contentMd = [mediaLine, captionMd].filter(Boolean).join("\n\n").trim();
if (contentMd) out.facts.push(NewFact("fb_post_content_md", "md", contentMd, this.context));
}
// // (opsional) simpan juga url foto terpisah
// if (photoImgs.length === 1) {
// out.facts.push(NewFact("photoUrl", "url", photoImgs[0], this.context));
// } else if (photoImgs.length > 1) {
// out.facts.push(NewFact("photoUrls", "string", JSON.stringify(photoImgs), this.context));
// out.facts.push(NewFact("photoUrl", "url", photoImgs[0], this.context));
// }
// 5) Reactions (like, comment, share)
const reactBox = postElement.querySelector<HTMLElement>(
ELEMENT_SELECTORS.postFromProfile.containerInteractions
);
if (reactBox) {
const likeEl = reactBox.querySelector<HTMLElement>(ELEMENT_SELECTORS.postFromProfile.like);
const commentEl = reactBox.querySelector<HTMLElement>(ELEMENT_SELECTORS.postFromProfile.comment);
const shareEl = reactBox.querySelector<HTMLElement>(ELEMENT_SELECTORS.postFromProfile.share);
const likeCount = likeEl?.textContent?.trim() || "";
const commentCount = commentEl?.textContent?.trim() || "";
const shareCount = shareEl?.textContent?.trim() || "";
out.facts.push(NewFact("fb_post_like_count", "integer", this.parseCount(likeCount), this.context));
out.facts.push(NewFact("fb_post_comment_count", "integer", this.parseCount(commentCount), this.context));
out.facts.push(NewFact("fb_post_share_count", "integer", this.parseCount(shareCount), this.context));
}
// Add savedat_datetime
// out.facts.push(NewFact("savedat_datetime", "datetime", new Date().toISOString(), this.context));
// No facts collected? Do not mark the element as saved; return null
if (!out.facts.length) return null;
// Mark the post container (not <body>) as saved
this.markSaved(postElement);
return out;
}
/**
 * Extracts facts for a single reel element rendered on a profile page.
 *
 * Collects, when the matching elements are present: the reel URL, the
 * author's profile URL and display name, the caption (converted to
 * Markdown), the like/comment/share counts, and the text of the `date`
 * element.
 *
 * Side effects: sets `this.context` to the reel URL once it is known, and
 * marks `postElement` as saved when at least one fact was collected.
 *
 * @param postElement - Container element of the reel.
 * @returns The collected facts; an empty collection when the element was
 *   already processed or nothing could be extracted.
 */
async extractReelsFromProfile(postElement: HTMLElement): Promise<model.FactCollection> {
  // Already-processed elements yield an empty collection.
  if (this.isSaved(postElement)) {
    return model.NewFactCollection("post");
  }

  const postData = model.NewFactCollection("post");

  // Reel URL (query string stripped). It also becomes the context of every later fact.
  const reelUrlEl = postElement.querySelector<HTMLElement>(ELEMENT_SELECTORS.reelFromProfile.postUrl);
  if (reelUrlEl) {
    const href = reelUrlEl.getAttribute("href") || "";
    const cleanHref = href.split("?")[0];
    // Guard on the cleaned href itself: an empty or query-only href would otherwise
    // be expanded to the bare site origin and stored as if it were the reel URL.
    if (cleanHref) {
      const fullUrl = cleanHref.startsWith("http") ? cleanHref : `https://www.facebook.com${cleanHref}`;
      this.context = fullUrl;
      postData.facts.push(NewFact("fb_post_source_url", "url", fullUrl, this.context));
    }
  }

  // Author profile URL, made absolute when it does not already carry the site origin.
  const profileUrlEl = postElement.querySelector<HTMLElement>(ELEMENT_SELECTORS.reelFromProfile.profileUrl);
  if (profileUrlEl) {
    const href = profileUrlEl.getAttribute("href") || "";
    const cleanUrl = href.split("?")[0];
    if (cleanUrl) postData.facts.push(NewFact("fb_post_profile_url", "url", cleanUrl.startsWith("https://www.facebook.com") ? cleanUrl : `https://www.facebook.com${cleanUrl}`, this.context));
  }

  // Author display name with whitespace runs collapsed.
  const displayNameEl = postElement.querySelector<HTMLElement>(ELEMENT_SELECTORS.reelFromProfile.profileName);
  if (displayNameEl) {
    const displayName = displayNameEl.textContent?.replace(/\s+/g, " ").trim() || "";
    if (displayName) postData.facts.push(NewFact("fb_post_display_name", "name", displayName, this.context));
  }

  // Caption: work on a clone so that stripping buttons/links does not touch the live DOM.
  const captionElement = postElement.querySelector<HTMLElement>(ELEMENT_SELECTORS.reelFromProfile.caption);
  if (captionElement) {
    const clone = captionElement.cloneNode(true) as HTMLElement;
    clone.querySelectorAll('div[role="button"], button, object, a').forEach((el) => el.remove());
    const caption = turndownService.turndown(clone.innerHTML.trim());
    if (caption) postData.facts.push(NewFact("fb_post_content_md", "md", caption, this.context));
  }

  // Reaction counts.
  const containerInteractions = postElement.querySelector<HTMLElement>(
    ELEMENT_SELECTORS.reelFromProfile.containerInteractions
  );
  if (containerInteractions) {
    // Keep only the nested divs whose markup contains "__fb-" and read their text.
    const reactionList = [...containerInteractions.querySelectorAll("div > div > div > div > div")]
      .filter((el) => el.innerHTML.includes("__fb-"))
      .map((el) => el.textContent?.trim() || "");
    console.log("Reels reaction list:", reactionList);

    // NOTE(review): entries 1..3 are read as like/comment/share and entry 0 is skipped;
    // this mapping depends on the page markup — confirm against the live DOM.
    const likeCount = reactionList[1] || "0";
    const commentCount = reactionList[2] || "0";
    const shareCount = reactionList[3] || "0";
    console.log("Reels counts:", { likeCount, commentCount, shareCount });

    postData.facts.push(NewFact("fb_post_like_count", "integer", this.parseCount(likeCount), this.context));
    postData.facts.push(NewFact("fb_post_comment_count", "integer", this.parseCount(commentCount), this.context));
    postData.facts.push(NewFact("fb_post_share_count", "integer", this.parseCount(shareCount), this.context));
  }

  // NOTE(review): the text of the `date` selector is stored under a "duration" fact key —
  // confirm that this naming is intended.
  const dateEl = postElement.querySelector<HTMLElement>(ELEMENT_SELECTORS.reelFromProfile.date);
  if (dateEl) {
    const dateText = dateEl.textContent?.trim() || "";
    if (dateText) {
      postData.facts.push(NewFact("fb_post_duration_string", "string", dateText, this.context));
    }
  }

  // Add savedat_datetime
  // postData.facts.push(NewFact("savedat_datetime", "datetime", new Date().toISOString(), this.context));

  // Mark the container as saved only if something was actually extracted.
  if (postData.facts.length > 0) {
    this.markSaved(postElement);
  }
  return postData;
}
/**
 * Clicks the caption's "see more" control until the caption is observed to
 * have expanded, the control is gone, or the click budget is used up.
 *
 * @param captionEl - Caption element that may be truncated.
 * @param seeMoreSelector - Selector, resolved from `captionEl`, for the expand control.
 * @param opts - `maxClicks` (default 3) caps the number of clicks; `timeoutMs`
 *   (default 2000) bounds the wait after each click.
 */
private async expandCaptionIfNeeded(
  captionEl: HTMLElement,
  seeMoreSelector: string,
  opts: { maxClicks?: number; timeoutMs?: number } = {}
): Promise<void> {
  const clickLimit = opts.maxClicks === undefined ? 3 : opts.maxClicks;
  const waitMs = opts.timeoutMs === undefined ? 2000 : opts.timeoutMs;

  captionEl.scrollIntoView({ block: "nearest", inline: "nearest" });

  // Every attempt is compared against the size measured before the first click.
  const baseline = this.measureCaption(captionEl);

  let attempt = 0;
  while (attempt < clickLimit) {
    const toggle = captionEl.querySelector<HTMLElement>(seeMoreSelector);
    if (!toggle) return;

    toggle.click();
    await this.waitForExpansion(captionEl, seeMoreSelector, waitMs);
    // Two extra frames before re-measuring the caption.
    await this.nextPaint();
    await this.nextPaint();

    const current = this.measureCaption(captionEl);
    if (current.height > baseline.height) return;
    if (current.textLen > baseline.textLen) return;
    if (!captionEl.querySelector(seeMoreSelector)) return;

    attempt += 1;
  }
}
/**
 * Waits until the caption appears to have expanded, or until the timeout elapses.
 *
 * The promise resolves (it never rejects) on the first of:
 * - a DOM mutation inside `captionEl`,
 * - the caption's rounded height or text length exceeding the values measured on entry,
 * - the "see more" control no longer being found,
 * - `timeoutMs` milliseconds passing.
 *
 * @param captionEl - Caption element being watched.
 * @param seeMoreSelector - Selector, resolved from `captionEl`, for the expand control.
 * @param timeoutMs - Upper bound for the wait, in milliseconds.
 */
private waitForExpansion(captionEl: HTMLElement, seeMoreSelector: string, timeoutMs: number): Promise<void> {
  const start = this.measureCaption(captionEl);
  return new Promise((resolve) => {
    let done = false;
    const finish = () => {
      if (!done) {
        done = true;
        cleanup();
        resolve();
      }
    };

    // True once the caption is larger than at the start or the expand control is gone.
    const hasExpanded = () => {
      const cur = this.measureCaption(captionEl);
      const grew = cur.height > start.height || cur.textLen > start.textLen;
      return grew || !captionEl.querySelector(seeMoreSelector);
    };

    // A ResizeObserver reports once right after observe() for a rendered element,
    // so its callback must verify a real change instead of finishing unconditionally;
    // otherwise the wait would end before the click has had any effect.
    const ro = new ResizeObserver(() => {
      if (hasExpanded()) finish();
    });
    ro.observe(captionEl);

    // Any mutation inside the caption is treated as a sign that it re-rendered.
    const mo = new MutationObserver(() => finish());
    mo.observe(captionEl, { childList: true, subtree: true, characterData: true });

    // Per-frame polling as a fallback in case neither observer reports the change.
    let rafId = 0;
    const poll = () => {
      if (done) return;
      if (hasExpanded()) return finish();
      rafId = requestAnimationFrame(poll);
    };
    rafId = requestAnimationFrame(poll);

    const t = setTimeout(finish, timeoutMs);

    // Releases every observer/timer; each step is best-effort so one failure
    // cannot prevent the promise from resolving.
    function cleanup() {
      try { ro.disconnect(); } catch { }
      try { mo.disconnect(); } catch { }
      try { cancelAnimationFrame(rafId); } catch { }
      clearTimeout(t);
    }
  });
}
/**
 * Returns a promise that resolves inside the next `requestAnimationFrame` callback.
 */
private nextPaint(): Promise<void> {
  return new Promise<void>((resolve) => {
    requestAnimationFrame(() => {
      resolve();
    });
  });
}
/**
 * Takes a size snapshot of an element.
 *
 * @param el - Element to measure.
 * @returns `height`: the bounding-box height rounded to the nearest integer;
 *   `textLen`: the length of the element's text content (0 when it has none).
 */
private measureCaption(el: HTMLElement) {
  const { height: rawHeight } = el.getBoundingClientRect();
  const text = el.textContent || "";
  return { height: Math.round(rawHeight), textLen: text.length };
}
/**
 * Adds the `tagged` CSS class to an element.
 *
 * Does nothing for a missing element or for `document.body`. Uses `classList`
 * so that repeated calls do not accumulate duplicate `tagged` tokens and an
 * element without a class does not end up with a leading space.
 *
 * @param element - Element to tag.
 */
addClassTagged(element: Element): void {
  if (!element || element === document.body) return;
  element.classList.add("tagged");
}
/**
 * Converts a displayed counter such as `"1.2K"`, `"1,2 rb"`, `"1.234"` or
 * `"Lihat semua 12 balasan"` into a plain integer string.
 *
 * Handling:
 * - Reply phrases (`"Lihat semua N balasan"`, `"N replies"`) return `N` as is.
 * - Otherwise the leading number is read together with an optional unit
 *   suffix: `k`/`rb` = thousand, `m`/`jt` = million, `b` = billion.
 * - `.` and `,` are disambiguated instead of assuming one locale:
 *   - both present: the one that occurs last is the decimal mark;
 *   - one kind, repeated: thousands separators;
 *   - one kind, once: decimal mark when a known unit follows or when it is
 *     not followed by exactly three digits, otherwise a thousands separator.
 *
 * @param str - Raw counter text.
 * @returns The count as a base-10 integer string; `"0"` when nothing can be parsed.
 */
parseCount(str = ""): string {
  if (typeof str !== "string") return "0";

  const replyMatch = str.match(/Lihat semua (\d+)\s*balasan?|(\d+)\s*replies?/i);
  if (replyMatch) return replyMatch[1] || replyMatch[2];

  const raw = str.trim().match(/^([\d,.]+)\s*([a-zA-Z]*)/);
  if (!raw) return "0";

  const multipliers: Record<string, number> = {
    k: 1_000,
    rb: 1_000,
    m: 1_000_000,
    jt: 1_000_000,
    b: 1_000_000_000,
  };
  // Unknown suffixes (e.g. "comments") scale by 1.
  const multiplier = multipliers[raw[2].toLowerCase()] || 1;

  const digits = raw[1];
  const lastDot = digits.lastIndexOf(".");
  const lastComma = digits.lastIndexOf(",");

  // Index of the character acting as the decimal mark, or -1 when there is none.
  let decimalIdx = -1;
  if (lastDot !== -1 && lastComma !== -1) {
    decimalIdx = Math.max(lastDot, lastComma);
  } else if (lastDot !== -1 || lastComma !== -1) {
    const sepIdx = Math.max(lastDot, lastComma);
    const occursOnce = digits.indexOf(digits.charAt(sepIdx)) === sepIdx;
    const followedByThreeDigits = digits.length - sepIdx - 1 === 3;
    // Abbreviated counts ("1.2K", "1,2 rb") carry a decimal; a bare "1.234" / "1,234" is grouped.
    if (occursOnce && (multiplier !== 1 || !followedByThreeDigits)) decimalIdx = sepIdx;
  }

  const stripSeparators = (part: string) => part.replace(/[.,]/g, "");
  const intPart = stripSeparators(decimalIdx === -1 ? digits : digits.slice(0, decimalIdx));
  const fracPart = decimalIdx === -1 ? "" : stripSeparators(digits.slice(decimalIdx + 1));

  const num = parseFloat(`${intPart}.${fracPart}`);
  if (isNaN(num)) return "0";

  return String(Math.round(num * multiplier));
}
}