// facebook.ts
// NOTE(review): `postgres` is not referenced anywhere in the visible part of
// this file — confirm whether this import is still needed.
import { postgres } from "bun";
import * as model from "../model";
import { NewFact } from "../model";
import { turndownService } from "../utils/turndown";

/**
 * CSS selectors used to locate Facebook DOM nodes, grouped by page/section.
 * These are tied to Facebook's generated markup and are expected to be brittle.
 */
const ELEMENT_SELECTORS = {
  profile: {
    displayName: 'span[dir="auto"] > h1[class^="html-h1"]',
    profilePicture: '[role="main"] > div > div:first-of-type image',
    followerCount: 'a[href*="followers"] > strong',
    followingCount: 'a[href*="following"] > strong',
    bio: 'div[role="main"] > div:last-child > div:last-child > div > div > div:nth-child(2) > div > div > div > div > div > div > div:last-child'
  },
  post: {
    searchPageContainer: 'div[role="main"] > div:last-child > div:last-child > div > div:last-child > div:last-child > div',
    container: 'div[role="main"] > div:last-child > div:last-child > div > div:last-child > div:last-child',
    postURL: 'a[href*="/posts/"], a[href*="/reels/"], a[href*="/videos/"]',
  },
  postFromProfile: {
    postUrl: 'span[dir="ltr"] > div > span a[attributionsrc]',
    profileName: 'a[role="link"] > b[class*="html-b"]',
    profileUrl: 'h3 a[role="link"]',
    caption: 'div[data-ad-rendering-role="story_message"]',
    photoUrl: 'a[attributionsrc][href*="/photo/"] img',
    containerInteractions: 'div[data-visualcompletion="ignore-dynamic"] > div > div > div > div:first-child',
    like: 'div > div > div:first-child span[class*="html-span"] span[aria-hidden="true"]',
    comment: 'div > div > div:nth-of-type(2) > div:nth-of-type(2) span[class*="html-span"] span[dir="auto"]',
    share: 'div > div > div:nth-of-type(2) > div:nth-of-type(3) span[class*="html-span"] span[dir="auto"]',
    seeMoreBtn: 'div[data-ad-comet-preview="message"] div[dir="auto"] div[role="button"]',
  },
  reelFromProfile: {
    postUrl: 'div[class*="html-div "] a[attributionsrc]',
    profileName: 'h3[dir="auto"][class*="html-h3"] a[role="link"][attributionsrc]',
    profileUrl: 'h3[dir="auto"][class*="html-h3"] a[role="link"]',
    caption: 'div[class*="html-div "] a[attributionsrc] span[dir="auto"]',
    containerInteractions: 'div[class*="html-div "] a[attributionsrc] > div:nth-of-type(2)',
    interactionList: 'div > div > div > div > div',
    seeMoreBtn: 'div[class*="html-div "] a[attributionsrc] span[dir="auto"] > div > object > div[role="button"]',
    date: 'span[dir="auto"] > span > span:not([class]) > span:nth-of-type(2):not([aria-hidden])'
  },
  detailPost: {
    container: 'div[role="dialog"]:nth-of-type(2)',
    displayName: 'a[attributionsrc][role="link"] > b[class*="html-b"]',
    profile: 'a[attributionsrc][role="link"]',
    caption: 'div[data-ad-preview="message"]',
    photoUrls: 'a[attributionsrc][href*="/photo/"] img',
    reactionContainer: 'div[data-visualcompletion="ignore-dynamic"] > div > div > div > div:first-child',
    likeCount: 'div > div > div:first-child span[class*="html-span"] span[aria-hidden="true"]',
    commentCount: 'div > div > div:nth-of-type(2) > div:nth-of-type(2) span[class*="html-span"] span[dir="auto"]',
    shareCount: 'div > div > div:nth-of-type(2) > div:nth-of-type(3) span[class*="html-span"] span[dir="auto"]',
  },
  commentDetailPost: {
    container: 'div > div > div > div > div div[data-virtualized="false"] > div > div > div',
    section: 'div[data-virtualized="false"]',
    commentURL: 'a[attributionsrc][role="link"][href*="/posts/"]',
    profileURL: 'a[attributionsrc]',
    displayName: 'a[attributionsrc] > span > span[dir="auto"]',
    profileImageUrl: 'a[attributionsrc] image',
    content: 'span[dir="auto"][lang]',
    likeCount: 'span[class^="html-span"] > div[role="button"][aria-label*=";"]',
    replyCount: 'span[class^="html-span"] > span[dir="auto"]',
  },
  commentDetailReels: {
    container: 'div[role="complementary"] div:not([class]) > div:last-of-type > div > div',
    section: ':scope > div[data-virtualized="false"]',
    seeMoreBtn: 'div[dir="auto"] > div[role="button"]',
    displayName: 'div[role="article"] a[href][tabindex="0"]',
    content: 'span[dir="auto"][lang]',
    likeCount: 'div[role="button"][tabindex="0"] > div[class^="html-div"] > span',
    replyCount: 'div[role=button] > span > span[dir="auto"]',
    displayDate: 'li > span > div > a',
  },
  detailReels: {
    container: 'div[role="main"]',
    displayName: 'h2[dir="auto"] a',
    caption: 'div > span[dir="auto"] > div',
    seeMoreBtn: 'div[role="button"]',
    reactionContainer: 'div > div > div > div:nth-child(2) > div:nth-child(2) > div > div > div',
  },
  detailVideos: {
    // container: '[role="main"]',
    videoSection: '[role="main"]',
    contenVideoSection: '[role="complementary"]',
    profileUrl: '[data-ad-rendering-role="profile_name"] a',
    caption: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(1) > div:nth-child(3) > div:nth-child(1)',
    title: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(1) > div:nth-child(2) > div > div > div > span',
    seeMoreBtn: '[dir="auto"] > [role="button"]',
    likeCount: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(2) > div:nth-child(1) > div > div:nth-child(1) > div > span > div > span:nth-child(2) > span > span',
    commentCount: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(2) > div:nth-child(1) > div > div:nth-child(2) > div:nth-child(1) > span > div > div > div:nth-child(1) > span > span',
    viewCount: 'div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div:nth-child(2) > div:nth-child(1) > div > div:nth-child(2) > div:nth-child(2) > span > span > div > div:nth-child(1) > span'
  },
  commentDetailVideo: {
    container: '[role="complementary"] div[style="height: auto;"]',
    section: 'div[data-virtualized="false"]',
    commentUrl: 'a[href*="comment_id"]',
    displayName: 'a[href*="comment_id"][aria-hidden="false"]',
    content: '[dir="auto"][lang]',
    likeCount: 'span > [role="button"][aria-label][tabindex="0"] > div > span',
    commentCount: '[role="button"][ tabindex="0"] > span > span[dir="auto"]',
  }
};

/**
 * Scrapes the currently loaded Facebook page (profile, search results, post,
 * reel or video detail) into fact collections.
 *
 * DOM nodes that have already been harvested are tagged with the CSS class
 * "saved" so that repeated calls do not emit the same facts again.
 */
export default class FacebookModule {
  /**
   * URL used as the `context` argument of emitted facts. Starts as the page URL
   * and is overwritten by several extractors with the URL of the item they are
   * currently processing.
   */
  context = window.location.href;

  /** Tags an element with the "saved" class; a missing element is ignored. */
  private markSaved(el?: Element | null): void {
    el?.classList.add("saved");
  }

  /** Returns true when the element carries the "saved" class (never for `document.body` or a missing element). */
  private isSaved(el?: Element | null): boolean {
    if (!el || el === document.body) return false;
    return el.classList.contains("saved");
  }

  /** Intentionally empty in the visible source. */
  tagElement(): void { }

  /**
   * Picks the extractor(s) matching the current URL and gathers their results.
   *
   * @returns Every non-empty fact collection produced for the current page.
   */
  async saveData(): Promise<model.FactCollection[]> {
    const out: model.FactCollection[] = [];
    const href = window.location.href;

    if (href.includes("/posts")) {
      const post = this.saveDetailPost();
      if (post) out.push(post);
      const comments = this.saveCommentPosts(
        document.querySelector<HTMLElement>(ELEMENT_SELECTORS.detailPost.container) || undefined
      );
      if (comments.length) out.push(...comments);
    } else if (href.includes("/reel")) {
      const reel = await this.saveDetailReels();
      if (reel) out.push(reel);
      const comments = await this.saveCommentReels(
        document.querySelector<HTMLElement>(ELEMENT_SELECTORS.commentDetailReels.container) || undefined
      );
      if (comments.length) out.push(...comments);
    } else if (href.includes("/search/")) {
      const posts = await this.saveAllPost();
      if (posts && posts.facts && posts.facts.length > 0) out.push(posts);
    } else if (href.includes("/videos")) {
      const post = this.saveDetailVideo();
      if (post) out.push(post);
      const comments = this.saveCommentVideo();
      if (comments.length) out.push(...comments);
    } else {
      // Anything else is treated as a profile page.
      const profile = this.saveProfile();
      if (profile) out.push(profile);
      const posts = await this.saveAllPost();
      if (posts && posts.facts && posts.facts.length > 0) out.push(posts);
    }

    return out;
  }

  /**
   * Builds a "post" collection from the side panel of a video detail page.
   * Missing text fields are emitted as empty strings and missing counters as "0".
   */
  saveDetailVideo(): model.FactCollection {
    const post: model.FactCollection = model.NewFactCollection('post');
    const context = window.location.href;
    const postUrl = window.location.href;
    const selectors = ELEMENT_SELECTORS.detailVideos;
    const panel = document.querySelector(selectors.contenVideoSection);

    // The profile link provides both the profile URL and the display name.
    const profileLink = panel?.querySelector(selectors.profileUrl);
    const profileUrl = profileLink?.getAttribute('href') || '';
    const displayName = profileLink?.textContent || '';

    const caption = panel?.querySelector(selectors.caption)?.textContent || '';
    const titleContent = panel?.querySelector(selectors.title)?.textContent || '';
    const contentTurndown = turndownService.turndown(`${titleContent} \n ${caption}`);
    const seeMoreBtn = panel?.querySelector(selectors.seeMoreBtn)?.textContent || '';

    // Reactions
    const likeCount = panel?.querySelector(selectors.likeCount)?.textContent || "0";
    const commentCount = panel?.querySelector(selectors.commentCount)?.textContent || "0";
    const viewCount = panel?.querySelector(selectors.viewCount)?.textContent || "0";

    post.facts.push(NewFact('fb_post_source_url', 'url', postUrl, postUrl));
    post.facts.push(NewFact('fb_post_profile_url', 'url', profileUrl, context));
    post.facts.push(NewFact('fb_post_display_name', 'string', displayName, context));
    post.facts.push(NewFact('fb_post_content', 'string', contentTurndown, context));
    post.facts.push(NewFact('fb_post_see_more_btn', 'string', seeMoreBtn, context));
    // NOTE(review): the other extractors emit the *_count facts with type
    // "integer"; here they are typed "string" — confirm which one downstream expects.
    post.facts.push(NewFact('fb_post_like_count', 'string', this.parseCount(likeCount), context));
    post.facts.push(NewFact('fb_post_comment_count', 'string', this.parseCount(commentCount), context));
    post.facts.push(NewFact('fb_post_view_count', 'string', this.parseCount(viewCount), context));
    return post;
  }

  /**
   * Collects the comments shown next to a video into one "comment" collection.
   *
   * Sections already tagged as saved are skipped and processed sections are
   * tagged afterwards, so repeated calls do not emit duplicates.
   *
   * @returns A one-element array, or an empty array when nothing new was found.
   */
  saveCommentVideo(): model.FactCollection[] {
    const selectors = ELEMENT_SELECTORS.commentDetailVideo;
    const container = document.querySelector(selectors.container);
    const sections = container?.querySelectorAll<HTMLElement>(selectors.section);
    const comments = model.NewFactCollection("comment");
    const pageUrl = window.location.href;

    const textOf = (section: HTMLElement, selector: string, fallback: string): string =>
      section.querySelector(selector)?.textContent?.trim() ?? fallback;

    sections?.forEach((section) => {
      // Skip if section is already saved
      if (this.isSaved(section)) return;

      // Fall back to an empty string when the link or its href is missing.
      const commentUrl = section
        .querySelector(selectors.commentUrl)
        ?.getAttribute("href")
        ?.split("&__tn_")[0] ?? "";
      // The profile URL is the comment URL with the comment_id parameter cut off.
      const profileUrl = commentUrl
        .split("?comment_id=")[0]
        .split("&comment_id=")[0];

      const displayName = textOf(section, selectors.displayName, "");
      const content = textOf(section, selectors.content, "");
      const likeCount = textOf(section, selectors.likeCount, "0");
      const commentCount = textOf(section, selectors.commentCount, "0");

      comments.facts.push(NewFact("fb_comment_source_url", "url", commentUrl, pageUrl));
      comments.facts.push(NewFact("fb_comment_profile_url", "url", profileUrl, pageUrl));
      comments.facts.push(NewFact("fb_comment_display_name", "string", displayName, pageUrl));
      comments.facts.push(NewFact("fb_comment_content", "string", content, pageUrl));
      // NOTE(review): typed "string" here but "integer" in the post/reel comment
      // extractors — confirm which one downstream expects.
      comments.facts.push(NewFact("fb_comment_like_count", "string", this.parseCount(likeCount), pageUrl));
      comments.facts.push(NewFact("fb_comment_comment_count", "string", this.parseCount(commentCount), pageUrl));

      // Mark section as saved after processing
      this.markSaved(section);
    });

    return comments.facts.length ? [comments] : [];
  }

  /**
   * Collects the comments of a post detail dialog.
   *
   * @param scope Root element to search in; nothing is collected without it.
   * @returns A one-element array, or an empty array when nothing new was found.
   */
  saveCommentPosts(scope?: HTMLElement): model.FactCollection[] {
    if (!scope) return [];
    const selectors = ELEMENT_SELECTORS.commentDetailPost;
    const container = scope.querySelector(selectors.container);
    if (!container) return [];

    const sections = container.querySelectorAll<HTMLElement>(selectors.section);
    const facts = model.NewFactCollection("comment");

    sections.forEach((section) => {
      // Skip if section is already saved
      if (this.isSaved(section)) return;

      // The permalink anchor provides the comment URL and, via its text, the
      // displayed relative time.
      const commentUrlEl = section.querySelector<HTMLAnchorElement>(selectors.commentURL);
      const cleanUrl = commentUrlEl?.href ? commentUrlEl.href.split("&__cft__[0]")[0] : undefined;
      // Without a permalink the facts are attributed to the last known context.
      const contextUrl = cleanUrl || this.context || window.location.href;
      if (cleanUrl) {
        facts.facts.push(NewFact("fb_comment_source_url", "url", cleanUrl, window.location.href));
        this.context = cleanUrl;
      }

      const authorEl = section.querySelector<HTMLAnchorElement>(selectors.profileURL);
      if (authorEl) {
        const name = authorEl.textContent?.trim() || "";
        const url = authorEl.href?.split("?")[0] || "";
        if (name) facts.facts.push(NewFact("fb_comment_display_name", "name", name, contextUrl));
        if (url) facts.facts.push(NewFact("fb_comment_profile_url", "url", url, contextUrl));
      }

      const bodyEl = section.querySelector(selectors.content);
      if (bodyEl) {
        const body = turndownService.turndown(bodyEl.innerHTML || "");
        if (body) facts.facts.push(NewFact("fb_comment_content_md", "md", body, contextUrl));
      }

      const likeEl = section.querySelector(selectors.likeCount);
      if (likeEl) {
        const likeCount = this.parseCount(likeEl.textContent || "");
        facts.facts.push(NewFact("fb_comment_like_count", "integer", likeCount, contextUrl));
      }

      const replyEl = section.querySelector(selectors.replyCount);
      if (replyEl) {
        const replyCount = this.parseCount((replyEl.textContent || "").trim());
        facts.facts.push(NewFact("fb_comment_comment_count", "integer", replyCount, contextUrl));
      }

      if (commentUrlEl?.textContent) {
        facts.facts.push(NewFact("fb_comment_duration_string", "string", commentUrlEl.textContent.trim(), contextUrl));
      }

      // Mark section as saved after processing
      this.markSaved(section);
    });

    return facts.facts.length ? [facts] : [];
  }

  /**
   * Collects the comments of a reel detail page. Every "see more" button inside
   * `scope` is clicked first, followed by a fixed 800 ms wait.
   *
   * @param scope Root element to search in; nothing is collected without it.
   * @returns A one-element array, or an empty array when nothing new was found.
   */
  async saveCommentReels(scope?: HTMLElement): Promise<model.FactCollection[]> {
    if (!scope) return [];
    const selectors = ELEMENT_SELECTORS.commentDetailReels;

    const seeMoreBTNs = scope.querySelectorAll<HTMLElement>(selectors.seeMoreBtn);
    seeMoreBTNs.forEach((btn) => btn.click());
    await new Promise((r) => setTimeout(r, 800));

    const sections = scope.querySelectorAll<HTMLElement>(selectors.section);
    if (!sections.length) return [];

    const facts = model.NewFactCollection("comment");

    sections.forEach((section) => {
      // Skip if section is already saved
      if (this.isSaved(section)) return;

      const dateEl = section.querySelector<HTMLAnchorElement>(selectors.displayDate);
      const clean = dateEl?.href ? dateEl.href.replace("&__tn__=R", "") : undefined;
      const contextUrl = clean || this.context || window.location.href;
      if (clean) {
        facts.facts.push(NewFact("fb_comment_source_url", "url", clean, window.location.href));
        this.context = clean;
      }

      const authorEl = section.querySelector<HTMLAnchorElement>(selectors.displayName);
      if (authorEl) {
        const name = authorEl.textContent?.trim() || "";
        const url = authorEl.href?.split("?")[0] || "";
        if (name) facts.facts.push(NewFact("fb_comment_display_name", "name", name, contextUrl));
        if (url) facts.facts.push(NewFact("fb_comment_profile_url", "url", url, contextUrl));
      }

      const bodyEl = section.querySelector(selectors.content);
      if (bodyEl) {
        const body = turndownService.turndown(bodyEl.innerHTML || "");
        if (body) facts.facts.push(NewFact("fb_comment_content_md", "md", body, contextUrl));
      }

      const likeEl = section.querySelector(selectors.likeCount);
      if (likeEl) {
        const likeCount = this.parseCount((likeEl.textContent || "").trim());
        facts.facts.push(NewFact("fb_comment_like_count", "integer", likeCount, contextUrl));
      }

      const replyEl = section.querySelector(selectors.replyCount);
      if (replyEl) {
        const replyCount = this.parseCount((replyEl.textContent || "").trim());
        facts.facts.push(NewFact("fb_comment_comment_count", "integer", replyCount, contextUrl));
      }

      if (dateEl?.textContent) {
        facts.facts.push(NewFact("fb_comment_duration_string", "string", dateEl.textContent.trim(), contextUrl));
      }

      // Mark section as saved after processing
      this.markSaved(section);
    });

    return facts.facts.length ? [facts] : [];
  }

  /**
   * Collects profile header facts (name, picture, bio, follower and following
   * counts). Each source element is harvested once and then tagged as saved.
   *
   * @returns The collection, or null when no new fact was found.
   */
  saveProfile(): model.FactCollection | null {
    const selectors = ELEMENT_SELECTORS.profile;
    const facts = model.NewFactCollection("profile");

    const displayName = document.querySelector(selectors.displayName);
    if (displayName && !this.isSaved(displayName)) {
      facts.facts.push(NewFact("fb_profile_source_url", "url", window.location.href, this.context));
      const name = displayName.lastChild?.textContent || "";
      if (name) facts.facts.push(NewFact("fb_profile_display_name", "name", name, this.context));
      this.markSaved(displayName);
    }

    const profilePicture = document.querySelector(selectors.profilePicture);
    if (profilePicture && !this.isSaved(profilePicture)) {
      const pic = profilePicture.getAttribute("xlink:href") || "";
      if (pic) facts.facts.push(NewFact("fb_profile_profileimage_url", "url", pic, this.context));
      this.markSaved(profilePicture);
    }

    // Guarded like every other field so the bio is not re-emitted on each call.
    const bioEl = document.querySelector(selectors.bio);
    if (bioEl && !this.isSaved(bioEl)) {
      const bioContent = turndownService.turndown(bioEl.innerHTML);
      facts.facts.push(NewFact("fb_profile_bio_md", "md", bioContent, window.location.href));
      this.markSaved(bioEl);
    }

    const followerCount = document.querySelector(selectors.followerCount);
    if (followerCount && !this.isSaved(followerCount)) {
      const count = this.parseCount(followerCount.textContent);
      facts.facts.push(NewFact("fb_profile_follower_count", "integer", count, this.context));
      this.markSaved(followerCount);
    }

    const followingCount = document.querySelector(selectors.followingCount);
    if (followingCount && !this.isSaved(followingCount)) {
      const count = this.parseCount(followingCount.textContent);
      facts.facts.push(NewFact("fb_profile_following_count", "integer", count, this.context));
      this.markSaved(followingCount);
    }

    return facts.facts.length ? facts : null;
  }

  /**
   * Harvests every not-yet-saved post/reel card of a profile feed or search
   * result page into one collection ("search" on search pages, "post" otherwise).
   *
   * @returns The collection, or null when no card produced any fact.
   */
  async saveAllPost(): Promise<model.FactCollection | null> {
    const isSearch = window.location.href.includes("/search/");

    let sections: NodeListOf<HTMLElement> | undefined;
    if (isSearch) {
      sections = document.querySelectorAll<HTMLElement>(ELEMENT_SELECTORS.post.searchPageContainer);
    } else {
      const container = document.querySelector(ELEMENT_SELECTORS.post.container);
      sections = container?.querySelectorAll<HTMLElement>(":scope > div");
    }
    if (!sections?.length) return null;

    // First, expand all content before processing
    await this.expandAllContent(sections);

    const facts = model.NewFactCollection(isSearch ? "search" : "post");

    const tasks = Array.from(sections).map(async (postElement) => {
      if (this.isSaved(postElement)) return;

      const isReels =
        postElement.querySelector('a[attributionsrc]')?.getAttribute("aria-label")?.includes("Reels") ?? false;
      const data = isReels
        ? await this.extractReelsFromProfile(postElement)
        : await this.extractPostFromProfile(postElement);

      if (data && data.facts.length) {
        facts.facts.push(...data.facts);
        // Mark the post CONTAINER as saved.
        this.markSaved(postElement);
      }
    });

    // A failing card must not prevent the remaining cards from being collected.
    await Promise.allSettled(tasks);
    return facts.facts.length ? facts : null;
  }

  /**
   * Clicks every "see more" button found in the not-yet-saved cards.
   *
   * The clicks are queued as deferred tasks and started `batchSize` at a time
   * with a short pause between batches, so the page is not hit by all clicks
   * at once.
   */
  private async expandAllContent(sections: NodeListOf<HTMLElement>): Promise<void> {
    // Thunks rather than promises: a promise would already be running the
    // moment it is created, which would defeat the batching below.
    const expandTasks: Array<() => Promise<void>> = [];

    sections.forEach((postElement) => {
      if (this.isSaved(postElement)) return;

      const isReels =
        postElement.querySelector('a[attributionsrc]')?.getAttribute("aria-label")?.includes("Reels") ?? false;
      const seeMoreSelector = isReels
        ? ELEMENT_SELECTORS.reelFromProfile.seeMoreBtn
        : ELEMENT_SELECTORS.postFromProfile.seeMoreBtn;

      // Find all expandable content in this post
      const seeMoreButtons = postElement.querySelectorAll<HTMLElement>(seeMoreSelector);
      seeMoreButtons.forEach((btn) => {
        expandTasks.push(() => this.expandSingleContent(btn, postElement, seeMoreSelector));
      });
    });

    const batchSize = 3; // Expand 3 at a time to avoid overwhelming the page
    for (let i = 0; i < expandTasks.length; i += batchSize) {
      const batch = expandTasks.slice(i, i + batchSize).map((start) => start());
      await Promise.allSettled(batch);

      // Small delay between batches
      if (i + batchSize < expandTasks.length) {
        await new Promise((resolve) => setTimeout(resolve, 300));
      }
    }
  }

  /**
   * Scrolls to one "see more" button, clicks it and waits (up to 2 s) for the
   * expansion to finish. Failures are logged and swallowed on purpose: expansion
   * is best-effort.
   */
  private async expandSingleContent(
    button: HTMLElement,
    postElement: HTMLElement,
    seeMoreSelector: string
  ): Promise<void> {
    try {
      // Scroll to button for better interaction
      button.scrollIntoView({ block: "nearest", inline: "nearest" });

      // Click the button
      button.click();

      // Wait for content to expand
      await this.waitForContentExpansion(postElement, seeMoreSelector, 2000);

      // Additional small delay for DOM to stabilize
      await new Promise((resolve) => setTimeout(resolve, 100));
    } catch (error) {
      console.warn("Failed to expand content:", error);
    }
  }

  /**
   * Polls every 50 ms until `container` no longer contains an element matching
   * `seeMoreSelector`, or until `timeoutMs` has elapsed. Always resolves.
   */
  private async waitForContentExpansion(
    container: HTMLElement,
    seeMoreSelector: string,
    timeoutMs: number
  ): Promise<void> {
    const startTime = Date.now();

    return new Promise<void>((resolve) => {
      const checkExpansion = () => {
        // Check if button is gone (content expanded)
        const button = container.querySelector(seeMoreSelector);
        const elapsed = Date.now() - startTime;

        if (!button || elapsed >= timeoutMs) {
          resolve();
          return;
        }

        // Continue checking
        setTimeout(checkExpansion, 50);
      };

      checkExpansion();
    });
  }

  /**
   * Extracts the reel shown on a reel detail page from the DOM.
   *
   * @returns The collection, or null when an ld+json script is present, no
   *   unsaved container exists, or nothing was extracted.
   */
  async saveDetailReels(): Promise<model.FactCollection | null> {
    // NOTE(review): when an ld+json script exists nothing is extracted here —
    // presumably that case is meant to be handled elsewhere; confirm.
    const jsonScript = document.querySelector('script[type="application/ld+json"]');
    if (jsonScript) return null;

    // The last matching container is the one that gets harvested.
    const containers = document.querySelectorAll<HTMLElement>(ELEMENT_SELECTORS.detailReels.container);
    const lastMain = containers[containers.length - 1];
    if (!lastMain || this.isSaved(lastMain)) return null;

    const data = await this.extractReelsFromDom(lastMain);
    if (!data.facts.length) return null;

    // Mark the reel detail CONTAINER as saved.
    this.markSaved(lastMain);
    return data;
  }

  /**
   * Reads display name, caption and reaction counters of a reel from
   * `reelsElement`. Also resets `this.context` to the current page URL.
   */
  async extractReelsFromDom(reelsElement: HTMLElement): Promise<model.FactCollection> {
    const selectors = ELEMENT_SELECTORS.detailReels;
    const postData = model.NewFactCollection("post");
    this.context = window.location.href;
    postData.facts.push(NewFact("fb_post_source_url", "url", window.location.href, this.context));

    const displayNameElement = reelsElement.querySelector(selectors.displayName);
    if (displayNameElement) {
      const displayName = displayNameElement.textContent?.replace(/\s+/g, " ").trim() || "";
      if (displayName) postData.facts.push(NewFact("fb_post_display_name", "name", displayName, this.context));
    }

    const captionWrap = reelsElement.querySelector(selectors.caption)?.parentElement;
    if (captionWrap) {
      await this.expandCaptionIfNeeded(captionWrap, selectors.seeMoreBtn, {
        maxClicks: 3,
        timeoutMs: 2000,
      });
      // Work on a copy so that stripping buttons and links leaves the page intact.
      const clone = captionWrap.cloneNode(true) as HTMLElement;
      clone.querySelectorAll("button, [role='button'], object, a").forEach((el) => el.remove());
      const caption = turndownService.turndown(clone);
      if (caption) postData.facts.push(NewFact("fb_post_content_md", "md", caption, this.context));
    }

    const reactionContainer = reelsElement.querySelector(selectors.reactionContainer);
    if (reactionContainer) {
      // Children 1, 2 and 3 are read as like, comment and share counters.
      const likeCount = reactionContainer.children[1]?.textContent?.trim() || "";
      const commentCount = reactionContainer.children[2]?.textContent?.trim() || "";
      const shareCount = reactionContainer.children[3]?.textContent?.trim() || "";

      postData.facts.push(NewFact("fb_post_like_count", "integer", this.parseCount(likeCount), this.context));
      postData.facts.push(NewFact("fb_post_comment_count", "integer", this.parseCount(commentCount), this.context));
      postData.facts.push(NewFact("fb_post_share_count", "integer", this.parseCount(shareCount), this.context));
    }

    // Add savedat_datetime
    // postData.facts.push(NewFact("savedat_datetime", "datetime", new Date().toISOString(), this.context));
    return postData;
  }

  /**
   * Extracts the post shown in the post detail dialog.
   *
   * @returns The collection, or null when the dialog is missing or already saved.
   */
  saveDetailPost(): model.FactCollection | null {
    const selectors = ELEMENT_SELECTORS.detailPost;
    const container = document.querySelector(selectors.container);
    if (!container || this.isSaved(container)) return null;

    const out = model.NewFactCollection("post");

    // Post URL
    out.facts.push(NewFact("fb_post_source_url", "url", window.location.href, this.context));

    // Display name
    const displayNameEl = container.querySelector(selectors.displayName);
    if (displayNameEl) {
      const displayName = displayNameEl.textContent?.replace(/\s+/g, " ").trim() || "";
      if (displayName) out.facts.push(NewFact("fb_post_display_name", "name", displayName, this.context));
    }

    // Photos
    const photoImgs = Array.from(container.querySelectorAll<HTMLImageElement>(selectors.photoUrls))
      .map((img) => img.src)
      .filter(Boolean);

    // Caption -> markdown
    let captionMd = "";
    const captionEl = container.querySelector(selectors.caption);
    if (captionEl) {
      captionMd = (turndownService.turndown(captionEl.innerHTML || "") || "").trim();
    }

    // content_md: "[photo](u1) [photo-2](u2) ...\n\n<caption>"
    const photoLine = photoImgs
      .map((u, i) => (i === 0 ? `[photo](${u})` : `[photo-${i + 1}](${u})`))
      .join(" ");
    const contentMd = [photoLine, captionMd].filter(Boolean).join("\n\n").trim();
    if (contentMd) out.facts.push(NewFact("fb_post_content_md", "md", contentMd, this.context));

    // // (optional) separate photo fields, if still used downstream
    // if (photoImgs.length === 1) {
    //   out.facts.push(NewFact("photoUrl", "url", photoImgs[0], this.context));
    // } else if (photoImgs.length > 1) {
    //   out.facts.push(NewFact("photoUrls", "string", JSON.stringify(photoImgs), this.context));
    //   out.facts.push(NewFact("photoUrl", "url", photoImgs[0], this.context));
    // }

    // Reactions
    const reactBox = container.querySelector(selectors.reactionContainer);
    if (reactBox) {
      const likeCount = reactBox.querySelector(selectors.likeCount);
      if (likeCount) {
        const count = this.parseCount(likeCount.textContent?.trim());
        out.facts.push(NewFact("fb_post_like_count", "integer", count, this.context));
      }

      const commentCount = reactBox.querySelector(selectors.commentCount);
      if (commentCount) {
        const count = this.parseCount(commentCount.textContent?.trim());
        out.facts.push(NewFact("fb_post_comment_count", "integer", count, this.context));
      }

      const shareCount = reactBox.querySelector(selectors.shareCount);
      if (shareCount) {
        const count = this.parseCount(shareCount.textContent?.trim());
        out.facts.push(NewFact("fb_post_share_count", "integer", count, this.context));
      }
    }

    if (!out.facts.length) return null;
    this.markSaved(container);
    return out;
  }

  /**
   * Extracts one regular (non-reel) post card of a profile feed or search page.
   * Cards without a caption element, or already saved, yield null.
   */
  async extractPostFromProfile(postElement: HTMLElement): Promise<model.FactCollection | null> {
    const captionEl = postElement.querySelector(
      ELEMENT_SELECTORS.postFromProfile.caption
    );
    if (!postElement || this.isSaved(postElement) || !captionEl) return null;

    const out = model.NewFactCollection("post");

    // 1) Post URL
    const postUrlEl = postElement.querySelector(
      ELEMENT_SELECTORS.postFromProfile.postUrl
    );

    // The href may be relative; normalize it to an absolute Facebook URL.
    const rawHref = postUrlEl?.getAttribute("href") || "";
    const fullPostUrl = rawHref
      ? (rawHref.startsWith("http")
        ? rawHref.split("?")[0]
        : `https://www.facebook.com${rawHref.split("?")[0]}`)
      : "";

    // this.context = window.location.href;
    if (fullPostUrl) {
      this.context = fullPostUrl;
      out.facts.push(NewFact("fb_post_source_url", "url", fullPostUrl, this.context));
    }

    // 2) Profile URL & Display Name
    const profileUrlEl = postElement.querySelector(
      ELEMENT_SELECTORS.postFromProfile.profileUrl
    );
    const profileNameEl = postElement.querySelector(
      ELEMENT_SELECTORS.postFromProfile.profileName
    );

    const profileUrl = profileUrlEl?.getAttribute("href")?.split("?")[0] || "";
    const displayName = profileNameEl?.textContent?.replace(/\s+/g, " ").trim() || "";

    if (profileUrl) out.facts.push(NewFact("fb_post_profile_url", "url", profileUrl.startsWith("https://www.facebook.com/") ? profileUrl : `https://www.facebook.com${profileUrl}`, this.context));
    if (displayName) out.facts.push(NewFact("fb_post_display_name", "name", displayName, this.context));

    const mediaExternal = postElement.querySelectorAll('[data-ad-rendering-role="image"]');
    const photos = postElement.querySelectorAll(ELEMENT_SELECTORS.postFromProfile.photoUrl);
    const videos = postElement.querySelectorAll('a[attributionsrc][href*="/videos/"]');

    // Extract URLs from different media types
    const mediaExternalUrls = Array.from(mediaExternal)
      .map((img) => turndownService.turndown(img.innerHTML) || "")
      .filter(Boolean);

    const photoUrls = Array.from(photos)
      .map((img) => {
        const src = (img as HTMLImageElement).src || "";
        return src.split("?__cft__")[0]; // Remove __cft__ parameter
      })
      .filter(Boolean);

    const videoUrls = Array.from(videos)
      .map((video) => {
        const href = video.getAttribute("href") || "";
        return href.split("?__cft__")[0]; // Remove __cft__ parameter
      })
      .filter(Boolean);

    // Combine all media URLs
    const allMediaUrls = [...mediaExternalUrls, ...photoUrls, ...videoUrls];

    const captionMd = captionEl
      ? (turndownService.turndown(captionEl.innerHTML || "") || "").trim()
      : "";

    // 4) Create content markdown with all media types
    if (allMediaUrls.length || captionMd) {
      const mediaLines: string[] = [];

      // Add external media
      mediaExternalUrls.forEach((url, i) => {
        mediaLines.push(i === 0 && mediaExternalUrls.length === 1 ? `${url}` : `[](${url})`);
      });

      // Add regular photos
      photoUrls.forEach((url) => {
        mediaLines.push(`[photo](${url})`);
      });

      // Add videos
      videoUrls.forEach((url) => {
        mediaLines.push(`[video](${url})`);
      });

      const mediaLine = mediaLines.join(" ");
      const contentMd = [mediaLine, captionMd].filter(Boolean).join("\n\n").trim();

      if (contentMd) out.facts.push(NewFact("fb_post_content_md", "md", contentMd, this.context));
    }

    // // (optional) also store the photo URLs separately
    // if (photoImgs.length === 1) {
    //   out.facts.push(NewFact("photoUrl", "url", photoImgs[0], this.context));
    // } else if (photoImgs.length > 1) {
    //   out.facts.push(NewFact("photoUrls", "string", JSON.stringify(photoImgs), this.context));
    //   out.facts.push(NewFact("photoUrl", "url", photoImgs[0], this.context));
    // }

    // 5) Reactions (like, comment, share)
    const reactBox = postElement.querySelector(
      ELEMENT_SELECTORS.postFromProfile.containerInteractions
    );
    if (reactBox) {
      const likeEl = reactBox.querySelector(ELEMENT_SELECTORS.postFromProfile.like);
      const commentEl = reactBox.querySelector(ELEMENT_SELECTORS.postFromProfile.comment);
      const shareEl = reactBox.querySelector(ELEMENT_SELECTORS.postFromProfile.share);

      const likeCount = likeEl?.textContent?.trim() || "";
      const commentCount = commentEl?.textContent?.trim() || "";
      const shareCount = shareEl?.textContent?.trim() || "";

      out.facts.push(NewFact("fb_post_like_count", "integer", this.parseCount(likeCount), this.context));
out.facts.push(NewFact("fb_post_comment_count", "integer", this.parseCount(commentCount), this.context)); out.facts.push(NewFact("fb_post_share_count", "integer", this.parseCount(shareCount), this.context)); } // Add savedat_datetime // out.facts.push(NewFact("savedat_datetime", "datetime", new Date().toISOString(), this.context)); // Tidak ada fakta yang terkumpul? jangan tandai & return null if (!out.facts.length) return null; // Tandai container post sebagai saved (bukan ) this.markSaved(postElement); return out; } async extractReelsFromProfile(postElement: HTMLElement): Promise { // Check if this element was already processed if (this.isSaved(postElement)) { return model.NewFactCollection("post"); // Return empty collection for already processed elements } const postData = model.NewFactCollection("post"); const reelUrlEl = postElement.querySelector(ELEMENT_SELECTORS.reelFromProfile.postUrl); if (reelUrlEl) { const href = reelUrlEl.getAttribute("href") || ""; const fullUrl = href.startsWith("http") ? href.split("?")[0] : `https://www.facebook.com${href.split("?")[0]}`; if (fullUrl) { this.context = fullUrl postData.facts.push(NewFact("fb_post_source_url", "url", fullUrl, this.context)) }; } const profileUrlEl = postElement.querySelector(ELEMENT_SELECTORS.reelFromProfile.profileUrl); if (profileUrlEl) { const href = profileUrlEl.getAttribute("href") || ""; const cleanUrl = href.split("?")[0]; if (cleanUrl) postData.facts.push(NewFact("fb_post_profile_url", "url", cleanUrl.startsWith("https://www.facebook.com") ? 
cleanUrl : `https://www.facebook.com${cleanUrl}`, this.context)); } const displayNameEl = postElement.querySelector(ELEMENT_SELECTORS.reelFromProfile.profileName); if (displayNameEl) { const displayName = displayNameEl.textContent?.replace(/\s+/g, " ").trim() || ""; if (displayName) postData.facts.push(NewFact("fb_post_display_name", "name", displayName, this.context)); } const captionElement = postElement.querySelector(ELEMENT_SELECTORS.reelFromProfile.caption); if (captionElement) { const clone = captionElement.cloneNode(true) as HTMLElement; clone.querySelectorAll('div[role="button"], button, object, a').forEach((el) => el.remove()); const caption = turndownService.turndown(clone.innerHTML.trim()); if (caption) postData.facts.push(NewFact("fb_post_content_md", "md", caption, this.context)); } const containerInteractions = postElement.querySelector( ELEMENT_SELECTORS.reelFromProfile.containerInteractions ); if (containerInteractions) { // Filter elements that contain '__fb-' in their innerHTML and extract text content const reactionList = [...containerInteractions.querySelectorAll("div > div > div > div > div")] .filter((el) => el.innerHTML.includes("__fb-")) .map((el) => el.textContent?.trim() || ""); console.log("Reels reaction list:", reactionList); // Get counts from the filtered reaction list const likeCount = reactionList[1] || "0"; const commentCount = reactionList[2] || "0"; const shareCount = reactionList[3] || "0"; console.log("Reels counts:", { likeCount, commentCount, shareCount }); postData.facts.push(NewFact("fb_post_like_count", "integer", this.parseCount(likeCount), this.context)); postData.facts.push(NewFact("fb_post_comment_count", "integer", this.parseCount(commentCount), this.context)); postData.facts.push(NewFact("fb_post_share_count", "integer", this.parseCount(shareCount), this.context)); } const dateEl = postElement.querySelector(ELEMENT_SELECTORS.reelFromProfile.date); if (dateEl) { const dateText = dateEl.textContent?.trim() || ""; if 
(dateText) { postData.facts.push(NewFact("fb_post_duration_string", "string", dateText, this.context)); } } // Add savedat_datetime // postData.facts.push(NewFact("savedat_datetime", "datetime", new Date().toISOString(), this.context)); // Mark the container as saved only if we actually extracted data if (postData.facts.length > 0) { this.markSaved(postElement); } return postData; } private async expandCaptionIfNeeded( captionEl: HTMLElement, seeMoreSelector: string, opts: { maxClicks?: number; timeoutMs?: number } = {} ): Promise { const { maxClicks = 3, timeoutMs = 2000 } = opts; captionEl.scrollIntoView({ block: "nearest", inline: "nearest" }); const before = this.measureCaption(captionEl); for (let i = 0; i < maxClicks; i++) { const btn = captionEl.querySelector(seeMoreSelector); if (!btn) break; btn.click(); await this.waitForExpansion(captionEl, seeMoreSelector, timeoutMs); await this.nextPaint(); await this.nextPaint(); const after = this.measureCaption(captionEl); const expanded = after.height > before.height || after.textLen > before.textLen || !captionEl.querySelector(seeMoreSelector); if (expanded) break; } } private waitForExpansion(captionEl: HTMLElement, seeMoreSelector: string, timeoutMs: number): Promise { const start = this.measureCaption(captionEl); return new Promise((resolve) => { let done = false; const finish = () => { if (!done) { done = true; cleanup(); resolve(); } }; const ro = new ResizeObserver(() => finish()); ro.observe(captionEl); const mo = new MutationObserver(() => finish()); mo.observe(captionEl, { childList: true, subtree: true, characterData: true }); let rafId = 0; const poll = () => { if (done) return; const cur = this.measureCaption(captionEl); const grew = cur.height > start.height || cur.textLen > start.textLen; const buttonGone = !captionEl.querySelector(seeMoreSelector); if (grew || buttonGone) return finish(); rafId = requestAnimationFrame(poll); }; rafId = requestAnimationFrame(poll); const t = setTimeout(finish, 
timeoutMs); function cleanup() { try { ro.disconnect(); } catch { } try { mo.disconnect(); } catch { } try { cancelAnimationFrame(rafId); } catch { } clearTimeout(t); } }); } private nextPaint(): Promise { return new Promise((res) => requestAnimationFrame(() => res())); } private measureCaption(el: HTMLElement) { const rect = el.getBoundingClientRect(); return { height: Math.round(rect.height), textLen: (el.textContent || "").length }; } addClassTagged(element: Element): void { if (!element || element === document.body) return; element.setAttribute("class", (element.getAttribute("class") || "") + " tagged"); } parseCount(str = ""): string { if (typeof str !== "string") return "0"; const replyMatch = str.match(/Lihat semua (\d+)\s*balasan?|(\d+)\s*replies?/i); if (replyMatch) return replyMatch[1] || replyMatch[2]; const raw = str.trim().match(/^([\d,.]+)\s*([a-zA-Z]*)/); if (!raw) return "0"; const numPart = raw[1].replace(/\./g, "").replace(/,/g, "."); const unitPart = raw[2].toLowerCase(); const num = parseFloat(numPart); if (isNaN(num)) return "0"; const multipliers: Record = { k: 1_000, rb: 1_000, m: 1_000_000, jt: 1_000_000, b: 1_000_000_000, }; const total = Math.round(num * (multipliers[unitPart] || 1)); return String(total); } }