From f9cfb46e73b2e51d0c48fe509594bc3338ed65ac Mon Sep 17 00:00:00 2001 From: Zane Helton Date: Sat, 28 Jun 2025 01:38:46 -0400 Subject: [PATCH] Fix various issues with embeds on message updates The `postHandleMessage` function is parsing links and adding them to the EmbedCache table if necessary. In the previous implementation, message updates would push to the embeds unconditionally. This commit parses links from the message and: 1. Normalizes the URLs - Useful for deduplicating similar URLs 2. Remove embeds with matching normalized URLs - Leaves all embeds except for ones with a `.url` property matching the updated message - This allows embeds to be re-ordered if the URL is moved 3. If no normalized URLs are found, remove all embeds 4. Take the deduplicated + normalized URLs and add an embed to the message and insert into the EmbedCache table (if necessary) This enables: 1. Embeds to be re-ordered by re-ordering links 2. Embeds to be removed by removing links and fixes: 1. Duplicate embeds being attached to a message when edited --- src/api/util/handlers/Message.ts | 106 +++++++++++++++++++++++++++---- src/util/util/Url.ts | 32 ++++++++++ src/util/util/index.ts | 1 + 3 files changed, 125 insertions(+), 14 deletions(-) create mode 100644 src/util/util/Url.ts diff --git a/src/api/util/handlers/Message.ts b/src/api/util/handlers/Message.ts index 1733f7cb..869f0149 100644 --- a/src/api/util/handlers/Message.ts +++ b/src/api/util/handlers/Message.ts @@ -45,6 +45,7 @@ import { Webhook, handleFile, Permissions, + normalizeUrl, } from "@spacebar/util"; import { HTTPError } from "lambert-server"; import { In } from "typeorm"; @@ -270,23 +271,98 @@ export async function handleMessage(opts: MessageOptions): Promise { // TODO: cache link result in db export async function postHandleMessage(message: Message) { const content = message.content?.replace(/ *`[^)]*` */g, ""); // remove markdown - let links = content?.match(LINK_REGEX); - if (!links) return; + + const linkMatches = 
content?.match(LINK_REGEX) || []; const data = { ...message }; - data.embeds = data.embeds.filter((x) => x.type !== "link"); - links = links.slice(0, 20) as RegExpMatchArray; // embed max 20 links — TODO: make this configurable with instance policies + const currentNormalizedUrls = new Set(); + for (const link of linkMatches) { + // Don't process links in <> + if (link.startsWith("<") && link.endsWith(">")) { + continue; + } + try { + const normalized = normalizeUrl(link); + currentNormalizedUrls.add(normalized); + } catch (e) { + continue; + } + } - const cachePromises = []; + // Remove existing embeds whose URLs ARE in the current message (we'll regenerate them) + data.embeds = data.embeds.filter((embed) => { + if (!embed.url) { + return true; + } + try { + const normalizedEmbedUrl = normalizeUrl(embed.url); + const shouldRemove = currentNormalizedUrls.has(normalizedEmbedUrl); + return !shouldRemove; + } catch { + return true; + } + }); - for (const link of links) { + const seenNormalizedUrls = new Set(); + const uniqueLinks: string[] = []; + + for (const link of linkMatches.slice(0, 20)) { + // embed max 20 links - TODO: make this configurable with instance policies // Don't embed links in <> if (link.startsWith("<") && link.endsWith(">")) continue; - const url = new URL(link); + try { + const normalized = normalizeUrl(link); + + if (!seenNormalizedUrls.has(normalized)) { + seenNormalizedUrls.add(normalized); + uniqueLinks.push(link); + } + } catch (e) { + // Invalid URL, skip + continue; + } + } + + if (uniqueLinks.length === 0) { + // No valid unique links found, update message to remove old embeds + data.embeds = data.embeds.filter((embed) => { + const hasUrl = !!embed.url; + return !hasUrl; + }); + await Promise.all([ + emitEvent({ + event: "MESSAGE_UPDATE", + channel_id: message.channel_id, + data, + } as MessageUpdateEvent), + Message.update( + { id: message.id, channel_id: message.channel_id }, + { embeds: data.embeds }, + ), + ]); + return; + } + + const 
/**
 * Produce a canonical string form of a URL so that equivalent links compare
 * (and cache) as equal.
 *
 * Normalization steps:
 * - the fragment (`#...`) is removed;
 * - a single trailing slash is removed from the path (the root path `/` is kept);
 * - query parameters are sorted by name and re-serialized with
 *   `application/x-www-form-urlencoded` escaping;
 * - an empty query string (a dangling `?`) is removed.
 *
 * @param input - The URL text to normalize.
 * @returns The normalized absolute URL, or `input` unchanged when it cannot be
 * parsed as an absolute URL. This function never throws.
 */
export function normalizeUrl(input: string): string {
	let u: URL;
	try {
		u = new URL(input);
	} catch {
		// Not a parseable absolute URL: hand the text back untouched.
		return input;
	}

	// Remove fragment
	u.hash = "";

	// Normalize pathname - remove trailing slash except for root "/"
	if (u.pathname !== "/" && u.pathname.endsWith("/")) {
		u.pathname = u.pathname.slice(0, -1);
	}

	// Sort and re-serialize through URLSearchParams rather than joining the
	// decoded pairs by hand: a decoded value may contain "&", "=" or "+",
	// which would change the meaning of the query if written back unescaped.
	// sort() is stable and orders by code unit, so the result does not depend
	// on the process locale.
	const params = new URLSearchParams(u.search);
	params.sort();
	// Assigning an empty string also drops a dangling "?".
	u.search = params.toString();

	return u.toString();
}