diff --git a/src/api/util/handlers/Message.ts b/src/api/util/handlers/Message.ts index 1733f7cb..869f0149 100644 --- a/src/api/util/handlers/Message.ts +++ b/src/api/util/handlers/Message.ts @@ -45,6 +45,7 @@ import { Webhook, handleFile, Permissions, + normalizeUrl, } from "@spacebar/util"; import { HTTPError } from "lambert-server"; import { In } from "typeorm"; @@ -270,23 +271,98 @@ export async function handleMessage(opts: MessageOptions): Promise { // TODO: cache link result in db export async function postHandleMessage(message: Message) { const content = message.content?.replace(/ *`[^)]*` */g, ""); // remove markdown - let links = content?.match(LINK_REGEX); - if (!links) return; + + const linkMatches = content?.match(LINK_REGEX) || []; const data = { ...message }; - data.embeds = data.embeds.filter((x) => x.type !== "link"); - links = links.slice(0, 20) as RegExpMatchArray; // embed max 20 links — TODO: make this configurable with instance policies + const currentNormalizedUrls = new Set(); + for (const link of linkMatches) { + // Don't process links in <> + if (link.startsWith("<") && link.endsWith(">")) { + continue; + } + try { + const normalized = normalizeUrl(link); + currentNormalizedUrls.add(normalized); + } catch (e) { + continue; + } + } - const cachePromises = []; + // Remove existing embeds whose URLs ARE in the current message (we'll regenerate them) + data.embeds = data.embeds.filter((embed) => { + if (!embed.url) { + return true; + } + try { + const normalizedEmbedUrl = normalizeUrl(embed.url); + const shouldRemove = currentNormalizedUrls.has(normalizedEmbedUrl); + return !shouldRemove; + } catch { + return true; + } + }); - for (const link of links) { + const seenNormalizedUrls = new Set(); + const uniqueLinks: string[] = []; + + for (const link of linkMatches.slice(0, 20)) { + // embed max 20 links - TODO: make this configurable with instance policies // Don't embed links in <> if (link.startsWith("<") && link.endsWith(">")) continue; - 
const url = new URL(link); + try { + const normalized = normalizeUrl(link); + + if (!seenNormalizedUrls.has(normalized)) { + seenNormalizedUrls.add(normalized); + uniqueLinks.push(link); + } + } catch (e) { + // Invalid URL, skip + continue; + } + } + + if (uniqueLinks.length === 0) { + // No valid unique links found, update message to remove old embeds + data.embeds = data.embeds.filter((embed) => { + const hasUrl = !!embed.url; + return !hasUrl; + }); + await Promise.all([ + emitEvent({ + event: "MESSAGE_UPDATE", + channel_id: message.channel_id, + data, + } as MessageUpdateEvent), + Message.update( + { id: message.id, channel_id: message.channel_id }, + { embeds: data.embeds }, + ), + ]); + return; + } + + const cachePromises = []; + + for (const link of uniqueLinks) { + let url: URL; + try { + url = new URL(link); + } catch (e) { + // Skip invalid URLs + continue; + } + + const normalizedUrl = normalizeUrl(link); + + // Check cache using normalized URL + const cached = await EmbedCache.findOne({ + where: { url: normalizedUrl }, + }); - const cached = await EmbedCache.findOne({ where: { url: link } }); if (cached) { data.embeds.push(cached.embed); continue; @@ -296,7 +372,7 @@ export async function postHandleMessage(message: Message) { const endpointPublic = Config.get().cdn.endpointPublic || "http://127.0.0.1"; // lol const handler = - url.hostname == new URL(endpointPublic).hostname + url.hostname === new URL(endpointPublic).hostname ? 
/**
 * Produce a canonical string for a URL so that equivalent links compare equal
 * (e.g. when de-duplicating links or using the URL as a cache key).
 *
 * Normalization steps:
 * - the fragment (`#...`) is dropped;
 * - all trailing slashes are removed from the path, except for the root `/`;
 * - query parameters are sorted by name (stable, code-unit order, so the
 *   relative order of repeated names is preserved) and re-serialized with
 *   proper percent-encoding;
 * - an empty query string (`?` with no parameters) is removed.
 *
 * The result is idempotent: normalizing an already-normalized URL returns it
 * unchanged.
 *
 * @param input - The URL to normalize. Must be absolute to be normalized.
 * @returns The normalized URL, or `input` unchanged when it cannot be parsed
 * as an absolute URL. This function never throws.
 */
export function normalizeUrl(input: string): string {
	try {
		const u = new URL(input);

		// Fragments are client-side only and never change the fetched resource.
		u.hash = "";

		// Strip every trailing "/" but keep the leading one, so the root path
		// stays "/" and a second normalization pass is a no-op.
		const path = u.pathname;
		let end = path.length;
		while (end > 1 && path.charCodeAt(end - 1) === 47 /* "/" */) {
			end--;
		}
		if (end !== path.length) {
			u.pathname = path.slice(0, end);
		}

		// Sort with the URL standard's own algorithm rather than localeCompare,
		// so the result does not depend on the host locale.
		u.searchParams.sort();
		// Re-serialize through URLSearchParams so that decoded delimiters
		// ("&", "=", "+", "%") inside names/values are percent-encoded again.
		// Joining the decoded pairs by hand would turn "?q=a%26b" into "?q=a&b",
		// which is a different URL. An empty result also drops a bare "?".
		u.search = u.searchParams.toString();

		return u.toString();
	} catch {
		// Not a parseable absolute URL: hand the input back untouched so callers
		// can keep treating this function as non-throwing.
		return input;
	}
}