Fix various issues with embeds on message updates

The `postHandleMessage` function parses links and adds them to the EmbedCache table if necessary. In the previous implementation, message updates would push embeds unconditionally.

This commit parses links from the message and:
1. Normalizes the URLs
   - Useful for deduplicating similar URLs
2. Removes embeds with matching normalized URLs
   - Leaves all embeds except for ones with a `.url` property matching the updated message
   - This allows embeds to be re-ordered if the URL is moved
3. If no normalized URLs are found, removes all embeds
4. Takes the deduplicated + normalized URLs, adds an embed to the message for each, and inserts it into the EmbedCache table (if necessary)

This enables:
1. Embeds to be re-ordered by re-ordering links
2. Embeds to be removed by removing links

and fixes:
1. Duplicate embeds being attached to a message when edited
2025-06-28 01:38:46 -04:00 · 2025-06-28 01:38:46 -04:00 · f9cfb46e73
commit f9cfb46e73
parent c0f9c22e67
3 changed files with 125 additions and 14 deletions
--- a/src/api/util/handlers/Message.ts
+++ b/src/api/util/handlers/Message.ts
@ -45,6 +45,7 @@ import {
 	Webhook,
 	handleFile,
 	Permissions,
+	normalizeUrl,
 } from "@spacebar/util";
 import { HTTPError } from "lambert-server";
 import { In } from "typeorm";
@ -270,23 +271,98 @@ export async function handleMessage(opts: MessageOptions): Promise<Message> {
 // TODO: cache link result in db
 export async function postHandleMessage(message: Message) {
 	const content = message.content?.replace(/ *`[^)]*` */g, ""); // remove markdown
-	let links = content?.match(LINK_REGEX);
-	if (!links) return;
+
+	const linkMatches = content?.match(LINK_REGEX) || [];

 	const data = { ...message };
-	data.embeds = data.embeds.filter((x) => x.type !== "link");

-	links = links.slice(0, 20) as RegExpMatchArray; // embed max 20 links — TODO: make this configurable with instance policies
+	const currentNormalizedUrls = new Set<string>();
+	for (const link of linkMatches) {
+		// Don't process links in <>
+		if (link.startsWith("<") && link.endsWith(">")) {
+			continue;
+		}
+		try {
+			const normalized = normalizeUrl(link);
+			currentNormalizedUrls.add(normalized);
+		} catch (e) {
+			continue;
+		}
+	}

-	const cachePromises = [];
+	// Remove existing embeds whose URLs ARE in the current message (we'll regenerate them)
+	data.embeds = data.embeds.filter((embed) => {
+		if (!embed.url) {
+			return true;
+		}
+		try {
+			const normalizedEmbedUrl = normalizeUrl(embed.url);
+			const shouldRemove = currentNormalizedUrls.has(normalizedEmbedUrl);
+			return !shouldRemove;
+		} catch {
+			return true;
+		}
+	});

-	for (const link of links) {
+	const seenNormalizedUrls = new Set<string>();
+	const uniqueLinks: string[] = [];
+
+	for (const link of linkMatches.slice(0, 20)) {
+		// embed max 20 links - TODO: make this configurable with instance policies
 		// Don't embed links in <>
 		if (link.startsWith("<") && link.endsWith(">")) continue;

-		const url = new URL(link);
+		try {
+			const normalized = normalizeUrl(link);
+
+			if (!seenNormalizedUrls.has(normalized)) {
+				seenNormalizedUrls.add(normalized);
+				uniqueLinks.push(link);
+			}
+		} catch (e) {
+			// Invalid URL, skip
+			continue;
+		}
+	}
+
+	if (uniqueLinks.length === 0) {
+		// No valid unique links found, update message to remove old embeds
+		data.embeds = data.embeds.filter((embed) => {
+			const hasUrl = !!embed.url;
+			return !hasUrl;
+		});
+		await Promise.all([
+			emitEvent({
+				event: "MESSAGE_UPDATE",
+				channel_id: message.channel_id,
+				data,
+			} as MessageUpdateEvent),
+			Message.update(
+				{ id: message.id, channel_id: message.channel_id },
+				{ embeds: data.embeds },
+			),
+		]);
+		return;
+	}
+
+	const cachePromises = [];
+
+	for (const link of uniqueLinks) {
+		let url: URL;
+		try {
+			url = new URL(link);
+		} catch (e) {
+			// Skip invalid URLs
+			continue;
+		}
+
+		const normalizedUrl = normalizeUrl(link);
+
+		// Check cache using normalized URL
+		const cached = await EmbedCache.findOne({
+			where: { url: normalizedUrl },
+		});

-		const cached = await EmbedCache.findOne({ where: { url: link } });
 		if (cached) {
 			data.embeds.push(cached.embed);
 			continue;
@ -296,7 +372,7 @@ export async function postHandleMessage(message: Message) {
 		const endpointPublic =
 			Config.get().cdn.endpointPublic || "http://127.0.0.1"; // lol
 		const handler =
-			url.hostname == new URL(endpointPublic).hostname
+			url.hostname === new URL(endpointPublic).hostname
 				? EmbedHandlers["self"]
 				: EmbedHandlers[url.hostname] || EmbedHandlers["default"];

@ -307,26 +383,28 @@ export async function postHandleMessage(message: Message) {
 			if (!Array.isArray(res)) res = [res];

 			for (const embed of res) {
+				// Cache with normalized URL
 				const cache = EmbedCache.create({
-					url: link,
+					url: normalizedUrl,
 					embed: embed,
 				});
 				cachePromises.push(cache.save());
 				data.embeds.push(embed);
 			}
 		} catch (e) {
-			console.error(`[Embeds] Error while generating embed`, e);
+			console.error(
+				`[Embeds] Error while generating embed for ${link}`,
+				e,
+			);
 			Sentry.captureException(e, (scope) => {
 				scope.clear();
-				scope.setContext("request", { url });
+				scope.setContext("request", { url: link });
 				return scope;
 			});
 			continue;
 		}
 	}

-	if (!data.embeds) return;
-
 	await Promise.all([
 		emitEvent({
 			event: "MESSAGE_UPDATE",
--- a/src/util/util/Url.ts
+++ b/src/util/util/Url.ts
@ -0,0 +1,32 @@
+/**
+ * Normalize a URL so equivalent links produce the same string:
+ * - Removing trailing slashes (except root path)
+ * - Sorting query params by key (stable for repeated keys)
+ * - Removing empty query strings
+ * - Removing fragments
+ *
+ * @param input - URL string to normalize.
+ * @returns The normalized URL, or `input` unchanged when it cannot be parsed.
+ */
+export function normalizeUrl(input: string): string {
+	let u: URL;
+	try {
+		u = new URL(input);
+	} catch {
+		// Not a parseable absolute URL; return the input untouched
+		return input;
+	}
+	// Remove fragment
+	u.hash = "";
+	// Normalize pathname - remove trailing slash except for root "/"
+	if (u.pathname !== "/" && u.pathname.endsWith("/")) {
+		u.pathname = u.pathname.slice(0, -1);
+	}
+	// Sort through URLSearchParams so values are re-encoded on write-back;
+	// joining the decoded pairs by hand would leave "&", "=" and "+" raw
+	// and change the meaning of the query.
+	u.searchParams.sort();
+	// Drop a bare "?" left over from an empty query string
+	if (u.searchParams.toString() === "") u.search = "";
+	return u.toString();
+}
--- a/src/util/util/index.ts
+++ b/src/util/util/index.ts
@ -41,6 +41,7 @@ export * from "./String";
 export * from "./Token";
 export * from "./TraverseDirectory";
 export * from "./WebAuthn";
+export * from "./Url";
 export * from "./Gifs";
 export * from "./Application";
 export * from "./NameValidation";