Fix various issues with embeds on message updates

The `postHandleMessage` function parses links from a message and adds them
to the EmbedCache table if necessary. In the previous implementation, message
updates would push to the embeds unconditionally.

This commit parses links from the message and:

1. Normalizes the URLs
  - Useful for deduplicating similar URLs
2. Removes embeds with matching normalized URLs
  - Leaves all embeds except for ones with a `.url` property matching a
  URL in the updated message
  - This allows embeds to be re-ordered if the URL is moved
3. If no normalized URLs are found, removes all embeds that have a URL
4. Takes the deduplicated + normalized URLs, adds an embed to the message for
each, and inserts it into the EmbedCache table (if necessary)

This enables:

1. Embeds to be re-ordered by re-ordering links
2. Embeds to be removed by removing links

and fixes:

1. Duplicate embeds being attached to a message when edited
This commit is contained in:
Zane Helton 2025-06-28 01:38:46 -04:00 committed by Madeline
parent c0f9c22e67
commit f9cfb46e73
3 changed files with 125 additions and 14 deletions

View File

@ -45,6 +45,7 @@ import {
Webhook,
handleFile,
Permissions,
normalizeUrl,
} from "@spacebar/util";
import { HTTPError } from "lambert-server";
import { In } from "typeorm";
@ -270,23 +271,98 @@ export async function handleMessage(opts: MessageOptions): Promise<Message> {
// TODO: cache link result in db
export async function postHandleMessage(message: Message) {
const content = message.content?.replace(/ *`[^)]*` */g, ""); // remove markdown
let links = content?.match(LINK_REGEX);
if (!links) return;
const linkMatches = content?.match(LINK_REGEX) || [];
const data = { ...message };
data.embeds = data.embeds.filter((x) => x.type !== "link");
links = links.slice(0, 20) as RegExpMatchArray; // embed max 20 links — TODO: make this configurable with instance policies
const currentNormalizedUrls = new Set<string>();
for (const link of linkMatches) {
// Don't process links in <>
if (link.startsWith("<") && link.endsWith(">")) {
continue;
}
try {
const normalized = normalizeUrl(link);
currentNormalizedUrls.add(normalized);
} catch (e) {
continue;
}
}
const cachePromises = [];
// Remove existing embeds whose URLs ARE in the current message (we'll regenerate them)
data.embeds = data.embeds.filter((embed) => {
if (!embed.url) {
return true;
}
try {
const normalizedEmbedUrl = normalizeUrl(embed.url);
const shouldRemove = currentNormalizedUrls.has(normalizedEmbedUrl);
return !shouldRemove;
} catch {
return true;
}
});
for (const link of links) {
const seenNormalizedUrls = new Set<string>();
const uniqueLinks: string[] = [];
for (const link of linkMatches.slice(0, 20)) {
// embed max 20 links - TODO: make this configurable with instance policies
// Don't embed links in <>
if (link.startsWith("<") && link.endsWith(">")) continue;
const url = new URL(link);
try {
const normalized = normalizeUrl(link);
if (!seenNormalizedUrls.has(normalized)) {
seenNormalizedUrls.add(normalized);
uniqueLinks.push(link);
}
} catch (e) {
// Invalid URL, skip
continue;
}
}
if (uniqueLinks.length === 0) {
// No valid unique links found, update message to remove old embeds
data.embeds = data.embeds.filter((embed) => {
const hasUrl = !!embed.url;
return !hasUrl;
});
await Promise.all([
emitEvent({
event: "MESSAGE_UPDATE",
channel_id: message.channel_id,
data,
} as MessageUpdateEvent),
Message.update(
{ id: message.id, channel_id: message.channel_id },
{ embeds: data.embeds },
),
]);
return;
}
const cachePromises = [];
for (const link of uniqueLinks) {
let url: URL;
try {
url = new URL(link);
} catch (e) {
// Skip invalid URLs
continue;
}
const normalizedUrl = normalizeUrl(link);
// Check cache using normalized URL
const cached = await EmbedCache.findOne({
where: { url: normalizedUrl },
});
const cached = await EmbedCache.findOne({ where: { url: link } });
if (cached) {
data.embeds.push(cached.embed);
continue;
@ -296,7 +372,7 @@ export async function postHandleMessage(message: Message) {
const endpointPublic =
Config.get().cdn.endpointPublic || "http://127.0.0.1"; // lol
const handler =
url.hostname == new URL(endpointPublic).hostname
url.hostname === new URL(endpointPublic).hostname
? EmbedHandlers["self"]
: EmbedHandlers[url.hostname] || EmbedHandlers["default"];
@ -307,26 +383,28 @@ export async function postHandleMessage(message: Message) {
if (!Array.isArray(res)) res = [res];
for (const embed of res) {
// Cache with normalized URL
const cache = EmbedCache.create({
url: link,
url: normalizedUrl,
embed: embed,
});
cachePromises.push(cache.save());
data.embeds.push(embed);
}
} catch (e) {
console.error(`[Embeds] Error while generating embed`, e);
console.error(
`[Embeds] Error while generating embed for ${link}`,
e,
);
Sentry.captureException(e, (scope) => {
scope.clear();
scope.setContext("request", { url });
scope.setContext("request", { url: link });
return scope;
});
continue;
}
}
if (!data.embeds) return;
await Promise.all([
emitEvent({
event: "MESSAGE_UPDATE",

32
src/util/util/Url.ts Normal file
View File

@ -0,0 +1,32 @@
/**
 * Produce a canonical string form of a URL so that equivalent links compare
 * equal (e.g. for de-duplication or as a cache key).
 *
 * Normalization steps:
 * - Removes the fragment (`#...`)
 * - Removes one trailing slash from the path (the root path "/" is kept)
 * - Sorts query params by key; params sharing a key keep their relative order
 * - Removes an empty query string (a bare trailing `?`)
 *
 * Query params are re-serialized through `URLSearchParams`, so reserved
 * characters inside a key or value (`&`, `=`, `+`, `#`) stay percent-encoded
 * and the normalized URL keeps the same meaning as the input. Spaces in query
 * values are serialized as `+`.
 *
 * @param input - The URL string to normalize.
 * @returns The normalized URL, or `input` unchanged when it cannot be parsed
 *   as a URL. This function never throws.
 */
export function normalizeUrl(input: string): string {
	try {
		const u = new URL(input);

		// Remove fragment
		u.hash = "";

		// Normalize pathname - remove trailing slash except for root "/"
		if (u.pathname !== "/" && u.pathname.endsWith("/")) {
			u.pathname = u.pathname.slice(0, -1);
		}

		// Normalize query params: sort by key (stable, by code unit, so the
		// result does not depend on the process locale) and let URLSearchParams
		// re-encode each key/value. Joining decoded entries by hand would turn
		// an encoded "&" or "=" inside a value into a param separator.
		const params = new URLSearchParams(u.search);
		params.sort();
		// An empty serialization clears the query entirely, dropping a bare "?".
		u.search = params.toString();

		return u.toString();
	} catch {
		// Not a parseable URL: hand the input back untouched.
		return input;
	}
}

View File

@ -41,6 +41,7 @@ export * from "./String";
export * from "./Token";
export * from "./TraverseDirectory";
export * from "./WebAuthn";
export * from "./Url";
export * from "./Gifs";
export * from "./Application";
export * from "./NameValidation";