Better embed handling

This commit is contained in:
Madeline 2022-10-01 14:44:32 +10:00
parent 547a57055f
commit e18af893f6
5 changed files with 159 additions and 129 deletions

BIN
package-lock.json generated

Binary file not shown.

View File

@ -37,6 +37,7 @@
"@types/node": "^18.7.20",
"@types/node-fetch": "^2.6.2",
"@types/node-os-utils": "^1.3.0",
"@types/probe-image-size": "^7.2.0",
"@types/sharp": "^0.31.0",
"@types/ws": "^8.5.3",
"express": "^4.18.1",
@ -72,6 +73,7 @@
"node-fetch": "^2.6.7",
"node-os-utils": "^1.3.7",
"picocolors": "^1.0.0",
"probe-image-size": "^7.2.3",
"proxy-agent": "^5.0.0",
"sharp": "^0.31.0",
"sqlite3": "^5.1.1",

View File

@ -24,9 +24,8 @@ import {
MessageCreateSchema,
} from "@fosscord/util";
import { HTTPError } from "lambert-server";
import fetch from "node-fetch";
import cheerio from "cheerio";
import { In } from "typeorm";
import { EmbedHandlers } from "@fosscord/api";
const allow_empty = false;
// TODO: check webhook, application, system author, stickers
// TODO: embed gifs/videos/images
@ -34,18 +33,6 @@ const allow_empty = false;
const LINK_REGEX =
/https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/g;
const DEFAULT_FETCH_OPTIONS: any = {
redirect: "follow",
follow: 1,
headers: {
"user-agent":
"Mozilla/5.0 (compatible; Fosscord/1.0; +https://github.com/fosscord/fosscord)",
},
// size: 1024 * 1024 * 5, // grabbed from config later
compress: true,
method: "GET",
};
export async function handleMessage(opts: MessageOptions): Promise<Message> {
const channel = await Channel.findOneOrFail({
where: { id: opts.channel_id },
@ -200,124 +187,24 @@ export async function postHandleMessage(message: Message) {
links = links.slice(0, 20) as RegExpMatchArray; // embed max 20 links — TODO: make this configurable with instance policies
const { endpointPublic, resizeWidthMax, resizeHeightMax } =
Config.get().cdn;
for (const link of links) {
let embed: Embed;
const url = new URL(link);
// bit gross, but whatever!
const { endpointPublic } = Config.get().cdn;
const handler = url.hostname == new URL(endpointPublic!).hostname ? EmbedHandlers["self"] : EmbedHandlers[url.hostname] || EmbedHandlers["default"];
try {
const request = await fetch(link, {
...DEFAULT_FETCH_OPTIONS,
size: Config.get().limits.message.maxEmbedDownloadSize,
});
const res = await handler(url);
if (!res) continue;
embed = res;
}
catch (e) {
continue;
}
let embed: Embed;
const type = request.headers.get("content-type");
if (type?.indexOf("image") == 0) {
embed = {
provider: {
url: link,
name: new URL(link).hostname,
},
image: {
// can't be bothered rn
proxy_url: `${endpointPublic}/external/resize/${encodeURIComponent(
link,
)}?width=500&height=400`,
url: link,
width: 500,
height: 400,
},
};
data.embeds.push(embed);
} else {
const text = await request.text();
const $ = cheerio.load(text);
const title = $('meta[property="og:title"]').attr("content");
const provider_name = $('meta[property="og:site_name"]').text();
const author_name = $('meta[property="article:author"]').attr(
"content",
);
const description =
$('meta[property="og:description"]').attr("content") ||
$('meta[property="description"]').attr("content");
const image = $('meta[property="og:image"]').attr("content");
const width =
parseInt(
$('meta[property="og:image:width"]').attr("content") ||
"",
) || undefined;
const height =
parseInt(
$('meta[property="og:image:height"]').attr("content") ||
"",
) || undefined;
const url = $('meta[property="og:url"]').attr("content");
// TODO: color
embed = {
provider: {
url: link,
name: provider_name,
},
};
const resizeWidth = Math.min(resizeWidthMax ?? 1, width ?? 100);
const resizeHeight = Math.min(
resizeHeightMax ?? 1,
height ?? 100,
);
if (author_name) embed.author = { name: author_name };
if (image)
embed.thumbnail = {
proxy_url: `${endpointPublic}/external/resize/${encodeURIComponent(
image,
)}?width=${resizeWidth}&height=${resizeHeight}`,
url: image,
width: width,
height: height,
};
if (title) embed.title = title;
if (url) embed.url = url;
if (description) embed.description = description;
const approvedProviders = [
"media4.giphy.com",
"c.tenor.com",
// todo: make configurable? don't really care tho
];
// very bad code below
// don't care lol
if (
embed?.thumbnail?.url &&
approvedProviders.indexOf(
new URL(embed.thumbnail.url).hostname,
) !== -1
) {
embed = {
provider: {
url: link,
name: new URL(link).hostname,
},
image: {
proxy_url: `${endpointPublic}/external/resize/${encodeURIComponent(
image!,
)}?width=${resizeWidth}&height=${resizeHeight}`,
url: image,
width: width,
height: height,
},
};
}
if (title || description) {
data.embeds.push(embed);
}
}
} catch (error) { }
data.embeds.push(embed);
}
await Promise.all([

View File

@ -7,3 +7,4 @@ export * from "./handlers/route";
export * from "./utility/String";
export * from "./handlers/Voice";
export * from "./utility/captcha";
export * from "./utility/EmbedHandlers";

View File

@ -0,0 +1,140 @@
import { Config, Embed, EmbedType } from "@fosscord/util";
import fetch, { Response } from "node-fetch";
import * as cheerio from "cheerio";
import probe from "probe-image-size";
/**
 * Baseline node-fetch options shared by outgoing embed requests.
 *
 * Consumers spread this object and add a per-request `size` limit taken
 * from the instance configuration.
 */
export const DEFAULT_FETCH_OPTIONS: any = {
	method: "GET",
	compress: true,
	// Follow redirects, but only a single hop.
	redirect: "follow",
	follow: 1,
	// size: 1024 * 1024 * 5, // grabbed from config later
	headers: {
		"user-agent":
			"Mozilla/5.0 (compatible; Fosscord/1.0; +https://github.com/fosscord/fosscord)",
	},
};
/**
 * Builds the CDN "external resize" URL used to proxy a remote image.
 *
 * @param url    Remote image location; its `href` is URI-encoded into the path.
 * @param width  Desired width in pixels. A falsy value (0, NaN, undefined at
 *               runtime) falls back to 500.
 * @param height Desired height in pixels. A falsy value falls back to 500.
 * @returns The proxy URL with `width`/`height` query parameters, each clamped
 *          to the configured `resizeWidthMax` / `resizeHeightMax` when set.
 */
export const getProxyUrl = (url: URL, width: number, height: number): string => {
	const { endpointPublic, resizeWidthMax, resizeHeightMax } = Config.get().cdn;

	// Resolve the defaults first so an unset limit can never reintroduce a
	// missing dimension (Math.min(500, undefined) would be NaN).
	const requestedWidth = width || 500;
	const requestedHeight = height || 500;

	// Each axis is clamped against its own limit; an unset limit means "no cap".
	const clampedWidth = Math.min(requestedWidth, resizeWidthMax || requestedWidth);
	const clampedHeight = Math.min(requestedHeight, resizeHeightMax || requestedHeight);

	return `${endpointPublic}/external/resize/${encodeURIComponent(url.href)}?width=${clampedWidth}&height=${clampedHeight}`;
};
/**
 * Downloads `url` and extracts the Open Graph / meta values used to build
 * an embed.
 *
 * The request uses `DEFAULT_FETCH_OPTIONS` plus the configured
 * `maxEmbedDownloadSize` as the body size limit.
 *
 * @param url Page to download and inspect.
 * @returns The extracted values (each `undefined` when the tag is absent,
 *          except `provider_name`, which is `""`), or `null` when the request
 *          or reading the response body fails.
 */
export const getMetaDescriptions = async (url: URL) => {
	let text: string;
	try {
		const response: Response = await fetch(url, {
			...DEFAULT_FETCH_OPTIONS,
			size: Config.get().limits.message.maxEmbedDownloadSize,
		});
		// Reading the body can reject as well (for example when it exceeds
		// `size`), so it belongs inside the same guard as the request.
		text = await response.text();
	} catch {
		return null;
	}

	const $ = cheerio.load(text);

	// <meta> elements carry their value in the `content` attribute, not as text.
	const content = (selector: string): string | undefined =>
		$(selector).attr("content");

	// Absent, non-numeric and zero values are all reported as undefined.
	const dimension = (selector: string): number | undefined =>
		parseInt(content(selector) || "", 10) || undefined;

	return {
		title: content('meta[property="og:title"]'),
		// Kept as a string (empty when absent) so the result shape is unchanged.
		provider_name: content('meta[property="og:site_name"]') ?? "",
		author: content('meta[property="article:author"]'),
		description:
			content('meta[property="og:description"]') ||
			content('meta[property="description"]') ||
			// The standard HTML description tag uses `name`, not `property`.
			content('meta[name="description"]'),
		image: content('meta[property="og:image"]'),
		width: dimension('meta[property="og:image:width"]'),
		height: dimension('meta[property="og:image:height"]'),
		url: content('meta[property="og:url"]'),
		youtube_embed: content(`meta[property="og:video:secure_url"]`),
	};
};
/**
 * Embed handler for links that are expected to point straight at an image.
 *
 * Fetches the meta values for `url`, probes the image dimensions, and
 * returns an image embed whose thumbnail targets the link itself.
 *
 * @param url Link found in the message.
 * @returns An image embed, or `null` when the meta lookup yields nothing.
 *          A rejection from the probe is not caught here.
 */
const genericImageHandler = async (url: URL): Promise<Embed | null> => {
	const metas = await getMetaDescriptions(url);
	if (!metas) return null;

	const probed = await probe(url.href);
	const { href } = url;

	// NOTE(review): the proxy URL is sized from the probed dimensions, while
	// the reported width/height prefer the og:image values when present.
	const thumbnail = {
		url: href,
		proxy_url: getProxyUrl(url, probed.width, probed.height),
		width: metas.width || probed.width,
		height: metas.height || probed.height,
	};

	return {
		type: EmbedType.image,
		url: href,
		thumbnail,
	};
};
/**
 * Embed builders keyed by link hostname.
 *
 * Besides real hostnames there are two special keys:
 *  - `"default"`: used when the hostname has no dedicated handler.
 *  - `"self"`: used for links that point at this instance's own CDN.
 *
 * Every handler resolves to an embed, or to `null` when none can be built.
 */
export const EmbedHandlers: { [key: string]: (url: URL) => Promise<Embed | null>; } = {
	// the url does not have a special handler
	"default": genericImageHandler,

	"giphy.com": genericImageHandler,
	"media4.giphy.com": genericImageHandler,
	"tenor.com": genericImageHandler,
	"c.tenor.com": genericImageHandler,
	"media.tenor.com": genericImageHandler,

	"www.youtube.com": async (url: URL): Promise<Embed | null> => {
		const metas = await getMetaDescriptions(url);
		if (!metas) return null;

		// Without og:image there is nothing to build the thumbnail from, and
		// `new URL(undefined)` would throw; give up explicitly instead.
		if (!metas.image) return null;

		return {
			video: {
				// TODO: does this adjust with aspect ratio?
				width: metas.width,
				height: metas.height,
				// NOTE(review): undefined when the page has no
				// og:video:secure_url tag — confirm clients cope with that.
				url: metas.youtube_embed!,
			},
			url: url.href,
			type: EmbedType.video,
			title: metas.title,
			thumbnail: {
				width: metas.width,
				height: metas.height,
				url: metas.image,
				// width/height may be undefined here; getProxyUrl substitutes
				// a default for missing dimensions.
				proxy_url: getProxyUrl(new URL(metas.image), metas.width!, metas.height!),
			},
			provider: {
				url: "https://www.youtube.com",
				name: "YouTube",
			},
			description: metas.description,
			// 0xFF0000
			color: 16711680,
			author: {
				name: metas.author,
				// TODO: author channel url
			}
		};
	},

	// the url is an image from this instance
	"self": async (url: URL): Promise<Embed | null> => {
		const result = await probe(url.href);

		return {
			url: url.href,
			type: EmbedType.image,
			thumbnail: {
				width: result.width,
				height: result.height,
				url: url.href,
				// Already served by this instance, so no resize proxy is needed.
				proxy_url: url.href,
			}
		};
	},
};