import { AxiosInstance, AxiosHeaders } from "axios"; import { createAxiosInstance } from "../utils/axiosInstance"; import GlobalSetting from "../models/GlobalSetting"; import { GlobalSettingAttributes } from "../models/GlobalSetting"; import * as cheerio from "cheerio"; import { config } from "../config"; import { logger } from "../utils/logger"; import { injectable } from "inversify"; interface sourceItem { messageId?: string; title?: string; completeTitle?: string; link?: string; pubDate?: string; content?: string; description?: string; image?: string; cloudLinks?: string[]; tags?: string[]; cloudType?: string; } @injectable() export class Searcher { private static instance: Searcher; private api: AxiosInstance | null = null; constructor() { this.initAxiosInstance(); Searcher.instance = this; } private async initAxiosInstance(isUpdate: boolean = false) { let globalSetting = {} as GlobalSettingAttributes; if (isUpdate) { const settings = await GlobalSetting.findOne(); globalSetting = settings?.dataValues || ({} as GlobalSettingAttributes); } this.api = createAxiosInstance( config.telegram.baseUrl, AxiosHeaders.from({ accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6", "cache-control": "max-age=0", priority: "u=0, i", "sec-ch-ua": '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "sec-fetch-dest": "document", "sec-fetch-mode": "navigate", "sec-fetch-site": "none", "sec-fetch-user": "?1", "upgrade-insecure-requests": "1", }), globalSetting?.isProxyEnabled, globalSetting?.isProxyEnabled ? { host: globalSetting?.httpProxyHost, port: globalSetting?.httpProxyPort } : undefined ); } public static async updateAxiosInstance(): Promise { await Searcher.instance.initAxiosInstance(true); } private extractCloudLinks(text: string): { links: string[]; cloudType: string } { const links: string[] = []; let cloudType = ""; Object.values(config.cloudPatterns).forEach((pattern, index) => { const matches = text.match(pattern); if (matches) { links.push(...matches); if (!cloudType) cloudType = Object.keys(config.cloudPatterns)[index]; } }); return { links: [...new Set(links)], cloudType, }; } async searchAll(keyword: string, channelId?: string, messageId?: string) { const allResults: any[] = []; const channelList: any[] = channelId ? config.telegram.channels.filter((channel: any) => channel.id === channelId) : config.telegram.channels; // 使用Promise.all进行并行请求 const searchPromises = channelList.map(async (channel) => { try { const messageIdparams = messageId ? `before=${messageId}` : ""; const url = `/${channel.id}${keyword ? `?q=${encodeURIComponent(keyword)}&${messageIdparams}` : `?${messageIdparams}`}`; console.log(`Searching in channel ${channel.name} with URL: ${url}`); return this.searchInWeb(url).then((results) => { console.log(`Found ${results.items.length} items in channel ${channel.name}`); if (results.items.length > 0) { const channelResults = results.items .filter((item: sourceItem) => item.cloudLinks && item.cloudLinks.length > 0) .map((item: sourceItem) => ({ ...item, channel: channel.name, channelId: channel.id, })); allResults.push({ list: channelResults, channelInfo: { ...channel, channelLogo: results.channelLogo, }, id: channel.id, }); } }); } catch (error) { logger.error(`搜索频道 ${channel.name} 失败:`, error); } }); // 等待所有请求完成 await Promise.all(searchPromises); return { data: allResults, }; } async searchInWeb(url: string) { try { if (!this.api) { throw new Error("Axios instance is not initialized"); } const response = await this.api.get(url); const html = response.data; const $ = cheerio.load(html); const items: sourceItem[] = []; let channelLogo = ""; $(".tgme_header_link").each((_, element) => { channelLogo = $(element).find("img").attr("src") || ""; }); // 遍历每个消息容器 $(".tgme_widget_message_wrap").each((_, element) => { const messageEl = $(element); // 通过 data-post 属性来获取消息的链接 去除channelId 获得消息id const messageId = messageEl .find(".tgme_widget_message") .data("post") ?.toString() .split("/")[1]; // 提取标题 (第一个
标签前的内容) const title = messageEl .find(".js-message_text") .html() ?.split("
")[0] .replace(/<[^>]+>/g, "") .replace(/\n/g, "") || ""; // 提取描述 (第一个标签前面的内容,不包含标题) const content = messageEl .find(".js-message_text") .html() ?.replace(title, "") .split("/g, "") .trim() || ""; // 提取链接 (消息中的链接) // const link = messageEl.find('.tgme_widget_message').data('post'); // 提取发布时间 const pubDate = messageEl.find("time").attr("datetime"); // 提取图片 const image = messageEl .find(".tgme_widget_message_photo_wrap") .attr("style") ?.match(/url\('(.+?)'\)/)?.[1]; const tags: string[] = []; // 提取云盘链接 const links = messageEl .find(".tgme_widget_message_text a") .map((_, el) => $(el).attr("href")) .get(); messageEl.find(".tgme_widget_message_text a").each((index, element) => { const tagText = $(element).text(); if (tagText && tagText.startsWith("#")) { tags.push(tagText); } }); const cloudInfo = this.extractCloudLinks(links.join(" ")); // 添加到数组第一位 items.unshift({ messageId, title, pubDate, content, image, cloudLinks: cloudInfo.links, cloudType: cloudInfo.cloudType, tags, }); }); return { items: items, channelLogo }; } catch (error) { logger.error(`搜索错误: ${url}`, error); return { items: [], channelLogo: "", }; } } } export default new Searcher();