mirror of
https://github.com/jiangrui1994/CloudSaver.git
synced 2026-01-11 07:38:45 +08:00
223 lines
7.0 KiB
TypeScript
223 lines
7.0 KiB
TypeScript
import { AxiosInstance, AxiosHeaders } from "axios";
|
|
import { createAxiosInstance } from "../utils/axiosInstance";
|
|
import GlobalSetting from "../models/GlobalSetting";
|
|
import { GlobalSettingAttributes } from "../models/GlobalSetting";
|
|
import * as cheerio from "cheerio";
|
|
import { config } from "../config";
|
|
import { logger } from "../utils/logger";
|
|
import { injectable } from "inversify";
|
|
|
|
interface sourceItem {
|
|
messageId?: string;
|
|
title?: string;
|
|
completeTitle?: string;
|
|
link?: string;
|
|
pubDate?: string;
|
|
content?: string;
|
|
description?: string;
|
|
image?: string;
|
|
cloudLinks?: string[];
|
|
tags?: string[];
|
|
cloudType?: string;
|
|
}
|
|
|
|
@injectable()
|
|
export class Searcher {
|
|
private static instance: Searcher;
|
|
private api: AxiosInstance | null = null;
|
|
|
|
constructor() {
|
|
this.initAxiosInstance();
|
|
Searcher.instance = this;
|
|
}
|
|
|
|
private async initAxiosInstance(isUpdate: boolean = false) {
|
|
let globalSetting = {} as GlobalSettingAttributes;
|
|
if (isUpdate) {
|
|
const settings = await GlobalSetting.findOne();
|
|
globalSetting = settings?.dataValues || ({} as GlobalSettingAttributes);
|
|
}
|
|
this.api = createAxiosInstance(
|
|
config.telegram.baseUrl,
|
|
AxiosHeaders.from({
|
|
accept:
|
|
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
|
"accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
|
|
"cache-control": "max-age=0",
|
|
priority: "u=0, i",
|
|
"sec-ch-ua": '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
|
"sec-ch-ua-mobile": "?0",
|
|
"sec-ch-ua-platform": '"macOS"',
|
|
"sec-fetch-dest": "document",
|
|
"sec-fetch-mode": "navigate",
|
|
"sec-fetch-site": "none",
|
|
"sec-fetch-user": "?1",
|
|
"upgrade-insecure-requests": "1",
|
|
}),
|
|
globalSetting?.isProxyEnabled,
|
|
globalSetting?.isProxyEnabled
|
|
? { host: globalSetting?.httpProxyHost, port: globalSetting?.httpProxyPort }
|
|
: undefined
|
|
);
|
|
}
|
|
|
|
public static async updateAxiosInstance(): Promise<void> {
|
|
await Searcher.instance.initAxiosInstance(true);
|
|
}
|
|
|
|
private extractCloudLinks(text: string): { links: string[]; cloudType: string } {
|
|
const links: string[] = [];
|
|
let cloudType = "";
|
|
Object.values(config.cloudPatterns).forEach((pattern, index) => {
|
|
const matches = text.match(pattern);
|
|
if (matches) {
|
|
links.push(...matches);
|
|
if (!cloudType) cloudType = Object.keys(config.cloudPatterns)[index];
|
|
}
|
|
});
|
|
return {
|
|
links: [...new Set(links)],
|
|
cloudType,
|
|
};
|
|
}
|
|
|
|
async searchAll(keyword: string, channelId?: string, messageId?: string) {
|
|
const allResults: any[] = [];
|
|
|
|
const channelList: any[] = channelId
|
|
? config.telegram.channels.filter((channel: any) => channel.id === channelId)
|
|
: config.telegram.channels;
|
|
|
|
// 使用Promise.all进行并行请求
|
|
const searchPromises = channelList.map(async (channel) => {
|
|
try {
|
|
const messageIdparams = messageId ? `before=${messageId}` : "";
|
|
const url = `/${channel.id}${keyword ? `?q=${encodeURIComponent(keyword)}&${messageIdparams}` : `?${messageIdparams}`}`;
|
|
console.log(`Searching in channel ${channel.name} with URL: ${url}`);
|
|
return this.searchInWeb(url).then((results) => {
|
|
console.log(`Found ${results.items.length} items in channel ${channel.name}`);
|
|
if (results.items.length > 0) {
|
|
const channelResults = results.items
|
|
.filter((item: sourceItem) => item.cloudLinks && item.cloudLinks.length > 0)
|
|
.map((item: sourceItem) => ({
|
|
...item,
|
|
channel: channel.name,
|
|
channelId: channel.id,
|
|
}));
|
|
|
|
allResults.push({
|
|
list: channelResults,
|
|
channelInfo: {
|
|
...channel,
|
|
channelLogo: results.channelLogo,
|
|
},
|
|
id: channel.id,
|
|
});
|
|
}
|
|
});
|
|
} catch (error) {
|
|
logger.error(`搜索频道 ${channel.name} 失败:`, error);
|
|
}
|
|
});
|
|
|
|
// 等待所有请求完成
|
|
await Promise.all(searchPromises);
|
|
|
|
return {
|
|
data: allResults,
|
|
};
|
|
}
|
|
|
|
async searchInWeb(url: string) {
|
|
try {
|
|
if (!this.api) {
|
|
throw new Error("Axios instance is not initialized");
|
|
}
|
|
const response = await this.api.get(url);
|
|
const html = response.data;
|
|
const $ = cheerio.load(html);
|
|
const items: sourceItem[] = [];
|
|
let channelLogo = "";
|
|
$(".tgme_header_link").each((_, element) => {
|
|
channelLogo = $(element).find("img").attr("src") || "";
|
|
});
|
|
// 遍历每个消息容器
|
|
$(".tgme_widget_message_wrap").each((_, element) => {
|
|
const messageEl = $(element);
|
|
|
|
// 通过 data-post 属性来获取消息的链接 去除channelId 获得消息id
|
|
const messageId = messageEl
|
|
.find(".tgme_widget_message")
|
|
.data("post")
|
|
?.toString()
|
|
.split("/")[1];
|
|
|
|
// 提取标题 (第一个<br>标签前的内容)
|
|
const title =
|
|
messageEl
|
|
.find(".js-message_text")
|
|
.html()
|
|
?.split("<br>")[0]
|
|
.replace(/<[^>]+>/g, "")
|
|
.replace(/\n/g, "") || "";
|
|
|
|
// 提取描述 (第一个<a>标签前面的内容,不包含标题)
|
|
const content =
|
|
messageEl
|
|
.find(".js-message_text")
|
|
.html()
|
|
?.replace(title, "")
|
|
.split("<a")[0]
|
|
.replace(/<br>/g, "")
|
|
.trim() || "";
|
|
|
|
// 提取链接 (消息中的链接)
|
|
// const link = messageEl.find('.tgme_widget_message').data('post');
|
|
|
|
// 提取发布时间
|
|
const pubDate = messageEl.find("time").attr("datetime");
|
|
|
|
// 提取图片
|
|
const image = messageEl
|
|
.find(".tgme_widget_message_photo_wrap")
|
|
.attr("style")
|
|
?.match(/url\('(.+?)'\)/)?.[1];
|
|
|
|
const tags: string[] = [];
|
|
// 提取云盘链接
|
|
const links = messageEl
|
|
.find(".tgme_widget_message_text a")
|
|
.map((_, el) => $(el).attr("href"))
|
|
.get();
|
|
messageEl.find(".tgme_widget_message_text a").each((index, element) => {
|
|
const tagText = $(element).text();
|
|
if (tagText && tagText.startsWith("#")) {
|
|
tags.push(tagText);
|
|
}
|
|
});
|
|
const cloudInfo = this.extractCloudLinks(links.join(" "));
|
|
// 添加到数组第一位
|
|
items.unshift({
|
|
messageId,
|
|
title,
|
|
pubDate,
|
|
content,
|
|
image,
|
|
cloudLinks: cloudInfo.links,
|
|
cloudType: cloudInfo.cloudType,
|
|
tags,
|
|
});
|
|
});
|
|
return { items: items, channelLogo };
|
|
} catch (error) {
|
|
logger.error(`搜索错误: ${url}`, error);
|
|
return {
|
|
items: [],
|
|
channelLogo: "",
|
|
};
|
|
}
|
|
}
|
|
}
|
|
|
|
export default new Searcher();
|