obsidian-douban/src/org/wanxp/douban/data/search/SearchParser.ts
2023-12-21 10:25:28 +08:00

53 lines
1.9 KiB
TypeScript

import {CheerioAPI, load} from "cheerio";
import DoubanSearchResultSubject from "../model/DoubanSearchResultSubject";
import {SearchPage} from "../model/SearchPage";
import {log} from "../../../utils/Logutil";
import {SupportType} from "../../../constant/Constsant";
/**
 * Static helpers that turn Douban search responses into
 * DoubanSearchResultSubject records.
 */
export default class SearchParserHandler {

    /**
     * Builds one search-result record for every `.result` element in the document.
     *
     * Fields that cannot be found fall back to placeholders: `''` for the id,
     * `'-'` for title/type/desc, `null` for the score and
     * `'https://www.douban.com'` for the url.
     *
     * @param dataHtml Cheerio handle of the already loaded HTML.
     * @returns One record per `.result` element, in the order returned by Cheerio;
     *          an empty array when nothing matches.
     */
    static parseSearch(dataHtml: CheerioAPI): DoubanSearchResultSubject[] {
        return dataHtml('.result')
            .get()
            .map((element: any) => {
                const item = dataHtml(element);
                // Patterns are created per element: they carry the `g` flag, so a
                // shared instance would keep `lastIndex` state between exec() calls.
                const idPattern = /(\d){5,10}/g;
                const urlPattern = /(https%3A%2F%2F)\S+(\d){5,10}(%2F)/g;

                // The title anchor supplies both the link and the title text.
                const titleLink = item.find("div.content > div > h3 > a");
                // A missing href becomes '' so both patterns simply fail to match
                // instead of relying on exec() coercing `undefined` to a string.
                const linkValue: string = titleLink.attr("href") || '';
                const idMatch = idPattern.exec(linkValue);
                const urlMatch = urlPattern.exec(linkValue);

                const title = titleLink.text();
                const type = item.find("div.content > div > h3 > span").text();
                const desc = item.find("div.content > p").text();
                const cast = item.find(".subject-cast").text();

                // Empty or non-numeric rating text must not leak NaN into the record.
                const scoreText = item.find(".rating_nums").text().trim();
                const scoreValue = scoreText ? Number(scoreText) : NaN;

                const result: DoubanSearchResultSubject = {
                    id: idMatch ? idMatch[0] : '',
                    title: title || '-',
                    score: Number.isNaN(scoreValue) ? null : scoreValue,
                    cast: cast,
                    type: type || '-',
                    desc: desc || '-',
                    // The matched link is percent-encoded inside the href; decode it.
                    url: urlMatch ? decodeURIComponent(urlMatch[0]) : 'https://www.douban.com',
                    image: "",
                    imageUrl: "",
                    publisher: "",
                    datePublished: undefined,
                    genre: []
                };
                return result;
            });
    }

    /**
     * Parses a JSON search response whose `items` entries are HTML fragments.
     *
     * Each fragment is loaded with Cheerio and contributes its first `.result`
     * record; fragments without any `.result` element are skipped so the
     * returned list never contains `undefined` entries.
     *
     * @param result Raw JSON text with `total`, `limit`, `more` and `items` fields.
     * @param type   Search type, forwarded unchanged to the SearchPage.
     * @param start  Offset of this page; `start / limit` is forwarded to the
     *               SearchPage (presumably as the page index — confirm against SearchPage).
     * @returns The SearchPage built from the parsed records.
     * @throws SyntaxError when `result` is not valid JSON.
     */
    static parseSearchJson(result: string, type: SupportType, start: number): SearchPage {
        log.debug("解析给多页面结果");
        const data: {total: number, limit: number, more: boolean, items: string[]} = JSON.parse(result);
        // Tolerate a response without a usable `items` array.
        const fragments: string[] = Array.isArray(data.items) ? data.items : [];
        const resultList: DoubanSearchResultSubject[] = [];
        for (const fragment of fragments) {
            const first = SearchParserHandler.parseSearch(load(fragment))[0];
            if (first) {
                resultList.push(first);
            }
        }
        return new SearchPage(data.total, start / data.limit, data.limit, type, resultList);
    }
}