fix search error

This commit is contained in:
HughWan 2023-12-21 13:27:16 +08:00
parent 1b5b9bb0b3
commit f0c1421f5d
11 changed files with 129 additions and 104 deletions

@ -76,14 +76,14 @@
## 支持的字段 ## 支持的字段
(若有缺少想导入的字段, 欢迎提issues反馈) (若有缺少想导入的字段, 欢迎提issues反馈)
| 字段 | 电影 | 电视剧 | 书籍 | 音乐 | 日记 | 游戏 | 广播 | | 字段 | 电影 | 电视剧 | 书籍 | 音乐 | 日记 | 游戏 | * |
|------------------|-------------------|------------------|-------------------|----------------|----------------|---------------| ---- | |------------------|-------------------|------------------|-------------------|----------------|----------------|---------------|-------|
| id | 豆瓣ID | 豆瓣ID | 豆瓣ID | 豆瓣ID | 豆瓣ID | 豆瓣ID | - | | id | 豆瓣ID | 豆瓣ID | 豆瓣ID | 豆瓣ID | 豆瓣ID | 豆瓣ID | id |
| title | 电影名称 | 电视剧名称 | 书名 | 音乐名 | 日记标题 | 游戏名称 | - | | title | 电影名称 | 电视剧名称 | 书名 | 音乐名 | 日记标题 | 游戏名称 | 剧名 |
| type | 类型 | 类型 | 类型 | 类型 | 类型 | 类型 | - | | type | 类型 | 类型 | 类型 | 类型 | 类型 | 类型 | 类型 |
| score | 评分 | 评分 | 评分 | 评分 | 评分 | 评分 | - | | score | 评分 | 评分 | 评分 | 评分 | 评分 | 评分 | 评分 |
| image | 封面 | 封面 | 封面 | 封面 | 图片 | 封面 | - | | image | 封面 | 封面 | 封面 | 封面 | 图片 | 封面 | 封面 |
| imageData.url | 封面url | 封面url | 封面url | 封面url | 封面url | 封面url | - | | imageData.url | 封面url | 封面url | 封面url | 封面url | 封面url | 封面url | 封面url |
| url | 豆瓣网址 | 豆瓣网址 | 豆瓣网址 | 豆瓣网址 | 豆瓣网址 | 豆瓣网址 | - | | url | 豆瓣网址 | 豆瓣网址 | 豆瓣网址 | 豆瓣网址 | 豆瓣网址 | 豆瓣网址 | - |
| desc | 简介 | 简介 | 内容简介 | 简介 | 简介 | 简介 | - | | desc | 简介 | 简介 | 内容简介 | 简介 | 简介 | 简介 | - |
| publisher | - | - | 出版社 | 出版者 | 发布者 | 发行商 | - | | publisher | - | - | 出版社 | 出版者 | 发布者 | 发行商 | - |

@ -42,10 +42,10 @@ export default abstract class DoubanAbstractLoadHandler<T extends DoubanSubject>
} }
async parse(extract: T, context: HandleContext): Promise<HandleResult> { async parse(extract: T, context: HandleContext): Promise<HandleResult> {
let template: string = await this.getTemplate(extract, context); const template: string = await this.getTemplate(extract, context);
await this.saveImage(extract, context); await this.saveImage(extract, context);
let frontMatterStart: number = template.indexOf(BasicConst.YAML_FRONT_MATTER_SYMBOL, 0); const frontMatterStart: number = template.indexOf(BasicConst.YAML_FRONT_MATTER_SYMBOL, 0);
let frontMatterEnd: number = template.indexOf(BasicConst.YAML_FRONT_MATTER_SYMBOL, frontMatterStart + 1); const frontMatterEnd: number = template.indexOf(BasicConst.YAML_FRONT_MATTER_SYMBOL, frontMatterStart + 1);
let frontMatter: string = ''; let frontMatter: string = '';
let frontMatterBefore: string = ''; let frontMatterBefore: string = '';
let frontMatterAfter: string = ''; let frontMatterAfter: string = '';
@ -156,7 +156,7 @@ export default abstract class DoubanAbstractLoadHandler<T extends DoubanSubject>
if (context.syncActive) { if (context.syncActive) {
guessType = this.getGuessType(data); guessType = this.getGuessType(data);
} }
let sub = this.parseSubjectFromHtml(data, context); const sub = this.parseSubjectFromHtml(data, context);
sub.userState = userState; sub.userState = userState;
sub.guessType = guessType; sub.guessType = guessType;
return sub; return sub;
@ -166,14 +166,14 @@ export default abstract class DoubanAbstractLoadHandler<T extends DoubanSubject>
.catch(e => { .catch(e => {
log.error(i18nHelper.getMessage('130101', e.toString()), e); log.error(i18nHelper.getMessage('130101', e.toString()), e);
if (url) { if (url) {
let id = StringUtil.analyzeIdByUrl(url); const id = StringUtil.analyzeIdByUrl(url);
context.syncStatusHolder?context.syncStatusHolder.syncStatus.fail(id, ''):null; context.syncStatusHolder?context.syncStatusHolder.syncStatus.fail(id, ''):null;
}else { }else {
context.syncStatusHolder?context.syncStatusHolder.syncStatus.handled(1):null; context.syncStatusHolder?context.syncStatusHolder.syncStatus.handled(1):null;
} }
return e; return e;
}); });
;
} }
@ -223,7 +223,7 @@ export default abstract class DoubanAbstractLoadHandler<T extends DoubanSubject>
resultName = chineseName; resultName = chineseName;
break; break;
case PersonNameMode.EN_NAME: case PersonNameMode.EN_NAME:
resultName = originalName.trim().replaceAll(chineseName, '').trim(); resultName = originalName.trim().replace(chineseName, '').trim();
if (!resultName) { if (!resultName) {
resultName = originalName; resultName = originalName;
} }
@ -539,6 +539,7 @@ export default abstract class DoubanAbstractLoadHandler<T extends DoubanSubject>
}) })
} }
protected getPropertyValue(html: CheerioAPI, name: PropertyName): string { protected getPropertyValue(html: CheerioAPI, name: PropertyName): string {
return HtmlUtil.getHtmlText(html, this.doubanPlugin.settingsManager.getSelector(this.getSupportType(), name)); return HtmlUtil.getHtmlText(html, this.doubanPlugin.settingsManager.getSelector(this.getSupportType(), name));
} }

@ -55,13 +55,13 @@ export default class DoubanBookLoadHandler extends DoubanAbstractLoadHandler<Dou
} }
analysisUser(html: CheerioAPI, context: HandleContext): {data:CheerioAPI , userState: UserStateSubject} { analysisUser(html: CheerioAPI, context: HandleContext): {data:CheerioAPI , userState: UserStateSubject} {
let rate = html('input#n_rating').val(); const rate = html('input#n_rating').val();
let tagsStr = html('span#rating').next().text().trim(); const tagsStr = html('span#rating').next().text().trim();
let tags = tagsStr ? tagsStr.replace('标签:', '').trim().split(' ') : null; const tags = tagsStr ? tagsStr.replace('标签:', '').trim().split(' ') : null;
let stateWord = html('div#interest_sect_level > div.a_stars > span.mr10').text().trim(); const stateWord = html('div#interest_sect_level > div.a_stars > span.mr10').text().trim();
let collectionDateStr = html('div#interest_sect_level > div.a_stars > span.mr10').next().text().trim(); const collectionDateStr = html('div#interest_sect_level > div.a_stars > span.mr10').next().text().trim();
let userState1 = DoubanAbstractLoadHandler.getUserState(stateWord); const userState1 = DoubanAbstractLoadHandler.getUserState(stateWord);
let comment = this.getComment(html); const comment = this.getComment(html);
const userState: UserStateSubject = { const userState: UserStateSubject = {
@ -76,28 +76,25 @@ export default class DoubanBookLoadHandler extends DoubanAbstractLoadHandler<Dou
parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanBookSubject { parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanBookSubject {
let desc = html('#link-report > span.all.hidden > div > div[class= "intro"]').html(); let desc = html(".intro p").text();
if (desc) { if (!desc) {
//替换p标签 为换行符 desc = html(html("head > meta[property= 'og:description']").get(0)).attr("content");
desc = desc.replace(/<p>/g, '').replace(/<\/p>/g, '\n');
//去掉开头的换行符
desc = desc.replace(/^\n/, '');
} }
let image = html(html("head > meta[property= 'og:image']").get(0)).attr("content"); const image = html(html("head > meta[property= 'og:image']").get(0)).attr("content");
let item = html(html("head > script[type='application/ld+json']").get(0)).text(); let item = html(html("head > script[type='application/ld+json']").get(0)).text();
item = super.html_decode(item); item = super.html_decode(item);
let obj = JSON.parse(item.replace(/[\r\n\s+]/g, '')); const obj = JSON.parse(item.replace(/[\r\n\s+]/g, ''));
let title = obj.name; const title = obj.name;
let url = obj.url; const url = obj.url;
let author = obj.author.map((a: any) => a.name); const author = obj.author.map((a: any) => a.name);
let isbn = obj.isbn; const isbn = obj.isbn;
let score = html(html("#interest_sectl > div > div.rating_self.clearfix > strong[property= 'v:average']").get(0)).text(); const score = html(html("#interest_sectl > div > div.rating_self.clearfix > strong[property= 'v:average']").get(0)).text();
let detailDom = html(html("#info").get(0)); const detailDom = html(html("#info").get(0));
let publish = detailDom.find("span.pl"); const publish = detailDom.find("span.pl");
let valueMap = new Map<string, any>(); const valueMap = new Map<string, any>();
publish.map((index, info) => { publish.map((index, info) => {
let key = html(info).text().trim(); let key = html(info).text().trim();

@ -70,7 +70,7 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
private getComment(html: CheerioAPI, context: HandleContext) { private getComment(html: CheerioAPI, context: HandleContext) {
let component = html('div#interest_sect_level > div.a_stars > span.color_gray').next().next().text().trim(); const component = html('div#interest_sect_level > div.a_stars > span.color_gray').next().next().text().trim();
if (component) { if (component) {
return component; return component;
} }
@ -84,12 +84,12 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
.map(i => { .map(i => {
let item = html(i).text(); let item = html(i).text();
item = super.html_decode(item); item = super.html_decode(item);
let obj = JSON.parse(item.replace(/[\r\n\s+]/g, '')); const obj = JSON.parse(item.replace(/[\r\n+]/g, ''));
let idPattern = /(\d){5,10}/g; const idPattern = /(\d){5,10}/g;
let id = idPattern.exec(obj.url); const id = idPattern.exec(obj.url);
let name = obj.name; const name = obj.name;
let title = super.getTitleNameByMode(name, PersonNameMode.CH_NAME, context)??name; const title = super.getTitleNameByMode(name, PersonNameMode.CH_NAME, context)??name;
let originalTitle = super.getTitleNameByMode(name, PersonNameMode.EN_NAME, context) ?? name; const originalTitle = super.getTitleNameByMode(name, PersonNameMode.EN_NAME, context) ?? name;
const result: DoubanMovieSubject = { const result: DoubanMovieSubject = {
id: id ? id[0] : '', id: id ? id[0] : '',
@ -119,17 +119,22 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
this.handlePersonNameByMeta(html, movie, context, 'video:actor', 'actor'); this.handlePersonNameByMeta(html, movie, context, 'video:actor', 'actor');
this.handlePersonNameByMeta(html, movie, context, 'video:director', 'director'); this.handlePersonNameByMeta(html, movie, context, 'video:director', 'director');
let detailDom = html(html("#info").get(0)); const desc:string = html("span[property='v:summary']").text();
let publish = detailDom.find("span.pl"); if (desc) {
movie.desc = desc;
}
let valueMap = new Map<string, any>(); const detailDom = html(html("#info").get(0));
const publish = detailDom.find("span.pl");
const valueMap = new Map<string, any>();
publish.map((index, info) => { publish.map((index, info) => {
let key = html(info).text().trim(); const key = html(info).text().trim();
let value; let value;
if (key.indexOf('又名') >= 0 || key.indexOf('语言') >= 0 || key.indexOf('制片国家') >= 0) { if (key.indexOf('又名') >= 0 || key.indexOf('语言') >= 0 || key.indexOf('制片国家') >= 0) {
// value = html(info.next.next).text().trim(); // value = html(info.next.next).text().trim();
let vas = html(info.next).text().trim(); const vas = html(info.next).text().trim();
value = vas.split("/").map((v) => v.trim()); value = vas.split("/").map((v) => v.trim());
} else if(key.indexOf('片长') >= 0) { } else if(key.indexOf('片长') >= 0) {
value = html(info.next.next).text().trim() value = html(info.next.next).text().trim()

@ -42,13 +42,13 @@ export default class DoubanMusicLoadHandler extends DoubanAbstractLoadHandler<Do
} }
analysisUser(html: CheerioAPI, context: HandleContext): {data:CheerioAPI , userState: UserStateSubject} { analysisUser(html: CheerioAPI, context: HandleContext): {data:CheerioAPI , userState: UserStateSubject} {
let rate = html('input#n_rating').val(); const rate = html('input#n_rating').val();
let tagsStr = html('span#rating').next().next().text().trim(); const tagsStr = html('span#rating').next().next().text().trim();
let tags = tagsStr ? tagsStr.replace('标签:', '').trim().split(' ') : null; const tags = tagsStr ? tagsStr.replace('标签:', '').trim().split(' ') : null;
let stateWord = html('div#interest_sect_level > div.a_stars > span.mr10').text().trim(); const stateWord = html('div#interest_sect_level > div.a_stars > span.mr10').text().trim();
let collectionDateStr = html('div#interest_sect_level > div.a_stars > span.mr10').next().text().trim(); const collectionDateStr = html('div#interest_sect_level > div.a_stars > span.mr10').next().text().trim();
let userState1 = DoubanAbstractLoadHandler.getUserState(stateWord); const userState1 = DoubanAbstractLoadHandler.getUserState(stateWord);
let component = html('span#rating').next().next().next().next().text().trim(); const component = html('span#rating').next().next().next().next().text().trim();
const userState: UserStateSubject = { const userState: UserStateSubject = {
tags: tags, tags: tags,
@ -61,15 +61,22 @@ export default class DoubanMusicLoadHandler extends DoubanAbstractLoadHandler<Do
} }
parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanMusicSubject { parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanMusicSubject {
let title = html(html("head > meta[property= 'og:title']").get(0)).attr("content"); const title = html(html("head > meta[property= 'og:title']").get(0)).attr("content");
let desc = html(html("head > meta[property= 'og:description']").get(0)).attr("content"); let desc:string = html("span.all.hidden").text();
let url = html(html("head > meta[property= 'og:url']").get(0)).attr("content"); if (!desc) {
let image = html(html("head > meta[property= 'og:image']").get(0)).attr("content"); desc = html("span[property='v:summary']").text();
let score = html(html("#interest_sectl > div > div.rating_self.clearfix > strong[property= 'v:average']").get(0)).text(); }
let detailDom = html(html("#info").get(0)); if (!desc) {
let publish = detailDom.find("span.pl"); desc = html(html("head > meta[property= 'og:description']").get(0)).attr("content");
}
let valueMap = new Map<string, string>(); const url = html(html("head > meta[property= 'og:url']").get(0)).attr("content");
const image = html(html("head > meta[property= 'og:image']").get(0)).attr("content");
const score = html(html("#interest_sectl > div > div.rating_self.clearfix > strong[property= 'v:average']").get(0)).text();
const detailDom = html(html("#info").get(0));
const publish = detailDom.find("span.pl");
const valueMap = new Map<string, string>();
publish.map((index, info) => { publish.map((index, info) => {
let key = html(info).text().trim(); let key = html(info).text().trim();
@ -106,6 +113,9 @@ export default class DoubanMusicLoadHandler extends DoubanAbstractLoadHandler<Do
medium: valueMap.has('medium') ? valueMap.get('medium') : "", medium: valueMap.has('medium') ? valueMap.get('medium') : "",
barcode: valueMap.has('barcode') ? valueMap.get('barcode') : "" barcode: valueMap.has('barcode') ? valueMap.get('barcode') : ""
}; };
return result; return result;
} }

@ -49,11 +49,10 @@ export class DoubanSearchChooseItemHandler {
} }
public async parseText(extract: DoubanSubject, context: HandleContext): Promise<HandleResult> { public async parseText(extract: DoubanSubject, context: HandleContext): Promise<HandleResult> {
let doubanSubjectHandlers: DoubanSubjectLoadHandler<DoubanSubject>[] = this._doubanSubjectHandlers const doubanSubjectHandlers: DoubanSubjectLoadHandler<DoubanSubject>[] = this._doubanSubjectHandlers
.filter(h => h.support(extract)); .filter(h => h.support(extract));
let result:string='';
if (doubanSubjectHandlers && doubanSubjectHandlers.length > 0) { if (doubanSubjectHandlers && doubanSubjectHandlers.length > 0) {
let result = await doubanSubjectHandlers.map(h => h.parse(extract, context)); const result = await doubanSubjectHandlers.map(h => h.parse(extract, context));
if (result && result.length > 0) { if (result && result.length > 0) {
return result[0]; return result[0];
} else { } else {

@ -78,12 +78,12 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
.map(i => { .map(i => {
let item = html(i).text(); let item = html(i).text();
item = super.html_decode(item); item = super.html_decode(item);
let obj = JSON.parse(item.replace(/[\r\n\s+]/g, '')); const obj = JSON.parse(item.replace(/[\r\n\s+]/g, ''));
let idPattern = /(\d){5,10}/g; const idPattern = /(\d){5,10}/g;
let id = idPattern.exec(obj.url); const id = idPattern.exec(obj.url);
let name = obj.name; const name = obj.name;
let title = super.getTitleNameByMode(name, PersonNameMode.CH_NAME, context)??name; const title = super.getTitleNameByMode(name, PersonNameMode.CH_NAME, context)??name;
let originalTitle = super.getTitleNameByMode(name, PersonNameMode.EN_NAME, context) ?? name; const originalTitle = super.getTitleNameByMode(name, PersonNameMode.EN_NAME, context) ?? name;
const result: DoubanTeleplaySubject = { const result: DoubanTeleplaySubject = {
id: id ? id[0] : '', id: id ? id[0] : '',
@ -114,19 +114,22 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
this.handlePersonNameByMeta(html, teleplay, context, 'video:actor', 'actor'); this.handlePersonNameByMeta(html, teleplay, context, 'video:actor', 'actor');
this.handlePersonNameByMeta(html, teleplay, context, 'video:director', 'director'); this.handlePersonNameByMeta(html, teleplay, context, 'video:director', 'director');
const desc:string = html("span[property='v:summary']").text();
if (desc) {
teleplay.desc = desc;
}
const detailDom = html(html("#info").get(0));
const publish = detailDom.find("span.pl");
let detailDom = html(html("#info").get(0)); const valueMap = new Map<string, any>();
let publish = detailDom.find("span.pl");
let valueMap = new Map<string, any>();
publish.map((index, info) => { publish.map((index, info) => {
let key = html(info).text().trim(); const key = html(info).text().trim();
let value; let value;
if (key.indexOf('又名') >= 0 || key.indexOf('语言') >= 0 || key.indexOf('制片国家') >= 0) { if (key.indexOf('又名') >= 0 || key.indexOf('语言') >= 0 || key.indexOf('制片国家') >= 0) {
// value = html(info.next.next).text().trim(); // value = html(info.next.next).text().trim();
let vas = html(info.next).text().trim(); const vas = html(info.next).text().trim();
value = vas.split("/").map((v) => v.trim()); value = vas.split("/").map((v) => v.trim());
}else { }else {
value = html(info.next).text().trim(); value = html(info.next).text().trim();

@ -90,7 +90,11 @@ export default class SettingsManager {
return doubanPluginSubjectProperty.selectors; return doubanPluginSubjectProperty.selectors;
} }
} }
return ONLINE_SETTING_DEFAULT.properties.find(subjectProperty => subjectProperty.type === itemType && subjectProperty.name === propertyName).selectors; const doubanPluginSubjectProperty = ONLINE_SETTING_DEFAULT.properties.find(subjectProperty => subjectProperty.type === itemType && subjectProperty.name === propertyName);
if(doubanPluginSubjectProperty) {
return doubanPluginSubjectProperty.selectors;
}
return [];
} }
handleArray(arr: string[]): string { handleArray(arr: string[]): string {

@ -8,6 +8,9 @@ export default class HtmlUtil {
* @param selector * @param selector
*/ */
public static getHtmlText(html: CheerioAPI, selector: string | string[]): string { public static getHtmlText(html: CheerioAPI, selector: string | string[]): string {
if (!selector) {
return null;
}
if (typeof selector == 'string') { if (typeof selector == 'string') {
return html(selector).text().trim(); return html(selector).text().trim();
}else { }else {

@ -25,6 +25,8 @@ export default class StringUtil {
return id; return id;
} }
/** /**
* request headers json * request headers json
* @param text * @param text

@ -16,6 +16,7 @@ export default class YamlUtil {
public static handleText(text: string) { public static handleText(text: string) {
return YamlUtil.hasSpecialChar(text) ? YamlUtil.handleSpecialChar(text.replaceAll('"', '\\"')) return YamlUtil.hasSpecialChar(text) ? YamlUtil.handleSpecialChar(text.replaceAll('"', '\\"'))
.replaceAll(/\s+/g,' ')
.replaceAll('\n', '。') .replaceAll('\n', '。')
.replaceAll('。。', '。') : text; .replaceAll('。。', '。') : text;
} }