mirror of
https://github.com/Wanxp/obsidian-douban.git
synced 2026-04-04 16:48:44 +08:00
fix: 当 JSON-LD 解析失败时添加回退机制
豆瓣现在对未登录请求返回反爬虫验证页面,导致 JSON-LD 解析结果为 undefined。新增回退机制:从 OG meta 标签提取基本信息,防止代码崩溃。同时为 director、actor、author、genre、aliases 等数组字段添加默认值 `|| []`。
This commit is contained in:
parent
a0eccf7370
commit
297ccd33cf
@ -35,24 +35,24 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
|
|||||||
"director",
|
"director",
|
||||||
DataValueType.array,
|
DataValueType.array,
|
||||||
extract.director,
|
extract.director,
|
||||||
extract.director.map(SchemaOrg.getPersonName).filter(c => c)
|
(extract.director || []).map(SchemaOrg.getPersonName).filter(c => c)
|
||||||
));
|
));
|
||||||
|
|
||||||
variableMap.set("actor", new DataField(
|
variableMap.set("actor", new DataField(
|
||||||
"actor",
|
"actor",
|
||||||
DataValueType.array,
|
DataValueType.array,
|
||||||
extract.actor,
|
extract.actor,
|
||||||
extract.actor.map(SchemaOrg.getPersonName).filter(c => c)
|
(extract.actor || []).map(SchemaOrg.getPersonName).filter(c => c)
|
||||||
));
|
));
|
||||||
|
|
||||||
variableMap.set("author", new DataField(
|
variableMap.set("author", new DataField(
|
||||||
"author",
|
"author",
|
||||||
DataValueType.array,
|
DataValueType.array,
|
||||||
extract.author,
|
extract.author,
|
||||||
extract.author.map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c)
|
(extract.author || []).map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c)
|
||||||
));
|
));
|
||||||
variableMap.set("aliases", new DataField("aliases", DataValueType.array, extract.aliases,
|
variableMap.set("aliases", new DataField("aliases", DataValueType.array, extract.aliases,
|
||||||
extract.aliases.map(a=>a
|
(extract.aliases || []).map(a=>a
|
||||||
.trim()
|
.trim()
|
||||||
// .replace(TITLE_ALIASES_SPECIAL_CHAR_REG_G, '_')
|
// .replace(TITLE_ALIASES_SPECIAL_CHAR_REG_G, '_')
|
||||||
// //replace multiple _ to single _
|
// //replace multiple _ to single _
|
||||||
@ -98,7 +98,7 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
|
|||||||
}
|
}
|
||||||
|
|
||||||
parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanMovieSubject {
|
parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanMovieSubject {
|
||||||
const movie:DoubanMovieSubject = html('script')
|
let movie: DoubanMovieSubject | undefined = html('script')
|
||||||
.get()
|
.get()
|
||||||
.filter(scd => "application/ld+json" == html(scd).attr("type"))
|
.filter(scd => "application/ld+json" == html(scd).attr("type"))
|
||||||
.map(i => {
|
.map(i => {
|
||||||
@ -119,14 +119,14 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
|
|||||||
originalTitle: originalTitle,
|
originalTitle: originalTitle,
|
||||||
desc: obj.description,
|
desc: obj.description,
|
||||||
url: "https://movie.douban.com" + obj.url,
|
url: "https://movie.douban.com" + obj.url,
|
||||||
director: obj.director,
|
director: obj.director || [],
|
||||||
author: obj.author,
|
author: obj.author || [],
|
||||||
actor: obj.actor,
|
actor: obj.actor || [],
|
||||||
aggregateRating: obj.aggregateRating,
|
aggregateRating: obj.aggregateRating,
|
||||||
datePublished: obj.datePublished ? new Date(obj.datePublished) : undefined,
|
datePublished: obj.datePublished ? new Date(obj.datePublished) : undefined,
|
||||||
image: obj.image,
|
image: obj.image,
|
||||||
imageUrl: obj.image,
|
imageUrl: obj.image,
|
||||||
genre: obj.genre,
|
genre: obj.genre || [],
|
||||||
publisher: '',
|
publisher: '',
|
||||||
aliases: [""],
|
aliases: [""],
|
||||||
language: [""],
|
language: [""],
|
||||||
@ -136,6 +136,48 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
|
|||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
})[0];
|
})[0];
|
||||||
|
|
||||||
|
// Fallback: if JSON-LD parsing failed (e.g., anti-bot page), extract from meta tags
|
||||||
|
if (!movie) {
|
||||||
|
const title = html(html("head > meta[property='og:title']").get(0)).attr("content") || '';
|
||||||
|
const image = html(html("head > meta[property='og:image']").get(0)).attr("content") || '';
|
||||||
|
const urlMeta = html(html("head > meta[property='og:url']").get(0)).attr("content") || '';
|
||||||
|
const desc = html(html("head > meta[property='og:description']").get(0)).attr("content") || '';
|
||||||
|
|
||||||
|
// Extract ID from URL
|
||||||
|
const idPattern = /(\d){5,10}/g;
|
||||||
|
const idMatch = idPattern.exec(urlMeta);
|
||||||
|
const id = idMatch ? idMatch[0] : '';
|
||||||
|
|
||||||
|
// Extract score from HTML
|
||||||
|
const scoreText = html("#interest_sectl strong[property='v:average']").text();
|
||||||
|
const score = scoreText ? parseFloat(scoreText) : undefined;
|
||||||
|
|
||||||
|
movie = {
|
||||||
|
id,
|
||||||
|
title,
|
||||||
|
type: this.getSupportType(),
|
||||||
|
score,
|
||||||
|
originalTitle: title,
|
||||||
|
desc,
|
||||||
|
url: urlMeta || (id ? `https://movie.douban.com/subject/${id}/` : ''),
|
||||||
|
director: [],
|
||||||
|
author: [],
|
||||||
|
actor: [],
|
||||||
|
aggregateRating: undefined,
|
||||||
|
datePublished: undefined,
|
||||||
|
image,
|
||||||
|
imageUrl: image,
|
||||||
|
genre: [],
|
||||||
|
publisher: '',
|
||||||
|
aliases: [],
|
||||||
|
language: [],
|
||||||
|
country: [],
|
||||||
|
time: null,
|
||||||
|
IMDb: null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
this.handlePersonNameByMeta(html, movie, context, 'video:actor', 'actor');
|
this.handlePersonNameByMeta(html, movie, context, 'video:actor', 'actor');
|
||||||
this.handlePersonNameByMeta(html, movie, context, 'video:director', 'director');
|
this.handlePersonNameByMeta(html, movie, context, 'video:director', 'director');
|
||||||
|
|
||||||
|
|||||||
@ -25,22 +25,22 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
|
|||||||
}
|
}
|
||||||
|
|
||||||
parseVariable(beforeContent: string, variableMap:Map<string, DataField>, extract: DoubanTeleplaySubject, context: HandleContext): void {
|
parseVariable(beforeContent: string, variableMap:Map<string, DataField>, extract: DoubanTeleplaySubject, context: HandleContext): void {
|
||||||
variableMap.set("director", new DataField("director", DataValueType.array, extract.director,extract.director.map(SchemaOrg.getPersonName).filter(c => c)));
|
variableMap.set("director", new DataField("director", DataValueType.array, extract.director,(extract.director || []).map(SchemaOrg.getPersonName).filter(c => c)));
|
||||||
variableMap.set("actor", new DataField(
|
variableMap.set("actor", new DataField(
|
||||||
"actor",
|
"actor",
|
||||||
DataValueType.array,
|
DataValueType.array,
|
||||||
extract.actor,
|
extract.actor,
|
||||||
extract.actor.map(SchemaOrg.getPersonName).filter(c => c)
|
(extract.actor || []).map(SchemaOrg.getPersonName).filter(c => c)
|
||||||
));
|
));
|
||||||
|
|
||||||
variableMap.set("author", new DataField(
|
variableMap.set("author", new DataField(
|
||||||
"author",
|
"author",
|
||||||
DataValueType.array,
|
DataValueType.array,
|
||||||
extract.author,
|
extract.author,
|
||||||
extract.author.map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c)
|
(extract.author || []).map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c)
|
||||||
));
|
));
|
||||||
variableMap.set("aliases", new DataField("aliases", DataValueType.array, extract.aliases,
|
variableMap.set("aliases", new DataField("aliases", DataValueType.array, extract.aliases,
|
||||||
extract.aliases.map(a=>a
|
(extract.aliases || []).map(a=>a
|
||||||
.trim()
|
.trim()
|
||||||
// .replace(TITLE_ALIASES_SPECIAL_CHAR_REG_G, '_')
|
// .replace(TITLE_ALIASES_SPECIAL_CHAR_REG_G, '_')
|
||||||
// //replace multiple _ to single _
|
// //replace multiple _ to single _
|
||||||
@ -84,7 +84,7 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
|
|||||||
}
|
}
|
||||||
|
|
||||||
parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanTeleplaySubject {
|
parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanTeleplaySubject {
|
||||||
const teleplay:DoubanTeleplaySubject = html('script')
|
let teleplay: DoubanTeleplaySubject | undefined = html('script')
|
||||||
.get()
|
.get()
|
||||||
.filter(scd => "application/ld+json" == html(scd).attr("type"))
|
.filter(scd => "application/ld+json" == html(scd).attr("type"))
|
||||||
.map(i => {
|
.map(i => {
|
||||||
@ -104,14 +104,14 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
|
|||||||
originalTitle: originalTitle,
|
originalTitle: originalTitle,
|
||||||
desc: obj.description,
|
desc: obj.description,
|
||||||
url: "https://movie.douban.com" + obj.url,
|
url: "https://movie.douban.com" + obj.url,
|
||||||
director: obj.director,
|
director: obj.director || [],
|
||||||
author: obj.author,
|
author: obj.author || [],
|
||||||
actor: obj.actor,
|
actor: obj.actor || [],
|
||||||
aggregateRating: obj.aggregateRating,
|
aggregateRating: obj.aggregateRating,
|
||||||
datePublished: obj.datePublished ? new Date(obj.datePublished) : undefined,
|
datePublished: obj.datePublished ? new Date(obj.datePublished) : undefined,
|
||||||
image: obj.image,
|
image: obj.image,
|
||||||
imageUrl: obj.image,
|
imageUrl: obj.image,
|
||||||
genre: obj.genre,
|
genre: obj.genre || [],
|
||||||
score: obj.aggregateRating ? obj.aggregateRating.ratingValue : undefined,
|
score: obj.aggregateRating ? obj.aggregateRating.ratingValue : undefined,
|
||||||
publisher: "",
|
publisher: "",
|
||||||
aliases: [""],
|
aliases: [""],
|
||||||
@ -124,6 +124,46 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
|
|||||||
return result;
|
return result;
|
||||||
})[0];
|
})[0];
|
||||||
|
|
||||||
|
// Fallback: if JSON-LD parsing failed, extract from meta tags
|
||||||
|
if (!teleplay) {
|
||||||
|
const title = html(html("head > meta[property='og:title']").get(0)).attr("content") || '';
|
||||||
|
const image = html(html("head > meta[property='og:image']").get(0)).attr("content") || '';
|
||||||
|
const urlMeta = html(html("head > meta[property='og:url']").get(0)).attr("content") || '';
|
||||||
|
const desc = html(html("head > meta[property='og:description']").get(0)).attr("content") || '';
|
||||||
|
|
||||||
|
const idPattern = /(\d){5,10}/g;
|
||||||
|
const idMatch = idPattern.exec(urlMeta);
|
||||||
|
const id = idMatch ? idMatch[0] : '';
|
||||||
|
|
||||||
|
const scoreText = html("#interest_sectl strong[property='v:average']").text();
|
||||||
|
const score = scoreText ? parseFloat(scoreText) : undefined;
|
||||||
|
|
||||||
|
teleplay = {
|
||||||
|
id,
|
||||||
|
title,
|
||||||
|
type: this.getSupportType(),
|
||||||
|
score,
|
||||||
|
originalTitle: title,
|
||||||
|
desc,
|
||||||
|
url: urlMeta || (id ? `https://movie.douban.com/subject/${id}/` : ''),
|
||||||
|
director: [],
|
||||||
|
author: [],
|
||||||
|
actor: [],
|
||||||
|
aggregateRating: undefined,
|
||||||
|
datePublished: undefined,
|
||||||
|
image,
|
||||||
|
imageUrl: image,
|
||||||
|
genre: [],
|
||||||
|
publisher: '',
|
||||||
|
aliases: [],
|
||||||
|
language: [],
|
||||||
|
country: [],
|
||||||
|
episode: null,
|
||||||
|
time: null,
|
||||||
|
IMDb: null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
this.handlePersonNameByMeta(html, teleplay, context, 'video:actor', 'actor');
|
this.handlePersonNameByMeta(html, teleplay, context, 'video:actor', 'actor');
|
||||||
this.handlePersonNameByMeta(html, teleplay, context, 'video:director', 'director');
|
this.handlePersonNameByMeta(html, teleplay, context, 'video:director', 'director');
|
||||||
const desc:string = html("span[property='v:summary']").text();
|
const desc:string = html("span[property='v:summary']").text();
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user