mirror of
https://github.com/Wanxp/obsidian-douban.git
synced 2026-04-05 01:28:46 +08:00
fix: 当 JSON-LD 解析失败时添加回退机制
豆瓣现在对未登录请求返回反爬虫验证页面,导致 JSON-LD 解析返回 undefined。添加从 OG meta 标签提取基本信息的回退机制,防止代码崩溃。同时给所有数组字段添加默认值 `|| []`。
This commit is contained in:
parent
a0eccf7370
commit
297ccd33cf
@ -35,24 +35,24 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
|
||||
"director",
|
||||
DataValueType.array,
|
||||
extract.director,
|
||||
extract.director.map(SchemaOrg.getPersonName).filter(c => c)
|
||||
(extract.director || []).map(SchemaOrg.getPersonName).filter(c => c)
|
||||
));
|
||||
|
||||
variableMap.set("actor", new DataField(
|
||||
"actor",
|
||||
DataValueType.array,
|
||||
extract.actor,
|
||||
extract.actor.map(SchemaOrg.getPersonName).filter(c => c)
|
||||
(extract.actor || []).map(SchemaOrg.getPersonName).filter(c => c)
|
||||
));
|
||||
|
||||
variableMap.set("author", new DataField(
|
||||
"author",
|
||||
DataValueType.array,
|
||||
extract.author,
|
||||
extract.author.map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c)
|
||||
(extract.author || []).map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c)
|
||||
));
|
||||
variableMap.set("aliases", new DataField("aliases", DataValueType.array, extract.aliases,
|
||||
extract.aliases.map(a=>a
|
||||
(extract.aliases || []).map(a=>a
|
||||
.trim()
|
||||
// .replace(TITLE_ALIASES_SPECIAL_CHAR_REG_G, '_')
|
||||
// //replace multiple _ to single _
|
||||
@ -98,7 +98,7 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
|
||||
}
|
||||
|
||||
parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanMovieSubject {
|
||||
const movie:DoubanMovieSubject = html('script')
|
||||
let movie: DoubanMovieSubject | undefined = html('script')
|
||||
.get()
|
||||
.filter(scd => "application/ld+json" == html(scd).attr("type"))
|
||||
.map(i => {
|
||||
@ -119,14 +119,14 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
|
||||
originalTitle: originalTitle,
|
||||
desc: obj.description,
|
||||
url: "https://movie.douban.com" + obj.url,
|
||||
director: obj.director,
|
||||
author: obj.author,
|
||||
actor: obj.actor,
|
||||
director: obj.director || [],
|
||||
author: obj.author || [],
|
||||
actor: obj.actor || [],
|
||||
aggregateRating: obj.aggregateRating,
|
||||
datePublished: obj.datePublished ? new Date(obj.datePublished) : undefined,
|
||||
image: obj.image,
|
||||
imageUrl: obj.image,
|
||||
genre: obj.genre,
|
||||
genre: obj.genre || [],
|
||||
publisher: '',
|
||||
aliases: [""],
|
||||
language: [""],
|
||||
@ -136,6 +136,48 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler<Do
|
||||
}
|
||||
return result;
|
||||
})[0];
|
||||
|
||||
// Fallback: if JSON-LD parsing failed (e.g., anti-bot page), extract from meta tags
|
||||
if (!movie) {
|
||||
const title = html(html("head > meta[property='og:title']").get(0)).attr("content") || '';
|
||||
const image = html(html("head > meta[property='og:image']").get(0)).attr("content") || '';
|
||||
const urlMeta = html(html("head > meta[property='og:url']").get(0)).attr("content") || '';
|
||||
const desc = html(html("head > meta[property='og:description']").get(0)).attr("content") || '';
|
||||
|
||||
// Extract ID from URL
|
||||
const idPattern = /(\d){5,10}/g;
|
||||
const idMatch = idPattern.exec(urlMeta);
|
||||
const id = idMatch ? idMatch[0] : '';
|
||||
|
||||
// Extract score from HTML
|
||||
const scoreText = html("#interest_sectl strong[property='v:average']").text();
|
||||
const score = scoreText ? parseFloat(scoreText) : undefined;
|
||||
|
||||
movie = {
|
||||
id,
|
||||
title,
|
||||
type: this.getSupportType(),
|
||||
score,
|
||||
originalTitle: title,
|
||||
desc,
|
||||
url: urlMeta || (id ? `https://movie.douban.com/subject/${id}/` : ''),
|
||||
director: [],
|
||||
author: [],
|
||||
actor: [],
|
||||
aggregateRating: undefined,
|
||||
datePublished: undefined,
|
||||
image,
|
||||
imageUrl: image,
|
||||
genre: [],
|
||||
publisher: '',
|
||||
aliases: [],
|
||||
language: [],
|
||||
country: [],
|
||||
time: null,
|
||||
IMDb: null,
|
||||
};
|
||||
}
|
||||
|
||||
this.handlePersonNameByMeta(html, movie, context, 'video:actor', 'actor');
|
||||
this.handlePersonNameByMeta(html, movie, context, 'video:director', 'director');
|
||||
|
||||
|
||||
@ -25,22 +25,22 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
|
||||
}
|
||||
|
||||
parseVariable(beforeContent: string, variableMap:Map<string, DataField>, extract: DoubanTeleplaySubject, context: HandleContext): void {
|
||||
variableMap.set("director", new DataField("director", DataValueType.array, extract.director,extract.director.map(SchemaOrg.getPersonName).filter(c => c)));
|
||||
variableMap.set("director", new DataField("director", DataValueType.array, extract.director,(extract.director || []).map(SchemaOrg.getPersonName).filter(c => c)));
|
||||
variableMap.set("actor", new DataField(
|
||||
"actor",
|
||||
DataValueType.array,
|
||||
extract.actor,
|
||||
extract.actor.map(SchemaOrg.getPersonName).filter(c => c)
|
||||
(extract.actor || []).map(SchemaOrg.getPersonName).filter(c => c)
|
||||
));
|
||||
|
||||
variableMap.set("author", new DataField(
|
||||
"author",
|
||||
DataValueType.array,
|
||||
extract.author,
|
||||
extract.author.map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c)
|
||||
(extract.author || []).map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c)
|
||||
));
|
||||
variableMap.set("aliases", new DataField("aliases", DataValueType.array, extract.aliases,
|
||||
extract.aliases.map(a=>a
|
||||
(extract.aliases || []).map(a=>a
|
||||
.trim()
|
||||
// .replace(TITLE_ALIASES_SPECIAL_CHAR_REG_G, '_')
|
||||
// //replace multiple _ to single _
|
||||
@ -84,7 +84,7 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
|
||||
}
|
||||
|
||||
parseSubjectFromHtml(html: CheerioAPI, context: HandleContext): DoubanTeleplaySubject {
|
||||
const teleplay:DoubanTeleplaySubject = html('script')
|
||||
let teleplay: DoubanTeleplaySubject | undefined = html('script')
|
||||
.get()
|
||||
.filter(scd => "application/ld+json" == html(scd).attr("type"))
|
||||
.map(i => {
|
||||
@ -104,14 +104,14 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
|
||||
originalTitle: originalTitle,
|
||||
desc: obj.description,
|
||||
url: "https://movie.douban.com" + obj.url,
|
||||
director: obj.director,
|
||||
author: obj.author,
|
||||
actor: obj.actor,
|
||||
director: obj.director || [],
|
||||
author: obj.author || [],
|
||||
actor: obj.actor || [],
|
||||
aggregateRating: obj.aggregateRating,
|
||||
datePublished: obj.datePublished ? new Date(obj.datePublished) : undefined,
|
||||
image: obj.image,
|
||||
imageUrl: obj.image,
|
||||
genre: obj.genre,
|
||||
genre: obj.genre || [],
|
||||
score: obj.aggregateRating ? obj.aggregateRating.ratingValue : undefined,
|
||||
publisher: "",
|
||||
aliases: [""],
|
||||
@ -124,6 +124,46 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler<DoubanT
|
||||
return result;
|
||||
})[0];
|
||||
|
||||
// Fallback: if JSON-LD parsing failed, extract from meta tags
|
||||
if (!teleplay) {
|
||||
const title = html(html("head > meta[property='og:title']").get(0)).attr("content") || '';
|
||||
const image = html(html("head > meta[property='og:image']").get(0)).attr("content") || '';
|
||||
const urlMeta = html(html("head > meta[property='og:url']").get(0)).attr("content") || '';
|
||||
const desc = html(html("head > meta[property='og:description']").get(0)).attr("content") || '';
|
||||
|
||||
const idPattern = /(\d){5,10}/g;
|
||||
const idMatch = idPattern.exec(urlMeta);
|
||||
const id = idMatch ? idMatch[0] : '';
|
||||
|
||||
const scoreText = html("#interest_sectl strong[property='v:average']").text();
|
||||
const score = scoreText ? parseFloat(scoreText) : undefined;
|
||||
|
||||
teleplay = {
|
||||
id,
|
||||
title,
|
||||
type: this.getSupportType(),
|
||||
score,
|
||||
originalTitle: title,
|
||||
desc,
|
||||
url: urlMeta || (id ? `https://movie.douban.com/subject/${id}/` : ''),
|
||||
director: [],
|
||||
author: [],
|
||||
actor: [],
|
||||
aggregateRating: undefined,
|
||||
datePublished: undefined,
|
||||
image,
|
||||
imageUrl: image,
|
||||
genre: [],
|
||||
publisher: '',
|
||||
aliases: [],
|
||||
language: [],
|
||||
country: [],
|
||||
episode: null,
|
||||
time: null,
|
||||
IMDb: null,
|
||||
};
|
||||
}
|
||||
|
||||
this.handlePersonNameByMeta(html, teleplay, context, 'video:actor', 'actor');
|
||||
this.handlePersonNameByMeta(html, teleplay, context, 'video:director', 'director');
|
||||
const desc:string = html("span[property='v:summary']").text();
|
||||
|
||||
Loading…
Reference in New Issue
Block a user