From 297ccd33cfe4c71aca96e62bec23b22527b6f8a2 Mon Sep 17 00:00:00 2001 From: YuBai Date: Mon, 2 Feb 2026 11:25:56 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=BD=93=20JSON-LD=20=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E5=A4=B1=E8=B4=A5=E6=97=B6=E6=B7=BB=E5=8A=A0=E5=9B=9E=E9=80=80?= =?UTF-8?q?=E6=9C=BA=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 豆瓣现在对未登录请求返回反爬虫验证页面,导致 JSON-LD 解析返回 undefined。 添加从 OG meta 标签提取基本信息的回退机制,防止代码崩溃。 同时给所有数组字段添加默认值 || []。 --- .../data/handler/DoubanMovieLoadHandler.ts | 82 ++++++++++++++----- .../data/handler/DoubanTeleplayLoadHandler.ts | 58 +++++++++++-- 2 files changed, 111 insertions(+), 29 deletions(-) diff --git a/src/org/wanxp/douban/data/handler/DoubanMovieLoadHandler.ts b/src/org/wanxp/douban/data/handler/DoubanMovieLoadHandler.ts index 1ff1ea0..908ca42 100644 --- a/src/org/wanxp/douban/data/handler/DoubanMovieLoadHandler.ts +++ b/src/org/wanxp/douban/data/handler/DoubanMovieLoadHandler.ts @@ -35,24 +35,24 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler c) + (extract.director || []).map(SchemaOrg.getPersonName).filter(c => c) )); variableMap.set("actor", new DataField( "actor", DataValueType.array, extract.actor, - extract.actor.map(SchemaOrg.getPersonName).filter(c => c) + (extract.actor || []).map(SchemaOrg.getPersonName).filter(c => c) )); variableMap.set("author", new DataField( "author", DataValueType.array, extract.author, - extract.author.map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c) + (extract.author || []).map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c) )); variableMap.set("aliases", new DataField("aliases", DataValueType.array, extract.aliases, - extract.aliases.map(a=>a + (extract.aliases || []).map(a=>a .trim() // .replace(TITLE_ALIASES_SPECIAL_CHAR_REG_G, '_') // //replase multiple _ to single _ @@ -98,7 +98,7 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler "application/ld+json" == html(scd).attr("type")) .map(i => { @@ -108,8 +108,8 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler meta[property='og:title']").get(0)).attr("content") || ''; + const image = html(html("head > meta[property='og:image']").get(0)).attr("content") || ''; + const urlMeta = html(html("head > meta[property='og:url']").get(0)).attr("content") || ''; + const desc = html(html("head > meta[property='og:description']").get(0)).attr("content") || ''; + + // Extract ID from URL + const idPattern = /(\d){5,10}/g; + const idMatch = idPattern.exec(urlMeta); + const id = idMatch ? idMatch[0] : ''; + + // Extract score from HTML + const scoreText = html("#interest_sectl strong[property='v:average']").text(); + const score = scoreText ? parseFloat(scoreText) : undefined; + + movie = { + id, + title, + type: this.getSupportType(), + score, + originalTitle: title, + desc, + url: urlMeta || (id ? `https://movie.douban.com/subject/${id}/` : ''), + director: [], + author: [], + actor: [], + aggregateRating: undefined, + datePublished: undefined, + image, + imageUrl: image, + genre: [], + publisher: '', + aliases: [], + language: [], + country: [], + time: null, + IMDb: null, + }; + } + + this.handlePersonNameByMeta(html, movie, context, 'video:actor', 'actor'); + this.handlePersonNameByMeta(html, movie, context, 'video:director', 'director'); + + const desc: string = html("span[property='v:summary']").text(); if (desc) { movie.desc = desc; } @@ -156,7 +198,7 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler v.trim()); - } else if(key.indexOf('片长') >= 0) { + } else if (key.indexOf('片长') >= 0) { value = html(info.next.next).text().trim() } else { value = html(info.next).text().trim(); @@ -164,11 +206,11 @@ export default class DoubanMovieLoadHandler extends DoubanAbstractLoadHandler, extract: DoubanTeleplaySubject, context: HandleContext): void { - variableMap.set("director", new DataField("director", DataValueType.array, extract.director,extract.director.map(SchemaOrg.getPersonName).filter(c => c))); + variableMap.set("director", new DataField("director", DataValueType.array, extract.director,(extract.director || []).map(SchemaOrg.getPersonName).filter(c => c))); variableMap.set("actor", new DataField( "actor", DataValueType.array, extract.actor, - extract.actor.map(SchemaOrg.getPersonName).filter(c => c) + (extract.actor || []).map(SchemaOrg.getPersonName).filter(c => c) )); variableMap.set("author", new DataField( "author", DataValueType.array, extract.author, - extract.author.map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c) + (extract.author || []).map(SchemaOrg.getPersonName).map(name => super.getPersonName(name, context)).filter(c => c) )); variableMap.set("aliases", new DataField("aliases", DataValueType.array, extract.aliases, - extract.aliases.map(a=>a + (extract.aliases || []).map(a=>a .trim() // .replace(TITLE_ALIASES_SPECIAL_CHAR_REG_G, '_') // //replase multiple _ to single _ @@ -84,7 +84,7 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler "application/ld+json" == html(scd).attr("type")) .map(i => { @@ -104,14 +104,14 @@ export class DoubanTeleplayLoadHandler extends DoubanAbstractLoadHandler meta[property='og:title']").get(0)).attr("content") || ''; + const image = html(html("head > meta[property='og:image']").get(0)).attr("content") || ''; + const urlMeta = html(html("head > meta[property='og:url']").get(0)).attr("content") || ''; + const desc = html(html("head > meta[property='og:description']").get(0)).attr("content") || ''; + + const idPattern = /(\d){5,10}/g; + const idMatch = idPattern.exec(urlMeta); + const id = idMatch ? idMatch[0] : ''; + + const scoreText = html("#interest_sectl strong[property='v:average']").text(); + const score = scoreText ? parseFloat(scoreText) : undefined; + + teleplay = { + id, + title, + type: this.getSupportType(), + score, + originalTitle: title, + desc, + url: urlMeta || (id ? `https://movie.douban.com/subject/${id}/` : ''), + director: [], + author: [], + actor: [], + aggregateRating: undefined, + datePublished: undefined, + image, + imageUrl: image, + genre: [], + publisher: '', + aliases: [], + language: [], + country: [], + episode: null, + time: null, + IMDb: null, + }; + } + this.handlePersonNameByMeta(html, teleplay, context, 'video:actor', 'actor'); this.handlePersonNameByMeta(html, teleplay, context, 'video:director', 'director'); const desc:string = html("span[property='v:summary']").text();