py_bilimd.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. # coding=utf-8
  2. # !/usr/bin/python
  3. import sys
  4. sys.path.append('..')
  5. from base.spider import Spider
  6. # from base.htmlParser import jsoup
  7. import json
  8. from requests import session, utils
  9. import os
  10. import time
  11. import base64
  12. # def main3():
  13. # html = """
  14. # <div>
  15. # <p>内容1<span id='exd1'>我不获取的内容1</span><span id='exd2'>我不获取的内容2</span>内容2</p>
  16. # </div>
  17. # """
  18. # jsp = jsoup('https://www.cnblogs.com/lizhibk/p/8623543.html')
  19. # a = jsp.pdfh(html, 'div p:eq(0)--span&&Text')
  20. # print(a)
  21. # return a
  22. class Spider(Spider): # 元类 默认的元类 type
  23. def getName(self):
  24. return "哔哩影视"
  25. def init(self, extend=""):
  26. print("============{0}============".format(extend))
  27. pass
  28. def isVideoFormat(self, url):
  29. pass
  30. def manualVideoCheck(self):
  31. pass
  32. def homeContent(self, filter):
  33. result = {}
  34. cateManual = {
  35. # main3():"1",
  36. "番剧": "1",
  37. "国创": "4",
  38. "电影": "2",
  39. "电视剧": "5",
  40. "纪录片": "3",
  41. "综艺": "7",
  42. "全部": "全部",
  43. "追番": "追番",
  44. "追剧": "追剧",
  45. "时间表": "时间表",
  46. }
  47. classes = []
  48. for k in cateManual:
  49. classes.append({
  50. 'type_name': k,
  51. 'type_id': cateManual[k]
  52. })
  53. result['class'] = classes
  54. if (filter):
  55. result['filters'] = self.config['filter']
  56. return result
  57. cookies = ''
  58. userid = ''
  59. def getCookie(self):
  60. # --------↓↓↓↓↓↓↓------在下方cookies_str后的双引号内填写-------↓↓↓↓↓↓↓--------
  61. cookies_str = ""
  62. if cookies_str:
  63. cookies = dict([co.strip().split('=', 1) for co in cookies_str.split(';')])
  64. bili_jct = cookies['bili_jct']
  65. SESSDATA = cookies['SESSDATA']
  66. DedeUserID = cookies['DedeUserID']
  67. cookies_jar = {"bili_jct": bili_jct,
  68. 'SESSDATA': SESSDATA,
  69. 'DedeUserID': DedeUserID
  70. }
  71. rsp = session()
  72. rsp.cookies = cookies_jar
  73. content = self.fetch("https://api.bilibili.com/x/web-interface/nav", cookies=rsp.cookies)
  74. res = json.loads(content.text)
  75. if res["code"] == 0:
  76. self.cookies = rsp.cookies
  77. self.userid = res["data"].get('mid')
  78. return rsp.cookies
  79. rsp = self.fetch("https://www.bilibili.com/")
  80. self.cookies = rsp.cookies
  81. return rsp.cookies
  82. # 将超过10000的数字换成成以万和亿为单位
  83. def zh(self, num):
  84. if int(num) >= 100000000:
  85. p = round(float(num) / float(100000000), 1)
  86. p = str(p) + '亿'
  87. else:
  88. if int(num) >= 10000:
  89. p = round(float(num) / float(10000), 1)
  90. p = str(p) + '万'
  91. else:
  92. p = str(num)
  93. return p
  94. def homeVideoContent(self):
  95. result = {}
  96. videos = self.get_rank(1)['list'][0:5]
  97. for i in [4, 2, 5, 3, 7]:
  98. videos += self.get_rank2(i)['list'][0:5]
  99. result['list'] = videos
  100. return result
  101. def get_rank(self, tid):
  102. result = {}
  103. url = 'https://api.bilibili.com/pgc/web/rank/list?season_type={0}&day=3'.format(tid)
  104. rsp = self.fetch(url, cookies=self.cookies)
  105. content = rsp.text
  106. jo = json.loads(content)
  107. if jo['code'] == 0:
  108. videos = []
  109. vodList = jo['result']['list']
  110. for vod in vodList:
  111. aid = str(vod['season_id']).strip()
  112. title = vod['title'].strip()
  113. img = vod['cover'].strip()
  114. remark = vod['new_ep']['index_show']
  115. videos.append({
  116. "vod_id": aid,
  117. "vod_name": title,
  118. "vod_pic": img,
  119. "vod_remarks": remark
  120. })
  121. result['list'] = videos
  122. result['page'] = 1
  123. result['pagecount'] = 1
  124. result['limit'] = 90
  125. result['total'] = 999999
  126. return result
  127. def get_rank2(self, tid):
  128. result = {}
  129. url = 'https://api.bilibili.com/pgc/season/rank/web/list?season_type={0}&day=3'.format(tid)
  130. rsp = self.fetch(url, cookies=self.cookies)
  131. content = rsp.text
  132. jo = json.loads(content)
  133. if jo['code'] == 0:
  134. videos = []
  135. vodList = jo['data']['list']
  136. for vod in vodList:
  137. aid = str(vod['season_id']).strip()
  138. title = vod['title'].strip()
  139. img = vod['cover'].strip()
  140. remark = vod['new_ep']['index_show']
  141. videos.append({
  142. "vod_id": aid,
  143. "vod_name": title,
  144. "vod_pic": img,
  145. "vod_remarks": remark
  146. })
  147. result['list'] = videos
  148. result['page'] = 1
  149. result['pagecount'] = 1
  150. result['limit'] = 90
  151. result['total'] = 999999
  152. return result
  153. def get_zhui(self, pg, mode):
  154. result = {}
  155. if len(self.cookies) <= 0:
  156. self.getCookie()
  157. url = 'https://api.bilibili.com/x/space/bangumi/follow/list?type={2}&follow_status=0&pn={1}&ps=10&vmid={0}'.format(self.userid, pg, mode)
  158. rsp = self.fetch(url, cookies=self.cookies)
  159. content = rsp.text
  160. jo = json.loads(content)
  161. videos = []
  162. vodList = jo['data']['list']
  163. for vod in vodList:
  164. aid = str(vod['season_id']).strip()
  165. title = vod['title']
  166. img = vod['cover'].strip()
  167. remark = vod['new_ep']['index_show'].strip()
  168. videos.append({
  169. "vod_id": aid,
  170. "vod_name": title,
  171. "vod_pic": img,
  172. "vod_remarks": remark
  173. })
  174. result['list'] = videos
  175. result['page'] = pg
  176. result['pagecount'] = 9999
  177. result['limit'] = 90
  178. result['total'] = 999999
  179. return result
  180. def get_all(self, tid, pg, order, season_status, extend):
  181. result = {}
  182. if len(self.cookies) <= 0:
  183. self.getCookie()
  184. url = 'https://api.bilibili.com/pgc/season/index/result?order={2}&pagesize=20&type=1&season_type={0}&page={1}&season_status={3}'.format(tid, pg, order, season_status)
  185. rsp = self.fetch(url, cookies=self.cookies)
  186. content = rsp.text
  187. jo = json.loads(content)
  188. videos = []
  189. vodList = jo['data']['list']
  190. for vod in vodList:
  191. aid = str(vod['season_id']).strip()
  192. title = vod['title']
  193. img = vod['cover'].strip()
  194. remark = vod['index_show'].strip()
  195. videos.append({
  196. "vod_id": aid,
  197. "vod_name": title,
  198. "vod_pic": img,
  199. "vod_remarks": remark
  200. })
  201. result['list'] = videos
  202. result['page'] = pg
  203. result['pagecount'] = 9999
  204. result['limit'] = 90
  205. result['total'] = 999999
  206. return result
  207. def get_timeline(self, tid, pg):
  208. result = {}
  209. url = 'https://api.bilibili.com/pgc/web/timeline/v2?season_type={0}&day_before=2&day_after=4'.format(tid)
  210. rsp = self.fetch(url, cookies=self.cookies)
  211. content = rsp.text
  212. jo = json.loads(content)
  213. if jo['code'] == 0:
  214. videos1 = []
  215. vodList = jo['result']['latest']
  216. for vod in vodList:
  217. aid = str(vod['season_id']).strip()
  218. title = vod['title'].strip()
  219. img = vod['cover'].strip()
  220. remark = vod['pub_index'] + ' ' + vod['follows'].replace('系列', '')
  221. videos1.append({
  222. "vod_id": aid,
  223. "vod_name": title,
  224. "vod_pic": img,
  225. "vod_remarks": remark
  226. })
  227. videos2 = []
  228. for i in range(0, 7):
  229. vodList = jo['result']['timeline'][i]['episodes']
  230. for vod in vodList:
  231. if str(vod['published']) == "0":
  232. aid = str(vod['season_id']).strip()
  233. title = str(vod['title']).strip()
  234. img = str(vod['cover']).strip()
  235. date = str(time.strftime("%m-%d %H:%M", time.localtime(vod['pub_ts'])))
  236. remark = date + " " + vod['pub_index']
  237. videos2.append({
  238. "vod_id": aid,
  239. "vod_name": title,
  240. "vod_pic": img,
  241. "vod_remarks": remark
  242. })
  243. result['list'] = videos2 + videos1
  244. result['page'] = 1
  245. result['pagecount'] = 1
  246. result['limit'] = 90
  247. result['total'] = 999999
  248. return result
  249. def categoryContent(self, tid, pg, filter, extend):
  250. result = {}
  251. if len(self.cookies) <= 0:
  252. self.getCookie()
  253. if tid == "1":
  254. return self.get_rank(tid=tid)
  255. elif tid in {"2", "3", "4", "5", "7"}:
  256. return self.get_rank2(tid=tid)
  257. elif tid == "全部":
  258. tid = '1' # 全部界面默认展示最多播放的番剧
  259. order = '2'
  260. season_status = '-1'
  261. if 'tid' in extend:
  262. tid = extend['tid']
  263. if 'order' in extend:
  264. order = extend['order']
  265. if 'season_status' in extend:
  266. season_status = extend['season_status']
  267. return self.get_all(tid, pg, order, season_status, extend)
  268. elif tid == "追番":
  269. return self.get_zhui(pg, 1)
  270. elif tid == "追剧":
  271. return self.get_zhui(pg, 2)
  272. elif tid == "时间表":
  273. tid = 1
  274. if 'tid' in extend:
  275. tid = extend['tid']
  276. return self.get_timeline(tid, pg)
  277. else:
  278. result = self.searchContent(key=tid, quick="false")
  279. return result
  280. def cleanSpace(self, str):
  281. return str.replace('\n', '').replace('\t', '').replace('\r', '').replace(' ', '')
  282. def detailContent(self, array):
  283. aid = array[0]
  284. url = "https://api.bilibili.com/pgc/view/web/season?season_id={0}".format(aid)
  285. rsp = self.fetch(url, headers=self.header)
  286. jRoot = json.loads(rsp.text)
  287. jo = jRoot['result']
  288. id = jo['season_id']
  289. title = jo['title']
  290. pic = jo['cover']
  291. # areas = jo['areas']['name'] 改bilidanmu显示弹幕
  292. typeName = jo['share_sub_title']
  293. date = jo['publish']['pub_time'][0:4]
  294. dec = jo['evaluate']
  295. remark = jo['new_ep']['desc']
  296. stat = jo['stat']
  297. # 演员和导演框展示视频状态,包括以下内容:
  298. status = "弹幕: " + self.zh(stat['danmakus']) + " 点赞: " + self.zh(stat['likes']) + " 投币: " + self.zh(
  299. stat['coins']) + " 追番追剧: " + self.zh(stat['favorites'])
  300. if 'rating' in jo:
  301. score = "评分: " + str(jo['rating']['score']) + ' ' + jo['subtitle']
  302. else:
  303. score = "暂无评分" + ' ' + jo['subtitle']
  304. vod = {
  305. "vod_id": id,
  306. "vod_name": title,
  307. "vod_pic": pic,
  308. "type_name": typeName,
  309. "vod_year": date,
  310. "vod_area": "bilidanmu",
  311. "vod_remarks": remark,
  312. "vod_actor": status,
  313. "vod_director": score,
  314. "vod_content": dec
  315. }
  316. ja = jo['episodes']
  317. playUrl = ''
  318. for tmpJo in ja:
  319. eid = tmpJo['id']
  320. cid = tmpJo['cid']
  321. part = tmpJo['title'].replace("#", "-")
  322. playUrl = playUrl + '{0}${1}_{2}#'.format(part, eid, cid)
  323. vod['vod_play_from'] = 'B站'
  324. vod['vod_play_url'] = playUrl
  325. result = {
  326. 'list': [
  327. vod
  328. ]
  329. }
  330. return result
  331. def searchContent(self, key, quick):
  332. if len(self.cookies) <= 0:
  333. self.getCookie()
  334. url1 = 'https://api.bilibili.com/x/web-interface/search/type?search_type=media_bangumi&keyword={0}'.format(
  335. key) # 番剧搜索
  336. rsp1 = self.fetch(url1, cookies=self.cookies)
  337. content1 = rsp1.text
  338. jo1 = json.loads(content1)
  339. rs1 = jo1['data']
  340. url2 = 'https://api.bilibili.com/x/web-interface/search/type?search_type=media_ft&keyword={0}'.format(
  341. key) # 影视搜索
  342. rsp2 = self.fetch(url2, cookies=self.cookies)
  343. content2 = rsp2.text
  344. jo2 = json.loads(content2)
  345. rs2 = jo2['data']
  346. videos = []
  347. if rs1['numResults'] == 0:
  348. vodList = jo2['data']['result']
  349. elif rs2['numResults'] == 0:
  350. vodList = jo1['data']['result']
  351. else:
  352. vodList = jo1['data']['result'] + jo2['data']['result']
  353. for vod in vodList:
  354. aid = str(vod['season_id']).strip()
  355. title = key + '➢' + vod['title'].strip().replace("<em class=\"keyword\">", "").replace("</em>", "")
  356. img = vod['cover'].strip() # vod['eps'][0]['cover'].strip()原来的错误写法
  357. remark = vod['index_show']
  358. videos.append({
  359. "vod_id": aid,
  360. "vod_name": title,
  361. "vod_pic": img,
  362. "vod_remarks": remark
  363. })
  364. result = {
  365. 'list': videos
  366. }
  367. return result
  368. def playerContent(self, flag, id, vipFlags):
  369. result = {}
  370. ids = id.split("_")
  371. header = {
  372. "Referer": "https://www.bilibili.com",
  373. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
  374. }
  375. url = 'https://api.bilibili.com/pgc/player/web/playurl?qn=116&ep_id={0}&cid={1}'.format(ids[0], ids[1])
  376. if len(self.cookies) <= 0:
  377. self.getCookie()
  378. rsp = self.fetch(url, cookies=self.cookies, headers=header)
  379. jRoot = json.loads(rsp.text)
  380. if jRoot['message'] != 'success':
  381. print("需要大会员权限才能观看")
  382. return {}
  383. jo = jRoot['result']
  384. ja = jo['durl']
  385. maxSize = -1
  386. position = -1
  387. for i in range(len(ja)):
  388. tmpJo = ja[i]
  389. if maxSize < int(tmpJo['size']):
  390. maxSize = int(tmpJo['size'])
  391. position = i
  392. url = ''
  393. if len(ja) > 0:
  394. if position == -1:
  395. position = 0
  396. url = ja[position]['url']
  397. result["parse"] = 0
  398. result["playUrl"] = ''
  399. result["url"] = url
  400. result["header"] = {
  401. "Referer": "https://www.bilibili.com",
  402. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
  403. }
  404. result["contentType"] = 'video/x-flv'
  405. return result
  406. config = {
  407. "player": {},
  408. "filter": {
  409. "全部": [
  410. {
  411. "key": "tid",
  412. "name": "分类",
  413. "value": [{
  414. "n": "番剧",
  415. "v": "1"
  416. },
  417. {
  418. "n": "国创",
  419. "v": "4"
  420. },
  421. {
  422. "n": "电影",
  423. "v": "2"
  424. },
  425. {
  426. "n": "电视剧",
  427. "v": "5"
  428. },
  429. {
  430. "n": "记录片",
  431. "v": "3"
  432. },
  433. {
  434. "n": "综艺",
  435. "v": "7"
  436. }
  437. ]
  438. },
  439. {
  440. "key": "order",
  441. "name": "排序",
  442. "value": [
  443. {
  444. "n": "播放数量",
  445. "v": "2"
  446. },
  447. {
  448. "n": "更新时间",
  449. "v": "0"
  450. },
  451. {
  452. "n": "最高评分",
  453. "v": "4"
  454. },
  455. {
  456. "n": "弹幕数量",
  457. "v": "1"
  458. },
  459. {
  460. "n": "追看人数",
  461. "v": "3"
  462. },
  463. {
  464. "n": "开播时间",
  465. "v": "5"
  466. },
  467. {
  468. "n": "上映时间",
  469. "v": "6"
  470. },
  471. ]
  472. },
  473. {
  474. "key": "season_status",
  475. "name": "付费",
  476. "value": [
  477. {
  478. "n": "全部",
  479. "v": "-1"
  480. },
  481. {
  482. "n": "免费",
  483. "v": "1"
  484. },
  485. {
  486. "n": "付费",
  487. "v": "2%2C6"
  488. },
  489. {
  490. "n": "大会员",
  491. "v": "4%2C6"
  492. },
  493. ]
  494. },
  495. ],
  496. "时间表": [{
  497. "key": "tid",
  498. "name": "分类",
  499. "value": [
  500. {
  501. "n": "番剧",
  502. "v": "1"
  503. },
  504. {
  505. "n": "国创",
  506. "v": "4"
  507. },
  508. ]
  509. },
  510. ],
  511. }
  512. }
  513. header = {
  514. "Referer": "https://www.bilibili.com",
  515. "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
  516. }
  517. def localProxy(self, param):
  518. return [200, "video/MP2T", action, ""]