py_czspp.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. # coding=utf-8
  2. # !/usr/bin/python
  3. import sys
  4. sys.path.append('..')
  5. from base.spider import Spider
  6. import base64
  7. import hashlib
  8. import requests
  9. from Crypto.Cipher import AES
  10. import urllib
  11. class Spider(Spider): # 元类 默认的元类 type
  def getName(self):
      """Return the display name of this spider."""
      site_name = "厂长资源"
      return site_name
  def init(self, extend=""):
      """Print a banner line containing ``extend``; nothing else is set up.

      Args:
          extend: Value interpolated into the banner (defaults to "").
      """
      banner = "============{0}============"
      print(banner.format(extend))
  def homeContent(self, filter):
      """Return the fixed list of categories.

      Args:
          filter: Not used by this implementation.

      Returns:
          A dict with a single ``class`` entry: a list of
          ``{'type_name': ..., 'type_id': ...}`` dicts, one per category,
          in declaration order.
      """
      # (display name, category id) pairs.
      categories = (
          ("豆瓣电影Top250", "dbtop250"),
          ("最新电影", "zuixindianying"),
          ("电视剧", "dsj"),
          ("国产剧", "gcj"),
          ("美剧", "meijutt"),
          ("韩剧", "hanjutv"),
          ("番剧", "fanju"),
          ("动漫", "dm"),
      )
      return {
          'class': [
              {'type_name': label, 'type_id': ident}
              for label, ident in categories
          ]
      }
  def homeVideoContent(self):
      """Fetch the site front page and return the videos listed on it.

      Returns:
          A dict with a ``list`` entry holding one dict per video found
          (``vod_id``, ``vod_name``, ``vod_pic``, ``vod_remarks``).

      Raises:
          IndexError: If a list item lacks the link or image attributes.
      """
      url = "https://czzy01.com"
      # getCookie accepts only the URL, sends its own request headers and
      # returns the page response, so the response is parsed directly.
      rsp = self.getCookie(url)
      root = self.html(self.cleanText(rsp.text))
      videos = []
      for item in root.xpath("//div[@class='mi_btcon']//ul/li"):
          href = item.xpath("./a/@href")[0]
          # The remark badge may be absent; fall back to an empty string
          # instead of failing the whole page.
          remarks = item.xpath("./div[@class='hdinfo']/span/text()")
          videos.append({
              "vod_id": self.regStr(href, "/movie/(\\S+).html"),
              "vod_name": item.xpath('./a/img/@alt')[0],
              "vod_pic": item.xpath('./a/img/@data-original')[0],
              "vod_remarks": remarks[0] if remarks else ""
          })
      # Same result shape as the category and search listings.
      return {'list': videos}
  def getCookie(self,url):
      """Fetch ``url`` on a fresh session, answering the site's interstitials.

      The first response body is checked for two marker strings. When one is
      found, the matching follow-up request is sent on the same session and
      ``url`` is requested again with the browser-like headers.

      Args:
          url: Page to fetch.

      Returns:
          The response for ``url``: the retried one if a marker was found,
          otherwise the first response.
      """
      header = {
          "Referer": 'https://czzy01.com/',
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
      }
      session = requests.session()
      # The initial probe is sent without the custom headers.
      first = session.get(url)

      if '人机验证' in first.text:  # marker text: "human verification"
          script_path = self.regStr(first.text, 'src=\"(/.*?)\"')
          script_rsp = session.get('https://czzy01.com' + script_path, headers=header)
          key = self.regStr(script_rsp.text, 'var key=\"(.*?)\"')
          seed = self.regStr(script_rsp.text, 'value=\"(.*?)\"')
          # The answer is the MD5 hex digest of the decimal code points of
          # ``seed`` concatenated together.
          digits = ''.join(str(ord(ch)) for ch in seed)
          value = hashlib.md5(digits.encode()).hexdigest()
          session.get('https://czzy01.com/a20be899_96a6_40b2_88ba_32f1f75f1552_yanzheng_ip.php?type=96c4e20a0e951f471d32dae103e83881&key={0}&value={1}'.format(key, value), headers=header)
          return session.get(url, headers=header)

      if '检测中' in first.text:  # marker text: "checking"
          link_path = self.regStr(first.text, 'href =\"(/.*?)\"')
          session.get('https://czzy01.com{0}'.format(link_path), headers=header)
          return session.get(url, headers=header)

      return first
  def categoryContent(self, tid, pg, filter, extend):
      """Return one page of videos for a category.

      Args:
          tid: Category id, inserted into the URL path.
          pg: Page number, inserted into the URL and echoed in the result.
          filter: Not used by this implementation.
          extend: Not used by this implementation.

      Returns:
          A dict with ``list`` (one dict per video), ``page`` and the fixed
          values ``pagecount`` = 9999, ``limit`` = 90, ``total`` = 999999.

      Raises:
          IndexError: If a list item lacks the link or image attributes.
      """
      url = 'https://czzy01.com/{0}/page/{1}'.format(tid, pg)
      rsp = self.getCookie(url)
      root = self.html(self.cleanText(rsp.text))
      videos = []
      for item in root.xpath("//div[contains(@class,'bt_img mi_ne_kd mrb')]/ul/li"):
          # Prefer the 'jidi' badge, then the 'hdinfo' badge. An item with
          # neither gets an empty remark instead of failing the whole page.
          remarks = (item.xpath(".//div[@class='jidi']/span/text()")
                     or item.xpath("./div[@class='hdinfo']/span/text()"))
          href = item.xpath("./a/@href")[0]
          videos.append({
              "vod_id": self.regStr(href, "/movie/(\\S+).html"),
              "vod_name": item.xpath('./a/img/@alt')[0],
              "vod_pic": item.xpath('./a/img/@data-original')[0],
              "vod_remarks": remarks[0] if remarks else ""
          })
      return {
          'list': videos,
          'page': pg,
          'pagecount': 9999,
          'limit': 90,
          'total': 999999
      }
  def detailContent(self, array):
      """Fetch a movie's detail page and build its vod record.

      Args:
          array: Sequence whose first element is the movie id used in the
              detail-page URL.

      Returns:
          ``{'list': [vod]}`` where ``vod`` carries the title, picture,
          description, area/year/actor/director fields and the play lists.

      Raises:
          IndexError: If the page lacks the main info container, the poster
              image, the title, or an episode link's ``href``.
      """
      def join_child_text(node):
          # Join the text of the node's direct children with '/'. Children
          # without text are skipped so the literal string "None" never
          # ends up in the output.
          return '/'.join(child.text for child in node if child.text).strip('/')

      tid = array[0]
      url = 'https://czzy01.com/movie/{0}.html'.format(tid)
      rsp = self.getCookie(url)
      root = self.html(self.cleanText(rsp.text))
      node = root.xpath("//div[@class='dyxingq']")[0]
      pic = node.xpath(".//div[@class='dyimg fl']/img/@src")[0]
      title = node.xpath('.//h1/text()')[0]
      # The description is optional; a missing one yields an empty string.
      paragraphs = root.xpath(".//div[@class='yp_context']//p/text()")
      vod = {
          "vod_id": tid,
          "vod_name": title,
          "vod_pic": pic,
          "type_name": "",
          "vod_year": "",
          "vod_area": "",
          "vod_remarks": "",
          "vod_actor": "",
          "vod_director": "",
          "vod_content": paragraphs[0] if paragraphs else ""
      }

      # Row label prefix -> vod field filled from the row's child elements.
      joined_fields = {
          '地区': 'vod_area',
          '主演': 'vod_actor',
          '导演': 'vod_director',
      }
      for info in node.xpath(".//ul[@class='moviedteail_list']/li"):
          content = info.xpath('string(.)')
          if content.startswith('年份'):
              vod['vod_year'] = content.replace("年份:","")
              continue
          for prefix, field in joined_fields.items():
              if content.startswith(prefix):
                  vod[field] = join_child_text(info)
                  break

      playFrom = ['厂长']
      playList = []
      # NOTE(review): 'paly_list_btn' presumably mirrors the site's own
      # class-name spelling -- confirm before "correcting" it.
      for group in root.xpath("//div[@class='paly_list_btn']"):
          episodes = []
          for link in group.xpath('./a'):
              href = link.xpath('./@href')[0]
              texts = link.xpath('./text()')
              # A link without its own text node gets an empty name.
              name = texts[0].replace('\xa0','') if texts else ''
              episodes.append(name + "$" + self.regStr(href, '/v_play/(\\S+).html'))
          playList.append('#'.join(episodes))

      vod['vod_play_from'] = '$$$'.join(playFrom)
      vod['vod_play_url'] = '$$$'.join(playList)
      return {
          'list': [
              vod
          ]
      }
  def searchContent(self, key, quick):
      """Search the site for ``key`` and return the matching videos.

      Args:
          key: Search text; percent-encoded before being put in the URL.
          quick: Not used by this implementation.

      Returns:
          ``{'list': videos}`` with one dict per hit (``vod_id``,
          ``vod_name``, ``vod_pic``, ``vod_remarks``).

      Raises:
          IndexError: If a hit lacks the image attributes or the link.
      """
      # Import the submodule explicitly: a bare ``import urllib`` does not
      # guarantee that ``urllib.parse`` has been loaded.
      from urllib.parse import quote

      url = 'https://czzy01.com/xssearch?q={0}'.format(quote(key))
      rsp = self.getCookie(url)
      root = self.html(self.cleanText(rsp.text))
      videos = []
      for vod in root.xpath("//div[contains(@class,'mi_ne_kd')]/ul/li/a"):
          href = vod.xpath('./@href')[0]
          # Evaluate the badge xpath once; hits without a badge get the
          # default remark.
          remarks = vod.xpath('./div[@class="jidi"]/span/text()')
          videos.append({
              "vod_id": self.regStr(href, 'movie/(\\S+).html'),
              "vod_name": vod.xpath('./img/@alt')[0],
              "vod_pic": vod.xpath('./img/@data-original')[0],
              "vod_remarks": remarks[0] if remarks else '全1集'
          })
      return {
          'list': videos
      }
  # Class-level configuration; both sections are empty dicts.
  config = {"player": {}, "filter": {}}

  # Class-level default request headers (Referer plus a desktop User-Agent).
  header = {
      "Referer":
          "https://czzy01.com/",
      "User-Agent":
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36",
  }
  def parseCBC(self, enc, key, iv):
      """Decrypt AES-CBC data and cut off the trailing padding.

      Args:
          enc: Ciphertext to decrypt.
          key: Key as text; UTF-8 encoded before use.
          iv: Initialisation vector as text; UTF-8 encoded before use.

      Returns:
          The decrypted data minus its last N items, where N is the value
          of the final item of the decrypted data.
      """
      raw_key = key.encode("utf-8")
      raw_iv = iv.encode("utf-8")
      plain = AES.new(raw_key, AES.MODE_CBC, raw_iv).decrypt(enc)
      # The final item states how many trailing items are padding.
      pad_len = plain[-1]
      return plain[:-pad_len]
  def playerContent(self, flag, id, vipFlags):
      """Resolve the play URL for one episode page.

      The play page is searched for an encrypted payload together with its
      key and IV. When found, the payload is decrypted and the video URL
      (and any ``.vtt`` subtitle URL) is extracted from it. Otherwise the
      page URL itself is returned with ``parse`` set to 1.

      Args:
          flag: Not used by this implementation.
          id: Episode id inserted into the play-page URL.
          vipFlags: Not used by this implementation.

      Returns:
          A dict with ``parse``, ``playUrl``, ``url`` and ``header``, plus
          ``subf`` / ``subt`` when a subtitle URL was found.
      """
      url = 'https://czzy01.com/v_play/{0}.html'.format(id)
      rsp = self.getCookie(url)
      pat = '\\"([^\\"]+)\\";var [\\d\\w]+=function dncry.*md5.enc.Utf8.parse\\(\\"([\\d\\w]+)\\".*md5.enc.Utf8.parse\\(([\\d]+)\\)'
      html = rsp.text
      content = self.regStr(html, pat)
      # Defined on both paths so the subtitle check below is always safe.
      subtitle = ''
      if content == '':
          # No encrypted payload on the page: return the page URL itself.
          # NOTE(review): parse=1 presumably asks the caller to resolve the
          # page -- confirm against the player contract.
          str3 = url
          pars = 1
          header = {
              "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
          }
      else:
          key = self.regStr(html, pat, 2)
          iv = self.regStr(html, pat, 3)
          decontent = self.parseCBC(base64.b64decode(content), key, iv).decode()
          urlPat = 'video: \\{url: \\\"([^\\\"]+)\\\"'
          vttPat = 'subtitle: \\{url:\\\"([^\\\"]+\\.vtt)\\\"'
          str3 = self.regStr(decontent, urlPat)
          subtitle = self.regStr(decontent, vttPat)
          self.loadVtt(str3)
          pars = 0
          header = ''
      result = {
          'parse': pars,
          'playUrl': '',
          'url': str3,
          'header': header
      }
      # Add the subtitle fields after the result dict is built, so they are
      # not discarded when the dict is assigned.
      if subtitle:
          result['subf'] = '/vtt/utf-8'
          result['subt'] = ''
      return result
  def loadVtt(self, url):
      """Placeholder: accept a URL and do nothing.

      Args:
          url: Ignored.
      """
      return None
  def isVideoFormat(self, url):
      """Placeholder: accept a URL and return ``None`` without checking it.

      Args:
          url: Ignored.
      """
      return None
  def manualVideoCheck(self):
      """Placeholder: take no arguments and return ``None``."""
      return None
  def localProxy(self, param):
      """Return a fixed four-element reply regardless of ``param``.

      Args:
          param: Ignored.

      Returns:
          ``[200, "video/MP2T", {}, ""]``.
          NOTE(review): presumably status code, content type, extra
          headers/action and body -- confirm against the caller.
      """
      return [200, "video/MP2T", {}, ""]