py_1905.py 10 KB


  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. sys.path.append('..')
  5. from base.spider import Spider
  6. import json
  7. import re
  8. import time
  9. import uuid
  10. import hashlib
  11. from urllib import request, parse
  12. import urllib
  13. import urllib.request
  14. import ssl
# Globally disable HTTPS certificate verification for every urllib request
# made by this process.
# NOTE(review): this removes TLS server authentication process-wide, not just
# for this spider -- confirm that this is really required.
ssl._create_default_https_context = ssl._create_unverified_context
  16. class Spider(Spider): # 元类 默认的元类 type
  17. def getName(self):
  18. return "1905电影"
  19. def init(self,extend=""):
  20. print("============{0}============".format(extend))
  21. pass
  22. def isVideoFormat(self,url):
  23. pass
  24. def manualVideoCheck(self):
  25. pass
  26. def homeContent(self,filter):
  27. result = {}
  28. cateManual = {
  29. "电影": "n_1",
  30. "微电影":"n_1_c_922",
  31. "系列电影":"n_2",
  32. "纪录片":"c_927",
  33. "晚会":"n_1_c_586",
  34. "独家":"n_1_c_178",
  35. "综艺":"n_1_c_1024"
  36. # ,"体育":"n_1_c_1053"
  37. }
  38. classes = []
  39. for k in cateManual:
  40. classes.append({
  41. 'type_name':k,
  42. 'type_id':cateManual[k]
  43. })
  44. result['class'] = classes
  45. if(filter):
  46. result['filters'] = self.config['filter']
  47. return result
  48. def homeVideoContent(self):
  49. url = 'https://www.1905.com/vod/cctv6/lst/'
  50. rsp = self.fetch(url, headers=self.header)
  51. html = self.html(rsp.text)
  52. aList = html.xpath("//div[@class='grid-2x']/a")
  53. videos = self.custom_list(aList=aList)
  54. result = {
  55. 'list':videos
  56. }
  57. return result
  58. def categoryContent(self,tid,pg,filter,extend):
  59. result = {}
  60. videos=[]
  61. by='/o3p'
  62. if 'by' in extend.keys():
  63. by='/{0}p'.format(extend['by'])
  64. url = 'https://www.1905.com/vod/list/{0}{2}{1}.html'.format(tid, pg,by)
  65. HtmlTxt =self.custom_webReadFile(url,header=self.header)
  66. html = self.html(HtmlTxt)
  67. aList = html.xpath("//section[contains(@class,'search-list')]/div/a" if tid != u'n_2' else "//div[@class='mod']/div[1]/a")
  68. videos = self.custom_list(aList=aList)
  69. limit = len(aList)
  70. result['list'] = videos
  71. result['page'] = pg
  72. result['pagecount'] = 100
  73. result['limit'] = limit
  74. result['total'] = 100 * limit
  75. return result
  76. def detailContent(self,array):
  77. result = {}
  78. temporary = array[0].split('###')
  79. title=temporary[1]
  80. aid=temporary[0]
  81. pic=temporary[2]
  82. remark=''
  83. actor=''
  84. direct=''
  85. content=''
  86. vodItems=[]
  87. playList=[]
  88. vod_play_from=['播放线路',]
  89. if aid.isdigit()==False:
  90. HtmlTxt=self.custom_webReadFile(aid,self.header)
  91. url=self.custom_RegexGetText(HtmlTxt,r'<a class="iconBanner-playBtn icon-banner btn-play"\s*href="(.+?)"',1)
  92. if url=='':
  93. url=self.custom_RegexGetText(HtmlTxt,r'property="og:url"\scontent="(.+?)"', 1)
  94. if self.custom_RegexGetText(url,r'/(film)/',1)!='':
  95. HtmlTxt=self.custom_webReadFile(aid+'video',self.header)
  96. url=self.custom_RegexGetText(HtmlTxt,r'<li class="video-position-icon\s{0,1}">\r*\n*\s*<a href="(.+?)"\s{1,4}class="online-list-positive other-vedio-url"', 1)
  97. if len(self.custom_RegexGetText(url,r'(vip.1905)',1))>3:
  98. vod_play_from=['播放线路(需要vip解析)',]
  99. aid=url
  100. title=self.custom_RegexGetText(HtmlTxt,r'<div class="container-right">\s*\r*\n*\t*<h1>(.+?)<',1).replace(' ','')
  101. pic=self.custom_RegexGetText(HtmlTxt,r'<img class="poster" src="(.+?)"',1)
  102. content=self.custom_RegexGetText(HtmlTxt,r'<p>(.+?)</p>',1)
  103. vodItems.append(title + "$" + aid)
  104. joinStr = '#'.join(vodItems)
  105. playList.append(joinStr)
  106. else:
  107. aid=self.custom_RegexGetText(url,r'play/(.*?)\.sh',1)
  108. if aid=='':
  109. return {'list': []}
  110. elif aid.isdigit() and vod_play_from[0].find('需要vip解析')<0:
  111. url = "https://www.1905.com/api/content/?callback=&m=Vod&a=getVodSidebar&id={0}&fomat=json".format(aid)
  112. try:
  113. HtmlTxt=self.custom_webReadFile(url,self.header)
  114. root = json.loads(HtmlTxt)
  115. title = root['title']
  116. pic = root['thumb']
  117. remark = root['commendreason']
  118. content = root['description']
  119. actor = root['starring']
  120. direct = root['direct']
  121. vodItems.append(title + "$" + aid)
  122. series = root['info']['series_data']
  123. series = root['info']['series_data']
  124. for ser in series:
  125. vodItems.append(ser['title'] + "$" + ser['contentid'])
  126. joinStr = '#'.join(vodItems)
  127. playList.append(joinStr)
  128. except:
  129. joinStr = '#'.join([title + "$" + aid])
  130. playList.append(joinStr)
  131. else:
  132. pass
  133. vod = {
  134. "vod_id":array[0],
  135. "vod_name":title,
  136. "vod_pic":pic,
  137. "type_name":'',
  138. "vod_year":"",
  139. "vod_area":"",
  140. "vod_remarks":remark,
  141. "vod_actor":actor,
  142. "vod_director":direct,
  143. "vod_content":content
  144. }
  145. vod['vod_play_from'] = "$$$".join(vod_play_from)
  146. vod['vod_play_url'] = "$$$".join(playList)
  147. result = {
  148. 'list':[
  149. vod
  150. ]
  151. }
  152. return result
  153. def searchContent(self,key,quick):
  154. url = 'https://www.1905.com/search/index-p-type-film-q-{}.html?envod=1&year=0&score=0&order=0'.format(urllib.parse.quote(key))#只搜索能看电影,想搜其它的可以把html之后的字符删掉
  155. #https://www.1905.com/search/index-p-type-all-q-{}.html
  156. html = self.html(self.custom_webReadFile(url,self.header))
  157. aList = html.xpath('//div[@class="main clearfix"]')
  158. videos = self.custom_list_search(aList=aList)
  159. result = {
  160. 'list':videos
  161. }
  162. return result
  163. def playerContent(self,flag,id,vipFlags):
  164. result = {}
  165. if flag.find('vip解析')>0:
  166. result["parse"] = 1#0=直接播放、1=嗅探
  167. result["playUrl"] =''
  168. result["url"] = id
  169. result['jx'] = 1#1=VIP解析,0=不解析
  170. result["header"] = ''
  171. else:
  172. nonce = int(round(time.time() * 1000))
  173. expiretime = nonce + 600
  174. uid = str(uuid.uuid4())
  175. playerid = uid.replace("-", "")[5:20]
  176. signature = 'cid={0}&expiretime={1}&nonce={2}&page=https%3A%2F%2Fwww.1905.com%2Fvod%2Fplay%2F{3}.shtml&playerid={4}&type=hls&uuid={5}.dde3d61a0411511d'.format(id,expiretime,nonce,id,playerid,uid)
  177. signature = hashlib.sha1(signature.encode()).hexdigest()
  178. url = 'https://profile.m1905.com/mvod/getVideoinfo.php?nonce={0}&expiretime={1}&cid={2}&uuid={3}&playerid={4}&page=https%3A%2F%2Fwww.1905.com%2Fvod%2Fplay%2F{5}.shtml&type=hls&signature={6}&callback='.format(nonce,expiretime,id,uid,playerid,id,signature)
  179. HtmlTxt=self.custom_webReadFile(url,self.header)
  180. jo = json.loads(HtmlTxt.replace("(", "").replace(")", ""))
  181. data = jo['data']['sign']
  182. sign = ''
  183. qualityStr = ''
  184. if 'uhd' in data.keys():
  185. sign = data['uhd']['sign']
  186. qualityStr = 'uhd'
  187. elif 'hd' in data.keys():
  188. sign = data['hd']['sign']
  189. qualityStr = 'hd'
  190. elif 'sd' in data.keys():
  191. sign = data['sd']['sign']
  192. qualityStr = 'sd'
  193. host = jo['data']['quality'][qualityStr]['host']
  194. path = jo['data']['path'][qualityStr]['path']
  195. playUrl = host + sign + path
  196. result["parse"] = 0#0=直接播放、1=嗅探
  197. result["playUrl"] =''
  198. result["url"] = playUrl
  199. result["header"] = self.header
  200. return result
  201. config = {
  202. "player": {},
  203. "filter": {
  204. "n_1":[
  205. {"key":"by","name":"排序:","value":[{"n":"默认(最热)","v":"o3"},{"n":"最新","v":"o1"},{"n":"好评","v":"o4"}]}
  206. ],
  207. "n_1_c_922":[
  208. {"key":"by","name":"排序:","value":[{"n":"默认(最热)","v":"o3"},{"n":"最新","v":"o1"},{"n":"好评","v":"o4"}]}
  209. ],
  210. "n_2":[
  211. {"key":"by","name":"排序:","value":[{"n":"默认(最热)","v":"o3"},{"n":"最新","v":"o1"},{"n":"好评","v":"o4"}]}
  212. ],
  213. "c_927":[
  214. {"key":"by","name":"排序:","value":[{"n":"默认(最热)","v":"o3"},{"n":"最新","v":"o1"},{"n":"好评","v":"o4"}]}
  215. ],
  216. "n_1_c_586":[
  217. {"key":"by","name":"排序:","value":[{"n":"默认(最热)","v":"o3"},{"n":"最新","v":"o1"},{"n":"好评","v":"o4"}]}
  218. ],
  219. "n_1_c_178":[
  220. {"key":"by","name":"排序:","value":[{"n":"默认(最热)","v":"o3"},{"n":"最新","v":"o1"},{"n":"好评","v":"o4"}]}
  221. ],
  222. "n_1_c_1024":[
  223. {"key":"by","name":"排序:","value":[{"n":"默认(最热)","v":"o3"},{"n":"最新","v":"o1"},{"n":"好评","v":"o4"}]}
  224. ]
  225. }
  226. }
  227. header = {
  228. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.43',
  229. 'Referer': 'https://www.1905.com/vod/list/n_1/o3p1.html',
  230. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
  231. }
  232. def localProxy(self,param):
  233. return [200, "video/MP2T", action, ""]
    # ----------------------------------------------- Custom helper functions -----------------------------------------------
    # Build result entries from category listing anchors
  236. def custom_list(self,aList):
  237. videos = []
  238. for a in aList:
  239. img=a.xpath('./img/@src')[0]
  240. title=a.xpath('./img/@alt')[0]
  241. url=a.xpath("./@href")[0]
  242. if url.find('vip.1905')>1:#可以除掉
  243. continue
  244. if self.custom_RegexGetText(url,'(play)',1)=='':
  245. vod_id="{0}###{1}###{2}".format(url,title,img)
  246. else:
  247. id=self.custom_RegexGetText(url,r'play/(.*?)\.sh',1)
  248. vod_id="{0}###{1}###{2}".format(id,title,img)
  249. videos.append({
  250. "vod_id":vod_id,
  251. "vod_name":title,
  252. "vod_pic":img,
  253. "vod_remarks":''
  254. })
  255. return videos
  256. def custom_list_search(self,aList):
  257. videos = []
  258. for a in aList:
  259. try:
  260. img=a.xpath('./div[@class="movie-pic"]/a[@class="img-a"]/img/@src')[0]
  261. title=a.xpath('./div[@class="movie-pic"]/a[@class="img-a"]/img/@alt')[0]
  262. url=a.xpath('./ul[@class="cont"]/li[@class="spec paly-tab-icon position-icon"]/a/@href')
  263. if len(url)<1:
  264. url=a.xpath('./div[@class="movie-pic"]/a[@class="img-a"]/@href')[0]
  265. else:
  266. url=url[0]
  267. if url.find('vip.1905')>1:
  268. url=a.xpath('./div[@class="movie-pic"]/a[@class="img-a"]/@href')[0]
  269. if self.custom_RegexGetText(url,'(play)',1)=='':
  270. vod_id="{0}###{1}###{2}".format(url,title,img)
  271. else:
  272. id=self.custom_RegexGetText(url,r'play/(.*?)\.sh',1)
  273. vod_id="{0}###{1}###{2}".format(id,title,img)
  274. videos.append({
  275. "vod_id":vod_id,
  276. "vod_name":title,
  277. "vod_pic":img,
  278. "vod_remarks":''
  279. })
  280. except:
  281. pass
  282. return videos
    # Fetch a web page
  284. def custom_webReadFile(self,urlStr,header=None,codeName='utf-8'):
  285. html=''
  286. if header==None:
  287. header={
  288. "Referer":urlStr,
  289. 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
  290. "Host":self.custom_RegexGetText(Text=urlStr,RegexText='https*://(.*?)(/|$)',Index=1)
  291. }
  292. req=urllib.request.Request(url=urlStr,headers=header)#,headers=header
  293. with urllib.request.urlopen(req) as response:
  294. html = response.read().decode(codeName,'ignore')
  295. return html
    # Extract text with a regular expression
  297. def custom_RegexGetText(self,Text,RegexText,Index):
  298. returnTxt=""
  299. Regex=re.search(RegexText, Text, re.M|re.S)
  300. if Regex is None:
  301. returnTxt=""
  302. else:
  303. returnTxt=Regex.group(Index)
  304. return returnTxt