vod.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # File : vod.py
  4. # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
  5. # Date : 2022/9/6
  6. import functools
  7. import json
  8. from flask import Blueprint, abort, request, render_template, render_template_string, jsonify, make_response, redirect, \
  9. current_app
  10. from time import time
  11. from utils.web import getParmas, get_interval
  12. from utils.cfg import cfg
  13. from utils.env import get_env
  14. from js.rules import getRuleLists, getJxs
  15. from base.R import R
  16. from utils.log import logger
  17. from utils import parser
  18. from controllers.cms import CMS
  19. from base.database import db
  20. from models.ruleclass import RuleClass
  21. from models.playparse import PlayParse
  22. from js.rules import getRules
  23. from controllers.service import storage_service, rules_service
  24. from concurrent.futures import ThreadPoolExecutor, as_completed, thread # 引入线程池
  25. from quickjs import Function, Context
  26. import ujson
# Blueprint that carries this module's routes (``/vods`` and ``/vod``).
vod = Blueprint("vod", __name__)
  28. def search_one_py(rule, wd, before: str = ''):
  29. t1 = time()
  30. if not before:
  31. with open('js/模板.js', encoding='utf-8') as f:
  32. before = f.read().split('export')[0]
  33. js_path = f'js/{rule}.js'
  34. try:
  35. ctx, js_code = parser.runJs(js_path, before=before)
  36. if not js_code:
  37. return None
  38. ruleDict = ctx.rule.to_dict()
  39. ruleDict['id'] = rule # 把路由请求的id装到字典里,后面播放嗅探才能用
  40. logger.info(f'规则{rule}装载耗时:{get_interval(t1)}毫秒')
  41. cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg)
  42. data = cms.searchContent(wd, show_name=True)
  43. return data
  44. except Exception as e:
  45. print(f'{rule}发生错误:{e}')
  46. return None
  47. def search_one(rule, wd, before: str = '', env: dict = None, app=None):
  48. t1 = time()
  49. if not before:
  50. with open('js/模板.js', encoding='utf-8') as f:
  51. before = f.read().split('export')[0]
  52. end_code = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
  53. js_path = f'js/{rule}.js'
  54. ctx = Context()
  55. try:
  56. with open(js_path, encoding='utf-8') as f2:
  57. jscode = f2.read()
  58. if env:
  59. # 渲染字符串文本 render_template_string 必须带 flask的上下文
  60. with app.app_context():
  61. for k in env:
  62. # print(f'${k}', f'{env[k]}')
  63. if f'${k}' in jscode:
  64. jscode = jscode.replace(f'${k}', f'{env[k]}')
  65. # jscode = render_template_string(jscode, **env)
  66. # if '007' in rule:
  67. # print(rule,jscode)
  68. jscode = before + jscode + end_code
  69. # print(jscode)
  70. ctx.eval(jscode)
  71. js_ret = ctx.get('rule')
  72. ruleDict = ujson.loads(js_ret.json())
  73. ruleDict['id'] = rule # 把路由请求的id装到字典里,后面播放嗅探才能用
  74. logger.info(f'规则{rule}装载耗时:{get_interval(t1)}毫秒')
  75. cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg)
  76. data = cms.searchContent(wd, show_name=True)
  77. return data
  78. except Exception as e:
  79. logger.info(f'{e}')
  80. return R.failed('爬虫规则加载失败')
  81. def multi_search2(wd):
  82. t1 = time()
  83. lsg = storage_service()
  84. try:
  85. timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
  86. except:
  87. timeout = 5
  88. rules = getRules('js')['list']
  89. rule_names = list(map(lambda x: x['name'], rules))
  90. rules_exclude = ['drpy']
  91. new_rules = list(filter(lambda x: x.get('searchable', 0) and x.get('name', '') not in rules_exclude, rules))
  92. search_sites = [new_rule['name'] for new_rule in new_rules]
  93. nosearch_sites = set(rule_names) ^ set(search_sites)
  94. nosearch_sites.remove('drpy')
  95. # print(nosearch_sites)
  96. logger.info(f'开始聚搜{wd},共计{len(search_sites)}个规则,聚搜超时{timeout}秒')
  97. logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
  98. # print(search_sites)
  99. res = []
  100. with open('js/模板.js', encoding='utf-8') as f:
  101. before = f.read().split('export')[0]
  102. logger.info(f'聚搜准备工作耗时:{get_interval(t1)}毫秒')
  103. t2 = time()
  104. thread_pool = ThreadPoolExecutor(len(search_sites)) # 定义线程池来启动多线程执行此任务
  105. obj_list = []
  106. try:
  107. for site in search_sites:
  108. obj = thread_pool.submit(search_one, site, wd, before)
  109. obj_list.append(obj)
  110. thread_pool.shutdown(wait=True) # 等待所有子线程并行完毕
  111. vod_list = [obj.result() for obj in obj_list]
  112. for vod in vod_list:
  113. if vod and isinstance(vod, dict) and vod.get('list') and len(vod['list']) > 0:
  114. res.extend(vod['list'])
  115. result = {
  116. 'list': res
  117. }
  118. logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时{get_interval(t1)}毫秒')
  119. except Exception as e:
  120. result = {
  121. 'list': []
  122. }
  123. logger.info(
  124. f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时:{get_interval(t1)}毫秒,发生错误:{e}')
  125. return jsonify(result)
  126. def merged_hide(merged_rules):
  127. t1 = time()
  128. store_rule = rules_service()
  129. hide_rules = store_rule.getHideRules()
  130. hide_rule_names = list(map(lambda x: x['name'], hide_rules))
  131. # print('隐藏:',hide_rule_names)
  132. all_cnt = len(merged_rules)
  133. # print(merged_rules)
  134. def filter_show(x):
  135. # name = x['api'].split('rule=')[1].split('&')[0] if 'rule=' in x['api'] else x['key'].replace('dr_','')
  136. name = x
  137. # print(name)
  138. return name not in hide_rule_names
  139. merged_rules = list(filter(filter_show, merged_rules))
  140. # print('隐藏后:',merged_rules)
  141. logger.info(
  142. f'数据库筛选隐藏规则耗时{get_interval(t1)}毫秒,共计{all_cnt}条规则,隐藏后可渲染{len(merged_rules)}条规则')
  143. # merged_rules = []
  144. return merged_rules
  145. def disable_exit_for_threadpool_executor():
  146. import atexit
  147. import concurrent.futures
  148. atexit.unregister(concurrent.futures.thread._python_exit)
  149. def sort_lsg_rules(sites: list):
  150. """
  151. 查询结果按order和write_date 联合排序
  152. :param sites:
  153. :return:
  154. """
  155. def comp(x, y):
  156. if x['order'] > y['order']:
  157. return 1
  158. elif x['order'] < y['order']:
  159. return - 1
  160. else:
  161. if x['write_date'] < y['write_date']:
  162. return 1
  163. elif x['write_date'] > y['write_date']:
  164. return -1
  165. else:
  166. return 0
  167. sites.sort(key=functools.cmp_to_key(comp), reverse=False)
  168. return sites
  169. def sort_lsg_rules2(sites: list, lsg_rule_names: list):
  170. """
  171. 查询结果按order和write_date 联合排序
  172. :param sites:
  173. :return:
  174. """
  175. def comp(x, y):
  176. try:
  177. x1 = lsg_rule_names.index(x)
  178. except:
  179. x1 = 999
  180. try:
  181. y1 = lsg_rule_names.index(y)
  182. except:
  183. y1 = 999
  184. if x1 >= y1:
  185. return 1
  186. elif x1 < y1:
  187. return - 1
  188. sites.sort(key=functools.cmp_to_key(comp), reverse=False)
  189. return sites
  190. def getSearchSites():
  191. val = {}
  192. lsg = storage_service()
  193. try:
  194. timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
  195. except:
  196. timeout = 5
  197. val['timeout'] = timeout
  198. rules = getRules('js')['list']
  199. rule_names = list(map(lambda x: x['name'], rules))
  200. rules_exclude = ['drpy']
  201. new_rules = list(filter(lambda x: x.get('searchable', 0) and x.get('name', '') not in rules_exclude, rules))
  202. total_search = [new_rule['name'] for new_rule in new_rules]
  203. nosearch_sites = set(rule_names) ^ set(total_search)
  204. nosearch_sites.remove('drpy')
  205. val['total_search'] = total_search
  206. val['nosearch_sites'] = list(nosearch_sites)
  207. search_sites = merged_hide(total_search)
  208. lsg_rules = rules_service()
  209. lsg_rule_list = lsg_rules.query_all()
  210. lsg_rule_list = list(filter(lambda x: x['name'] in search_sites, lsg_rule_list))
  211. lsg_rule_names = list(map(lambda x: x['name'], lsg_rule_list))
  212. search_sites = sort_lsg_rules2(search_sites, lsg_rule_names)
  213. search_limit = lsg.getItem('SEARCH_LIMIT', 24)
  214. try:
  215. search_limit = int(search_limit)
  216. except:
  217. search_limit = 0
  218. if search_limit < 1:
  219. search_limit = 0
  220. search_sites = search_sites[:search_limit]
  221. val['search_limit'] = search_limit
  222. val['search_sites'] = search_sites
  223. return val
  224. def multi_search(wd):
  225. t1 = time()
  226. val = getSearchSites()
  227. timeout = val['timeout']
  228. total_search = val['total_search']
  229. nosearch_sites = val['nosearch_sites']
  230. search_limit = val['search_limit']
  231. search_sites = val['search_sites']
  232. env = get_env()
  233. logger.info(f'开始聚搜{wd},共计{len(total_search)}个规则,聚搜超时{timeout}秒')
  234. logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
  235. msearch_msg = f'搜索限制条数:{search_limit}/{len(search_sites)} {search_sites}'
  236. logger.info(msearch_msg)
  237. print(msearch_msg)
  238. # search_sites = []
  239. res = []
  240. if len(search_sites) > 0:
  241. with open('js/模板.js', encoding='utf-8') as f:
  242. before = f.read().split('export')[0]
  243. with ThreadPoolExecutor(max_workers=len(search_sites)) as executor:
  244. to_do = []
  245. for site in search_sites:
  246. future = executor.submit(search_one, site, wd, before, env, current_app._get_current_object())
  247. to_do.append(future)
  248. try:
  249. for future in as_completed(to_do, timeout=timeout): # 并发执行
  250. ret = future.result()
  251. # print(ret)
  252. if ret and isinstance(ret, dict) and ret.get('list'):
  253. res.extend(ret['list'])
  254. except Exception as e:
  255. print(f'发生错误:{e}')
  256. import atexit
  257. atexit.unregister(thread._python_exit)
  258. executor.shutdown = lambda wait: None
  259. # disable_exit_for_threadpool_executor()
  260. logger.info(f'drpy聚搜{len(search_sites)}个源共计耗时{get_interval(t1)}毫秒')
  261. return jsonify({
  262. "list": res
  263. })
  264. @vod.route('/vods')
  265. def vods_search():
  266. val = getSearchSites()
  267. print(val)
  268. # return jsonify(val)
  269. return render_template('show_search.html', val=val)
  270. @vod.route('/vod')
  271. def vod_home():
  272. lsg = storage_service()
  273. js0_disable = lsg.getItem('JS0_DISABLE', cfg.get('JS0_DISABLE', 0))
  274. if js0_disable:
  275. abort(403)
  276. js0_password = lsg.getItem('JS0_PASSWORD', cfg.get('JS0_PASSWORD', ''))
  277. # print('js0_password:',js0_password)
  278. if js0_password:
  279. pwd = getParmas('pwd')
  280. if pwd != js0_password:
  281. abort(403)
  282. t0 = time()
  283. rule = getParmas('rule')
  284. ac = getParmas('ac')
  285. ids = getParmas('ids')
  286. if ac and ids and ids.find('#') > -1: # 聚搜的二级
  287. id_list = ids.split(',')
  288. rule = id_list[0].split('#')[1]
  289. # print(rule)
  290. ext = getParmas('ext')
  291. filters = getParmas('f')
  292. tp = getParmas('type')
  293. # print(f'type:{tp}')
  294. # if not ext.startswith('http') and not rule:
  295. if not rule:
  296. return R.failed('规则字段必填')
  297. rule_list = getRuleLists()
  298. # if not ext.startswith('http') and not rule in rule_list:
  299. if not ext and not rule in rule_list:
  300. msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}'
  301. return R.failed(msg)
  302. # logger.info(f'检验耗时:{get_interval(t0)}毫秒')
  303. t1 = time()
  304. # js_path = f'js/{rule}.js' if not ext.startswith('http') else ext
  305. js_path = f'js/{rule}.js' if not ext else ext
  306. with open('js/模板.js', encoding='utf-8') as f:
  307. before = f.read().split('export')[0]
  308. # logger.info(f'js读取耗时:{get_interval(t1)}毫秒')
  309. end_code = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
  310. logger.info(f'参数检验js读取共计耗时:{get_interval(t0)}毫秒')
  311. t2 = time()
  312. # ctx, js_code = parser.runJs(js_path,before=before)
  313. # if not js_code:
  314. # return R.failed('爬虫规则加载失败')
  315. # # rule = ctx.eval('rule')
  316. # # print(type(ctx.rule.lazy()),ctx.rule.lazy().toString())
  317. # ruleDict = ctx.rule.to_dict()
  318. ctx = Context()
  319. try:
  320. with open(js_path, encoding='utf-8') as f2:
  321. jscode = f2.read()
  322. env = get_env()
  323. for k in env:
  324. # print(f'${k}',f'{env[k]}')
  325. if f'${k}' in jscode:
  326. jscode = jscode.replace(f'${k}', f'{env[k]}')
  327. # print(env)
  328. # if env:
  329. # jscode = render_template_string(jscode,**env)
  330. # print(jscode)
  331. jscode = before + jscode + end_code
  332. # print(jscode)
  333. ctx.eval(jscode)
  334. js_ret = ctx.get('rule')
  335. rule_json = js_ret.json() # 规则的json字符串
  336. ruleDict = ujson.loads(rule_json)
  337. except Exception as e:
  338. logger.info(f'{e}')
  339. return R.failed('爬虫规则加载失败')
  340. # print(type(ruleDict))
  341. # print(ruleDict)
  342. # print(ruleDict)
  343. ruleDict['id'] = rule # 把路由请求的id装到字典里,后面播放嗅探才能用
  344. # print(ruleDict)
  345. # print(rule)
  346. # print(type(rule))
  347. # print(ruleDict)
  348. logger.info(f'js装载耗时:{get_interval(t2)}毫秒')
  349. # print(ruleDict)
  350. # print(rule)
  351. cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg, ext)
  352. wd = getParmas('wd')
  353. quick = getParmas('quick')
  354. play = getParmas('play') # 类型为4的时候点击播放会带上来
  355. flag = getParmas('flag') # 类型为4的时候点击播放会带上来
  356. # myfilter = getParmas('filter')
  357. t = getParmas('t')
  358. pg = getParmas('pg', '1')
  359. pg = int(pg)
  360. # print('pg:',pg)
  361. q = getParmas('q')
  362. play_url = getParmas('play_url')
  363. if play:
  364. jxs = getJxs()
  365. play_url = play.split('play_url=')[1]
  366. play_url = cms.playContent(play_url, jxs, flag)
  367. if isinstance(play_url, str):
  368. # return redirect(play_url)
  369. # return jsonify({'parse': 0, 'playUrl': play_url, 'jx': 0, 'url': play_url})
  370. # return jsonify({'parse': 0, 'playUrl': play_url, 'jx': 0, 'url': ''})
  371. return jsonify({'parse': 0, 'playUrl': '', 'jx': 0, 'url': play_url})
  372. elif isinstance(play_url, dict):
  373. return jsonify(play_url)
  374. else:
  375. return play_url
  376. if play_url: # 播放
  377. jxs = getJxs()
  378. play_url = cms.playContent(play_url, jxs)
  379. if isinstance(play_url, str):
  380. return redirect(play_url)
  381. elif isinstance(play_url, dict):
  382. return jsonify(play_url)
  383. else:
  384. return play_url
  385. if ac and t: # 一级
  386. fl = {}
  387. if filters and filters.find('{') > -1 and filters.find('}') > -1:
  388. fl = json.loads(filters)
  389. # print(filters,type(filters))
  390. # print(fl,type(fl))
  391. data = cms.categoryContent(t, pg, fl)
  392. # print(data)
  393. return jsonify(data)
  394. if ac and ids: # 二级
  395. id_list = ids.split(',')
  396. show_name = False
  397. if ids.find('#') > -1:
  398. id_list = list(map(lambda x: x.split('#')[0], id_list))
  399. show_name = True
  400. # print('app:377',len(id_list))
  401. # print(id_list)
  402. data = cms.detailContent(pg, id_list, show_name)
  403. # print(data)
  404. return jsonify(data)
  405. if wd: # 搜索
  406. if rule == 'drpy':
  407. print(f'准备单独处理聚合搜索:{wd}')
  408. return multi_search(wd)
  409. # return multi_search2(wd)
  410. else:
  411. data = cms.searchContent(wd, pg)
  412. # print(data)
  413. return jsonify(data)
  414. # return jsonify({'rule':rule,'js_code':js_code})
  415. logger.info(rule_json)
  416. home_data = cms.homeContent(pg)
  417. return jsonify(home_data)