request.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
# -*- coding: utf-8 -*-
"""
Created on 2023-05-05 17:28
---------
@summary: Crawler entry point
---------
@author: pepsi
"""
  9. import httpx
  10. import random
  11. from core.config import settings
  12. from utils import define
  13. class Request:
  14. __REQUEST_ATTRS__ = {
  15. # "method",
  16. # "url",
  17. "params",
  18. "data",
  19. "headers",
  20. "cookies",
  21. "files",
  22. "auth",
  23. "timeout",
  24. "allow_redirects",
  25. # "proxies",
  26. "hooks",
  27. "cert",
  28. "json",
  29. "follow_redirects"
  30. }
  31. def __init__(
  32. self,
  33. url,
  34. agent=True,
  35. method="GET",
  36. **kwargs,
  37. ):
  38. """
  39. 以下参数与requests参数使用方式一致
  40. @param method: 请求方式,如POST或GET,默认根据data值是否为空来判断
  41. @param params: 请求参数
  42. @param data: 请求body
  43. @param json: 请求json字符串,同 json.dumps(data)
  44. @param headers:
  45. @param cookies: 字典 或 CookieJar 对象
  46. @param files:
  47. @param auth:
  48. @param timeout: (浮点或元组)等待服务器数据的超时限制,是一个浮点数,或是一个(connect timeout, read timeout) 元组
  49. @param allow_redirects : Boolean. True 表示允许跟踪 POST/PUT/DELETE 方法的重定向
  50. @param proxies: 代理 {"http":"http://xxx", "https":"https://xxx"}
  51. @param verify: 为 True 时将会验证 SSL 证书
  52. @param stream: 如果为 False,将会立即下载响应内容
  53. @param cert:
  54. --
  55. @param **kwargs: 其他值: 如 Request(item=item) 则item可直接用 request.item 取出
  56. ---------
  57. @result:
  58. """
  59. self.url = url
  60. self.agent = agent
  61. self.method = method
  62. # 自定义属性,不参与序列化
  63. self.requests_kwargs = {}
  64. for key, value in kwargs.items():
  65. if key in self.__class__.__REQUEST_ATTRS__: # 取requests参数
  66. self.requests_kwargs[key] = value
  67. self.__dict__[key] = value
  68. def __setattr__(self, key, value):
  69. """
  70. 针对 request.xxx = xxx 的形式,更新reqeust及内部参数值
  71. @param key:
  72. @param value:
  73. @return:
  74. """
  75. self.__dict__[key] = value
  76. if key in self.__class__.__REQUEST_ATTRS__:
  77. self.requests_kwargs[key] = value
  78. def make_requests_kwargs(self):
  79. if not self.requests_kwargs.get("timeout"):
  80. self.requests_kwargs.setdefault(
  81. "timeout", define.DEFAULT_REQUEST_TIMEOUT
  82. )
  83. method = self.__dict__.get("method")
  84. if not method:
  85. if "data" in self.requests_kwargs or "json" in self.requests_kwargs:
  86. method = "POST"
  87. else:
  88. method = "GET"
  89. self.method = method
  90. headers = self.requests_kwargs.get("headers", {})
  91. if "user-agent" not in headers and "User-Agent" not in headers:
  92. headers.update({"User-Agent": define.DEFAULT_USER_AGENT}) # 默认ua
  93. self.requests_kwargs.update(headers=headers)
  94. async def fetch(self):
  95. self.make_requests_kwargs()
  96. if self.agent:
  97. proxies = {
  98. "http://": random.choice(settings.IP_AGENTS),
  99. }
  100. else:
  101. proxies = self.__dict__.get("proxies", {})
  102. async with httpx.AsyncClient(proxies=proxies) as session:
  103. response = await session.request(method=self.method, url=self.url, **self.requests_kwargs)
  104. return response
  105. def request(self):
  106. self.make_requests_kwargs()
  107. if self.agent:
  108. proxies = {
  109. "http://": random.choice(settings.IP_AGENTS),
  110. }
  111. else:
  112. proxies = self.__dict__.get("proxies", {})
  113. response = httpx.Client(proxies=proxies).request(method=self.method, url=self.url, **self.requests_kwargs)
  114. return response