Skip to content

UserAgent 处理

一、获取随机 UA

方法一:fake-useragent

python
pip install fake-useragent
python
from fake_useragent import UserAgent
import requests

# Create the fake-useragent provider; reading `ua.random` gives a random UA string.
ua = UserAgent()
headers = {"User-Agent": ua.random}
# Always pass a timeout: without one, requests can wait indefinitely on a
# server that accepts the connection but never answers.
response = requests.get("https://www.baidu.com", headers=headers, timeout=10)

支持按浏览器获取:ua.chrome、ua.firefox、ua.ie、ua.random

方法二:自建 UA 池

python
import random

# Hand-maintained pool of desktop User-Agent strings. Each entry is split
# across adjacent literals (joined at compile time) to keep lines readable.
UA_POOL = [
    # Chrome 122 on Windows 10, 64-bit
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36"
    ),
    # Chrome 122 on macOS
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36"
    ),
    # Firefox 123 on Windows 10, 64-bit
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) "
        "Gecko/20100101 Firefox/123.0"
    ),
    # Safari 17.2 on macOS
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/17.2 Safari/605.1.15"
    ),
]

# Pick one entry at random for this set of request headers.
headers = {"User-Agent": random.choice(UA_POOL)}

方法三:程序化生成

python
import random

def generate_user_agent():
    """Return a randomly assembled desktop User-Agent string.

    A browser (Chrome, Firefox, Safari or Edge) is drawn at random and the
    string is built from that browser family's own layout, since the
    families do not share one template:

    * Chrome:  ``Mozilla/5.0 (<platform>) AppleWebKit/537.36 (KHTML, like Gecko)
      Chrome/<version> Safari/537.36``
    * Edge:    the Chrome layout followed by ``Edg/<version>``
    * Firefox: ``Mozilla/5.0 (<platform>; rv:<version>) Gecko/20100101
      Firefox/<version>``
    * Safari:  ``Mozilla/5.0 (<mac platform>) AppleWebKit/605.1.15 (KHTML, like
      Gecko) Version/<version> Safari/605.1.15`` (always a macOS platform)

    Returns:
        str: A value suitable for the ``User-Agent`` request header.
    """
    mac_platform = 'Macintosh; Intel Mac OS X 10_15_7'
    # Full platform tokens rather than bare OS names such as "Windows".
    chromium_platforms = [
        'Windows NT 10.0; Win64; x64',
        mac_platform,
        'X11; Linux i686',
        'X11; Linux x86_64',
    ]
    # Firefox writes the macOS version with dots instead of underscores.
    firefox_platforms = [
        'Windows NT 10.0; Win64; x64',
        'Macintosh; Intel Mac OS X 10.15',
        'X11; Linux i686',
        'X11; Linux x86_64',
    ]
    browsers = [
        ('Chrome', '122.0.0.0'),
        ('Firefox', '123.0'),
        ('Safari', '17.2'),
        ('Edge', '122.0.0.0'),
    ]
    browser, version = random.choice(browsers)

    if browser == 'Safari':
        # Safari is only paired with the macOS platform token.
        return (
            f'Mozilla/5.0 ({mac_platform}) AppleWebKit/605.1.15 '
            f'(KHTML, like Gecko) Version/{version} Safari/605.1.15'
        )

    if browser == 'Firefox':
        platform_token = random.choice(firefox_platforms)
        return (
            f'Mozilla/5.0 ({platform_token}; rv:{version}) '
            f'Gecko/20100101 Firefox/{version}'
        )

    # Chrome and Edge share the Chromium layout; Edge appends its own token.
    platform_token = random.choice(chromium_platforms)
    user_agent = (
        f'Mozilla/5.0 ({platform_token}) AppleWebKit/537.36 '
        f'(KHTML, like Gecko) Chrome/{version} Safari/537.36'
    )
    if browser == 'Edge':
        user_agent += f' Edg/{version}'
    return user_agent

二、解析 UA

方法一:ua-parser

python
pip install ua-parser
python
from ua_parser import user_agent_parser

# Sample UA string to parse (per the string itself: Chrome 66 Mobile on Android 6.0.1).
ua_string = 'Mozilla/5.0 (Linux; Android 6.0.1; NX531J Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.126 Mobile Safari/537.36'
parsed = user_agent_parser.Parse(ua_string)
# Returns information such as device, os, and user_agent

方法二:user-agents

python
pip install pyyaml ua-parser user-agents
python
from user_agents import parse

# Sample UA string to parse (per the string itself: an iPhone on iPhone OS 5_1).
ua_string = 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Mac OS X) AppleWebKit/534.46 Version/5.1 Mobile/9B179 Safari/7534.48.3'
user_agent = parse(ua_string)

# Attributes read from the parsed result; each trailing comment shows the
# value for the sample string above.
user_agent.browser.family    # 'Mobile Safari'
user_agent.os.family         # 'iOS'
user_agent.device.family     # 'iPhone'
user_agent.is_mobile         # True
user_agent.is_tablet         # False
user_agent.is_bot            # False

三、Scrapy 中使用随机 UA

python
# middlewares.py
from fake_useragent import UserAgent

class RandomUserAgentMiddleware:
    """Middleware that fills in a ``User-Agent`` header from fake-useragent.

    The ``RANDOM_UA_TYPE`` setting names the ``UserAgent`` attribute that is
    read for each request; ``"random"`` is used when the setting is absent.
    """

    def __init__(self, crawler):
        """Create the UA provider and read the configured UA type.

        Args:
            crawler: Object whose ``settings.get`` supplies ``RANDOM_UA_TYPE``.
        """
        self.ua = UserAgent()
        settings = crawler.settings
        self.ua_type = settings.get("RANDOM_UA_TYPE", "random")

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the middleware from ``crawler``."""
        middleware = cls(crawler)
        return middleware

    def process_request(self, request, spider):
        """Set ``User-Agent`` on ``request`` unless it already has one.

        The UA value is looked up on every call; ``setdefault`` then leaves
        an existing header untouched. Returns ``None``.
        """
        user_agent = getattr(self.ua, self.ua_type)
        request.headers.setdefault("User-Agent", user_agent)
python
# settings.py
DOWNLOADER_MIDDLEWARES = {
    # Register the custom random-UA middleware with order value 543.
    'your_project.middlewares.RandomUserAgentMiddleware': 543,
    # Mapping a middleware path to None turns it off (Scrapy convention), so
    # the built-in User-Agent middleware does not set the header first.
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
}
RANDOM_UA_TYPE = "random"  # or firefox, chrome
评论
  • 按正序
  • 按倒序
  • 按热度
Powered by Waline v3.7.1