北邮人PT种子下载
之前一直通过SS使用VPS的IPv6访问bt.byr.cn,但是从几个月前开始一直无法使用:浏览器走代理后会直接跳转到一个com.com的网站,不清楚是什么原因;但是使用curl确实是没有问题的:`curl -x socks5://127.0.0.1:1080 -I https://bt.byr.cn/login.php`。前者返回的是302,后者是200。
后来找到了zYeoman的代码,也不清楚当时是怎么搜到的,可以用,但是HTML格式好像有变动,这个好解决,稍微改动一下程序就好了。
然后自己新建了个byr-spider库,方便自己使用吧,加了requirements.txt,加了代理,这个我可以在没有IPv6的情况下,通过代理一样下载种子,然后看有什么自己喜欢需要下载的,就把种子文件传到VPS自己下载就好了。
在家里的NAS机器上新增了定时任务,自己拉最新的种子文件。
`@hourly cd ~/byr && pipenv run python byr.py > log.log 2>&1`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# | |
# Copyright © 2018 Yongwen Zhuang <zeoman@163.com> | |
# Copyright © 2019 Liangwu <glw119@gmail.com> | |
# | |
# Distributed under terms of the MIT license. | |
""" | |
Byr | |
自动从bt.byr.cn上下载第一页种子文件 | |
""" | |
import os | |
from PIL import Image | |
from io import BytesIO | |
import logging | |
import pickle | |
try: | |
from urllib.parse import urlparse, parse_qs | |
except ImportError: | |
from urlparse import urlparse, parse_qs | |
import requests | |
from bs4 import BeautifulSoup | |
from userpass import User | |
from decaptcha.decaptcha import DeCaptcha | |
class Byr(object):
    """Spider for bt.byr.cn: log in, list the first torrent page, download.

    Cookies are cached in the file ``cookie`` and the ids of torrents that
    were already fetched are kept in ``list.csv`` (first column), both in
    the current working directory.
    """

    def __init__(self, proxy='socks5://127.0.0.1:1080'):
        """Set up logging, the HTTP session and the list of known ids.

        :proxy: proxy URL used for both http and https requests; pass None
            or '' to connect directly. The default is the local SOCKS5
            proxy this script has always used.
        """
        self.logger = logging.getLogger("byr")
        self.logger.setLevel(logging.DEBUG)
        # The "byr" logger is shared process-wide: attach the console
        # handler only once so a second Byr() does not duplicate every line.
        if not self.logger.handlers:
            console = logging.StreamHandler()
            console.setLevel(logging.DEBUG)
            console.setFormatter(logging.Formatter(
                '%(asctime)10s [%(filename)s %(levelname)6s:%(lineno)4s - %(funcName)10s ] %(message)s'
            ))
            self.logger.addHandler(console)

        self._session = requests.session()
        if proxy:
            self._session.proxies = {
                'http': proxy,
                'https': proxy,
            }
        self._session.headers = {
            'User-Agent': 'Magic Browser'
        }
        self._root = 'https://bt.byr.cn/'
        self._user = User('.byr')

        # Ids of torrents handled by earlier runs (first CSV column).
        self.list = []
        if os.path.exists('list.csv'):
            self.logger.debug('Read list.csv')
            with open('list.csv', 'r') as f:
                self.list = [line.split(',')[0] for line in f]

    def login(self):
        """Log in to bt.byr.cn and save the session cookies on success.

        Fetches login.php, decodes its CAPTCHA image with the classifier in
        ./decaptcha/captcha_classifier.pkl and posts the credentials to
        takelogin.php.

        :returns: True if the site redirected to index.php (cookies are
            saved), False otherwise
        """
        login_page = self.get_url('login.php')
        captcha_img = login_page.find('img', alt='CAPTCHA')
        hash_input = login_page.find('input', attrs={'name': 'imagehash'})
        if captcha_img is None or hash_input is None:
            # Not the expected login form (proxy error page, layout change).
            self.logger.error('Login error: captcha not found on login page')
            return False
        image_url = captcha_img['src']
        image_hash = hash_input['value']
        self.logger.debug('Image url: %s', image_url)
        self.logger.debug('Image hash: %s', image_hash)

        req = self._session.get(self._root + image_url)
        image_file = Image.open(BytesIO(req.content))
        decaptcha = DeCaptcha()
        decaptcha.load_model('./decaptcha/captcha_classifier.pkl')
        captcha_text = decaptcha.decode(image_file)
        self.logger.debug('Captcha text: %s', captcha_text)

        login_data = {
            'username': self._user.username,
            'password': self._user.password,
            'imagestring': captcha_text,
            'imagehash': image_hash,
        }
        main_page = self._session.post(
            self._root + 'takelogin.php', login_data)
        if main_page.url != self._root + 'index.php':
            self.logger.error('Login error')
            return False
        self._save()
        return True

    def _save(self):
        """Save the session cookies to the file ``cookie``."""
        self.logger.debug('Save cookies')
        with open('cookie', 'wb') as f:
            pickle.dump(self._session.cookies, f)

    def _load(self):
        """Load cookies from the file ``cookie``, or log in if it is absent."""
        if os.path.exists('cookie'):
            with open('cookie', 'rb') as f:
                self.logger.debug('Load cookies from file.')
                # NOTE: pickle is only safe here because the file is written
                # by _save(); never point this at a file from elsewhere.
                self._session.cookies = pickle.load(f)
            return
        self.logger.debug('Load cookies by login')
        # login() saves the cookies itself when it succeeds. Saving again
        # here after a failed login would persist an unauthenticated cookie
        # jar that later runs would load instead of logging in.
        self.login()

    @property
    def pages(self):
        """Yield the torrents listed on the first page of torrents.php.

        :returns: yield ByrPage pages
        """
        self.logger.debug('Get pages')
        page = self.get_url('torrents.php?page=1')
        table = page.find('table', class_='torrents')
        if table is None or table.form is None:
            self.logger.error(
                'Torrent table not found (cookie expired or page layout changed?)')
            return
        rows = table.form.find_all('tr', recursive=False)
        # Skip the first two rows, then take every other row.
        for row in rows[2::2]:
            yield ByrPage(row)

    def get_url(self, url):
        """Fetch a page below the site root and parse it.

        :url: page url, relative to the site root
        :returns: BeautifulSoup of the response text
        """
        self.logger.debug('Get url: %s', url)
        req = self._session.get(self._root + url)
        return BeautifulSoup(req.text, 'lxml')

    def start(self):
        """Download every new torrent on the first page and record it."""
        self.logger.info('Start Spider')
        self._load()
        with open('list.csv', 'a') as f:
            for page in self.pages:
                size = str(page.size) + 'GB'
                self.logger.debug(
                    '%s,%s,%s,%s,%s,%s', page.id, page.name, page.type,
                    size, page.seeders, page.snatched)
                self.logger.debug('Check %s', page.name)
                if page.id in self.list or not page.ok:
                    continue
                self.logger.info('Download %s', page.name)
                if not self.download(page.id):
                    # Not recorded, so the next run tries this torrent again.
                    continue
                f.write(','.join(
                    [page.id, page.name, size, str(page.seeders)]) + '\n')
                # Remember the id so a repeated row is not fetched twice.
                self.list.append(page.id)

    def download(self, id_):
        """Download one torrent file to ./tmp/<id>.torrent.

        :id_: torrent id as a string
        :returns: True if a torrent file was written, False if the server
            answered with an error or with something that is not a torrent
        """
        req = self._session.get(self._root + 'download.php?id=' + id_)
        if not req.ok:
            self.logger.error(
                'Download %s failed: HTTP %s', id_, req.status_code)
            return False
        # A .torrent is a bencoded dictionary and therefore starts with 'd';
        # anything else (e.g. an HTML login page) must not be saved as one.
        if req.content[:1] != b'd':
            self.logger.error(
                'Download %s failed: response is not a torrent', id_)
            return False
        if not os.path.isdir('./tmp'):
            os.makedirs('./tmp')
        with open('./tmp/' + id_ + '.torrent', 'wb') as f:
            f.write(req.content)
        return True
class ByrPage(object):
    """Information parsed from one row of the torrent list table."""

    # Multiplier that converts a size in the given unit to GB. All factors
    # are powers of two, so the MB case equals dividing by 1024 exactly.
    _UNIT_TO_GB = {
        'KB': 1.0 / (1024 * 1024),
        'MB': 1.0 / 1024,
        'GB': 1.0,
        'TB': 1024.0,
    }

    def __init__(self, soup):
        """Read name, type, size, seeders, snatched and id from a row.

        :soup: BeautifulSoup tag of one torrent row
        """
        title_cell = soup.find(class_='torrentname')
        cells = soup.find_all('td')
        url = title_cell.a['href']
        self.name = title_cell.b.text
        self.type = soup.img['title']
        # The numeric columns are addressed from the end of the row.
        self.size = self.tosize(cells[-5].text)
        self.seeders = int(cells[-4].text.replace(',', ''))
        self.snatched = int(cells[-2].text.replace(',', ''))
        self.id = parse_qs(urlparse(url).query)['id'][0]

    @property
    def ok(self):
        """Check torrent info.

        :returns: True if the torrent has at least one seeder
        """
        return self.seeders > 0

    def tosize(self, text):
        """Convert a size such as '1.5GB' or '512MB' to a float in GB.

        The last two characters are taken as the unit (KB, MB, GB or TB,
        case-insensitive); thousands separators and surrounding whitespace
        are ignored. An unrecognised suffix is treated as GB.

        :text: size text, e.g. '123GB', '123MB', '1.5TB'
        :returns: size in GB, e.g. 123.0, 0.1201171875, 1536.0
        """
        text = text.strip()
        unit = text[-2:].upper()
        number = float(text[:-2].replace(',', ''))
        return number * self._UNIT_TO_GB.get(unit, 1.0)
def main():
    """Create the spider and run it once."""
    Byr().start()


# Run the spider only when executed as a script, not when imported.
if __name__ == "__main__":
    main()