Mirror of https://github.com/apachecn/epub-crawler.git (synced 2025-06-06 17:56:57 +00:00)

Commit e153e2c4c4, 2023-02-14 13:10:54
Changelog: the commit retroactively inserts the missing v2023.1.18.0 section; the v2023.2.14.0 entry above it and the v2022.8.20.0 entries below it are unchanged context.

```diff
@@ -4,6 +4,10 @@ v2023.2.14.0
 + Added new config option `sizeLimit` to cap the total size of a single EPUB
 
+v2023.1.18.0
+
++ Fixed loading of external scripts
+
 v2022.8.20.0
 
 + Added Selenium support
 
```
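The `sizeLimit` option named in the pre-existing v2023.2.14.0 entry is not implemented anywhere in this diff. As a rough, purely hypothetical sketch of what such a cap involves, assuming the limit is expressed in bytes and checked against a running total of downloaded content:

```python
# Hypothetical sketch only; not the project's actual sizeLimit code.
def size_limit_reached(total_bytes, size_limit):
    # A falsy sizeLimit (0 or None) is treated as "no limit".
    return bool(size_limit) and total_bytes >= size_limit

size_limit = 50 * 1024 * 1024   # assumed unit: bytes (a 50 MiB cap)
total = 0
for chunk in (b'<html>...</html>', b'\x89PNG...'):  # placeholder article/image data
    if size_limit_reached(total + len(chunk), size_limit):
        break  # stop adding to this EPUB once the cap would be exceeded
    total += len(chunk)
```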
The package version is bumped to match the new changelog section:

```diff
@@ -10,4 +10,4 @@ from . import util
 __author__ = "ApacheCN"
 __email__ = "apachecn@163.com"
 __license__ = "SATA"
-__version__ = "2022.8.20.0"
+__version__ = "2023.1.18.0"
```
The crawler's main module gains a `warnings` import:

```diff
@@ -4,6 +4,7 @@
 from urllib.parse import urljoin
 import sys
 import json
+import warnings
 from pyquery import PyQuery as pq
 import time
 from os import path
```
followed by a module-level filter that suppresses all warnings at import time:

```diff
@@ -19,6 +20,8 @@ from .config import config
 from .sele_crawler import crawl_sele
 from .common import *
 
+warnings.filterwarnings("ignore")
+
 def get_toc_from_cfg():
     if config['list'] and len(config['list']) > 0:
         return config['list']
```
Certificate verification is then disabled on the TOC request in `get_toc_from_cfg()`:

```diff
@@ -34,6 +37,7 @@ def get_toc_from_cfg():
         headers=config['headers'],
         timeout=config['timeout'],
         proxies=config['proxy'],
+        verify=False,
     ).content.decode(config['encoding'], 'ignore')
     return get_toc(html, config['url'])
 
```
and likewise on the page request in `tr_download_page()` (the `下载成功` message means "download succeeded"):

```diff
@@ -93,6 +97,7 @@ def tr_download_page(url, art, imgs):
         headers=config['headers'],
         timeout=config['timeout'],
         proxies=config['proxy'],
+        verify=False,
     ).content.decode(config['encoding'], 'ignore')
     print(f'{url} 下载成功')
     art.update(get_article(html, url))
```
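`verify=False` turns off TLS certificate checking, so pages behind self-signed or expired certificates no longer abort the crawl; the price is that urllib3 emits an `InsecureRequestWarning` per request, which is exactly what the new module-level `warnings.filterwarnings("ignore")` suppresses. A minimal standalone reproduction (the URL is a placeholder):

```python
import warnings
import requests

# Without this filter, every verify=False request prints an
# InsecureRequestWarning from urllib3 to stderr.
warnings.filterwarnings("ignore")

# Placeholder URL; verify=False skips certificate validation entirely.
html = requests.get('https://example.com', verify=False).content
```

A narrower alternative would be `urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)`, which silences only that warning instead of every warning the process raises.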
`update_config()` now takes the config file's path as well:

```diff
@@ -105,7 +110,7 @@ def tr_download_page(url, art, imgs):
     time.sleep(config['wait'])
 
 
-def update_config(user_cfg):
+def update_config(cfg_fname, user_cfg):
     global get_toc
     global get_article
 
```
and uses it to resolve the external script relative to the config file rather than the current working directory:

```diff
@@ -124,7 +129,8 @@ def update_config(user_cfg):
     set_img_pool(ThreadPoolExecutor(config['imgThreads']))
 
     if config['external']:
-        mod = load_module(config['external'])
+        ex_fname = path.join(path.dirname(cfg_fname), config['external'])
+        mod = load_module(ex_fname)
         get_toc = getattr(mod, 'get_toc', get_toc)
         get_article = getattr(mod, 'get_article', get_article)
 
```
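This is the fix behind the "Fixed loading of external scripts" changelog entry: `config['external']` used to be passed to `load_module` as-is, so a relative path like `ext.py` resolved against whatever directory the crawler happened to be launched from; now it resolves against the config file's own directory. The project's `load_module` helper is not shown in this diff; a standard-library sketch of how such a helper is typically written (the names and paths below are assumptions):

```python
import importlib.util
from os import path

def load_module(fname):
    # Load a Python source file by path and return the module object;
    # a typical stand-in for the crawler's load_module helper.
    name = path.splitext(path.basename(fname))[0]
    spec = importlib.util.spec_from_file_location(name, fname)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod

cfg_fname = 'books/mybook/config.json'   # placeholder config path
external = 'ext.py'                      # placeholder value of config['external']
ex_fname = path.join(path.dirname(cfg_fname), external)  # -> 'books/mybook/ext.py'
```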
The call site in `main()` passes the path through:

```diff
@@ -178,7 +184,7 @@ def main():
         return
 
     user_cfg = json.loads(open(cfg_fname, encoding='utf-8').read())
-    update_config(user_cfg)
+    update_config(cfg_fname, user_cfg)
 
     if config['selenium']:
         crawl_sele()
```
@ -22,8 +22,8 @@ config = {
|
|||||||
'readTimeout': 60,
|
'readTimeout': 60,
|
||||||
'imgSrc': ['data-src', 'data-original-src', 'src'],
|
'imgSrc': ['data-src', 'data-original-src', 'src'],
|
||||||
'proxy': '',
|
'proxy': '',
|
||||||
'textThreads': 5,
|
'textThreads': 8,
|
||||||
'imgThreads': 5,
|
'imgThreads': 8,
|
||||||
'external': None,
|
'external': None,
|
||||||
'checkStatus': False,
|
'checkStatus': False,
|
||||||
'cache': True,
|
'cache': True,
|
||||||
|
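`textThreads` and `imgThreads` size the page and image download pools; the earlier `update_config` hunk already showed the image pool being created with `set_img_pool(ThreadPoolExecutor(config['imgThreads']))`. A simplified illustration of the pattern, with a stub worker and placeholder URLs:

```python
from concurrent.futures import ThreadPoolExecutor

config = {'textThreads': 8, 'imgThreads': 8}

def download_page(url):
    # Stub standing in for the crawler's per-page worker (tr_download_page).
    return f'fetched {url}'

urls = [f'https://example.com/page/{i}' for i in range(20)]  # placeholders
with ThreadPoolExecutor(config['textThreads']) as pool:
    for res in pool.map(download_page, urls):
        print(res)
```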
@ -46,6 +46,7 @@ def tr_download_img(url, imgs, picname):
|
|||||||
retry=config['retry'],
|
retry=config['retry'],
|
||||||
timeout=config['timeout'],
|
timeout=config['timeout'],
|
||||||
proxies=config['proxy'],
|
proxies=config['proxy'],
|
||||||
|
verify=False,
|
||||||
).content
|
).content
|
||||||
print(f'{url} 下载成功')
|
print(f'{url} 下载成功')
|
||||||
data = opti_img(data, config['optiMode'], config['colors']) or b''
|
data = opti_img(data, config['optiMode'], config['colors']) or b''
|
||||||
|
A three-line `publish.sh` (new file) automates releases: clear `dist`, build sdist and wheel, upload with twine.

```diff
@@ -0,0 +1,3 @@
+rm -rf dist
+python setup.py sdist bdist_wheel
+twine upload dist/* -u $(pip config get pypi.username) -p $(pip config get pypi.password)
```
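The script reads the PyPI credentials from pip's config store instead of hardcoding them, so they would be seeded once beforehand with something like `pip config set pypi.username <name>` and `pip config set pypi.password <token>` (pip treats `pypi` as just another config section and simply stores and returns the values). Note that this leaves the password in a plain-text pip config file; twine's usual `~/.pypirc` or keyring integration is the more conventional home for upload credentials.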
Finally, the `requests` requirement gains the `socks` extra:

```diff
@@ -1,4 +1,4 @@
-requests
+requests[socks]
 pyquery
 GenEpub
 imgyaso
```
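The `socks` extra pulls in PySocks, presumably so the crawler's `proxy` setting can point at a SOCKS proxy as well as an HTTP one. With PySocks installed, requests accepts `socks5://` URLs (or `socks5h://`, which also routes DNS lookups through the proxy) in its `proxies` mapping; the address below is a placeholder:

```python
import requests

# Placeholder SOCKS5 proxy address; requires `pip install requests[socks]`.
proxies = {
    'http': 'socks5h://127.0.0.1:1080',
    'https': 'socks5h://127.0.0.1:1080',
}
html = requests.get('https://example.com', proxies=proxies).content
```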