1
0
mirror of https://github.com/apachecn/epub-crawler.git synced 2025-06-03 23:58:27 +00:00

2023-03-11 11:51:45

This commit is contained in:
wizardforcel 2023-03-11 11:51:45 +08:00
parent af69f72f1f
commit 8428f07fda
2 changed files with 25 additions and 3 deletions

View File

@ -4,6 +4,7 @@
from urllib.parse import urljoin
import sys
import json
import yaml
import warnings
from pyquery import PyQuery as pq
import time
@ -182,8 +183,25 @@ def main():
if not path.exists(cfg_fname):
print('please provide config file')
return
user_cfg = json.loads(open(cfg_fname, encoding='utf-8').read())
ext = extname(cfg_fname).lower()
cont = open(cfg_fname, encoding='utf-8').read()
if ext == 'json':
user_cfg = json.loads(cont)
elif ext in ['yaml', 'yml']:
user_cfg = yaml.safe_load(cont)
elif ext == 'txt':
urls = [l.strip() for l in cont.split('\n')]
urls = [l for l in user_cfg if l]
name = re.sub('\.\w+$', '', path.basename(cfg_fname))
user_cfg = {
'name': name,
'url': urls[0] if urls else '',
'list': urls,
}
else:
print('配置文件必须为 JSON、YAML 或 TXT')
return
update_config(cfg_fname, user_cfg)
if config['selenium']:

View File

@ -140,4 +140,8 @@ def size_str_to_int(s):
base = float(m.group(1))
factor = factor_map[m.group(2)]
return int(base * factor)
return int(base * factor)
def extname(fname):
    """Return the extension of *fname* without the leading dot.

    The extension is the run of word characters (letters, digits and
    underscore) that follows a dot at the very end of the string. The
    original letter case is kept; callers that compare against lowercase
    names must lowercase the result themselves.

    Args:
        fname: File name or path to inspect.

    Returns:
        The trailing extension (e.g. ``'json'`` for ``'cfg.json'``), or an
        empty string when *fname* does not end in ``.<word characters>``.

    >>> extname('archive.tar.gz')
    'gz'
    >>> extname('README')
    ''
    """
    # Anchoring at the end means only the last dotted suffix is considered,
    # and a dot inside a directory name ('dir.d/file') does not count,
    # because '/' is not a word character.
    m = re.search(r'\.(\w+)$', fname)
    return m.group(1) if m else ''