1
0
mirror of https://github.com/apachecn/epub-crawler.git synced 2025-06-06 17:56:57 +00:00

2023-03-11 11:51:45

This commit is contained in:
wizardforcel 2023-03-11 11:51:45 +08:00
parent af69f72f1f
commit 8428f07fda
2 changed files with 25 additions and 3 deletions

View File

@ -4,6 +4,7 @@
from urllib.parse import urljoin from urllib.parse import urljoin
import sys import sys
import json import json
import yaml
import warnings import warnings
from pyquery import PyQuery as pq from pyquery import PyQuery as pq
import time import time
@ -183,7 +184,24 @@ def main():
print('please provide config file') print('please provide config file')
return return
user_cfg = json.loads(open(cfg_fname, encoding='utf-8').read()) ext = extname(cfg_fname).lower()
cont = open(cfg_fname, encoding='utf-8').read()
if ext == 'json':
user_cfg = json.loads(cont)
elif ext in ['yaml', 'yml']:
user_cfg = yaml.safe_load(cont)
elif ext == 'txt':
urls = [l.strip() for l in cont.split('\n')]
urls = [l for l in user_cfg if l]
name = re.sub('\.\w+$', '', path.basename(cfg_fname))
user_cfg = {
'name': name,
'url': urls[0] if urls else '',
'list': urls,
}
else:
print('配置文件必须为 JSON、YAML 或 TXT')
return
update_config(cfg_fname, user_cfg) update_config(cfg_fname, user_cfg)
if config['selenium']: if config['selenium']:

View File

@ -141,3 +141,7 @@ def size_str_to_int(s):
factor = factor_map[m.group(2)] factor = factor_map[m.group(2)]
return int(base * factor) return int(base * factor)
def extname(fname):
    """Return the extension of *fname* without the leading dot.

    The extension is the run of word characters (letters, digits and
    underscore) that follows the last dot at the very end of the name.
    Case is preserved; callers that need a case-insensitive comparison
    should lower-case the result themselves.

    Args:
        fname: File name or path, given as a ``str`` or as an
            ``os.PathLike`` object such as ``pathlib.Path``.

    Returns:
        The extension without the dot, or an empty string when the name
        does not end in a dot followed only by word characters (no dot at
        all, or a non-word character such as ``/`` or ``-`` after the
        last dot).

    >>> extname('config.json')
    'json'
    >>> extname('archive.tar.gz')
    'gz'
    >>> extname('README')
    ''
    """
    # Function-scope import so this helper does not rely on the module
    # having imported ``os``; fspath() returns a str argument unchanged
    # and unwraps path-like objects.
    from os import fspath

    m = re.search(r'\.(\w+)$', fspath(fname))
    return m.group(1) if m else ''