mirror of
https://github.com/apachecn/epub-crawler.git
synced 2025-06-06 17:56:57 +00:00
2023-03-11 11:51:45
This commit is contained in:
parent
af69f72f1f
commit
8428f07fda
@ -4,6 +4,7 @@
|
|||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
|
import yaml
|
||||||
import warnings
|
import warnings
|
||||||
from pyquery import PyQuery as pq
|
from pyquery import PyQuery as pq
|
||||||
import time
|
import time
|
||||||
@ -183,7 +184,24 @@ def main():
|
|||||||
print('please provide config file')
|
print('please provide config file')
|
||||||
return
|
return
|
||||||
|
|
||||||
user_cfg = json.loads(open(cfg_fname, encoding='utf-8').read())
|
ext = extname(cfg_fname).lower()
|
||||||
|
cont = open(cfg_fname, encoding='utf-8').read()
|
||||||
|
if ext == 'json':
|
||||||
|
user_cfg = json.loads(cont)
|
||||||
|
elif ext in ['yaml', 'yml']:
|
||||||
|
user_cfg = yaml.safe_load(cont)
|
||||||
|
elif ext == 'txt':
|
||||||
|
urls = [l.strip() for l in cont.split('\n')]
|
||||||
|
urls = [u for u in urls if u]  # drop blank lines from the stripped list; user_cfg is not bound yet in the .txt branch
|
||||||
|
name = re.sub('\.\w+$', '', path.basename(cfg_fname))
|
||||||
|
user_cfg = {
|
||||||
|
'name': name,
|
||||||
|
'url': urls[0] if urls else '',
|
||||||
|
'list': urls,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
print('配置文件必须为 JSON、YAML 或 TXT')
|
||||||
|
return
|
||||||
update_config(cfg_fname, user_cfg)
|
update_config(cfg_fname, user_cfg)
|
||||||
|
|
||||||
if config['selenium']:
|
if config['selenium']:
|
||||||
|
@ -141,3 +141,7 @@ def size_str_to_int(s):
|
|||||||
|
|
||||||
factor = factor_map[m.group(2)]
|
factor = factor_map[m.group(2)]
|
||||||
return int(base * factor)
|
return int(base * factor)
|
||||||
|
|
||||||
|
def extname(fname):
    """Return the extension of *fname* without the leading dot.

    The extension is the run of word characters (letters, digits,
    underscore) that follows the last dot at the very end of the string.
    Case is preserved. An empty string is returned when the name does
    not end in such a ``.<word>`` suffix.
    """
    matched = re.search(r'\.(\w+)$', fname)
    if not matched:
        return ''
    return matched.group(1)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user