mirror of
https://github.com/apachecn/epub-crawler.git
synced 2025-06-10 11:49:10 +00:00
2021-11-14 19:28:05
This commit is contained in:
parent
4e73bd8a5f
commit
d353944f2f
@@ -40,7 +40,7 @@ def tr_download_img(url, imgs, picname):
|
|||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print(ex)
|
print(ex)
|
||||||
|
|
||||||
def process_img_data_url(url, el_img, imgs):
|
def process_img_data_url(url, el_img, imgs, **kw):
|
||||||
if not re.search(RE_DATA_URL, url):
|
if not re.search(RE_DATA_URL, url):
|
||||||
return False
|
return False
|
||||||
picname = hashlib.md5(url.encode('utf-8')).hexdigest() + '.png'
|
picname = hashlib.md5(url.encode('utf-8')).hexdigest() + '.png'
|
||||||
@@ -64,7 +64,7 @@ def process_img(html, imgs, **kw):
|
|||||||
el_img = el_imgs.eq(i)
|
el_img = el_imgs.eq(i)
|
||||||
url = get_img_src(el_img)
|
url = get_img_src(el_img)
|
||||||
if not url: continue
|
if not url: continue
|
||||||
if process_img_data_url(url, el_img, imgs):
|
if process_img_data_url(url, el_img, imgs, **kw):
|
||||||
continue
|
continue
|
||||||
if not url.startswith('http'):
|
if not url.startswith('http'):
|
||||||
if kw.get('page_url'):
|
if kw.get('page_url'):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user