85 lines
2.1 KiB
Python
Executable File
85 lines
2.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import re
|
|
import shutil
|
|
import os.path
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Module metadata.
__author__ = 'Alexander Popov'
__version__ = '1.0.1'
__license__ = 'Unlicense'

# Directory that receives the downloaded images and the ``.resume`` file.
DOWNLOAD_DIR = './images'
|
|
|
|
|
|
def checkResumeFile(downloadDir=None):
    """Load the recorded image ids, creating the resume file if it is absent.

    The resume file is ``<downloadDir>/.resume`` and holds one id per line.
    When it does not exist yet, the directory is created (including missing
    parents) and the file is seeded with the placeholder id ``0``.

    Args:
        downloadDir: Directory that holds the ``.resume`` file.  Defaults to
            the module-level ``DOWNLOAD_DIR``.

    Returns:
        A list of at most 20 id strings: the last 20 lines of the resume
        file, or ``['0']`` when the file had to be created.
    """
    if downloadDir is None:
        downloadDir = DOWNLOAD_DIR
    resumePath = os.path.join(downloadDir, '.resume')

    if not os.path.exists(resumePath):
        os.makedirs(downloadDir, exist_ok=True)
        with open(resumePath, 'w', encoding='utf-8') as f:
            f.write('0')
        # Return the placeholder as a string, exactly as it would be read
        # back from the file: an int here cannot be sorted together with
        # the string ids that get added later.
        return ['0']

    with open(resumePath, 'r', encoding='utf-8') as f:
        return [line.rstrip('\n') for line in f][-20:]
|
|
|
|
|
|
def saveResume(resumeList, downloadDir=None):
    """Write the 20 greatest ids (in string sort order) to the resume file.

    The file ``<downloadDir>/.resume`` is overwritten with one id per line.

    Args:
        resumeList: Iterable of image ids.  It is not modified.
        downloadDir: Directory that holds the ``.resume`` file.  Defaults to
            the module-level ``DOWNLOAD_DIR``.

    Raises:
        OSError: If the resume file cannot be opened for writing.
    """
    if downloadDir is None:
        downloadDir = DOWNLOAD_DIR

    # str() every id so that a non-string entry (such as an int placeholder)
    # can be ordered together with string ids; sorted() works on a copy and
    # leaves the caller's list untouched.
    keptIds = sorted(str(item) for item in resumeList)[-20:]

    resumePath = os.path.join(downloadDir, '.resume')
    with open(resumePath, 'w', encoding='utf-8') as f:
        f.writelines('{0}\n'.format(item) for item in keptIds)
|
|
|
|
|
|
def getImagesLinks(page, timeout=30):
    """Return the image file names referenced on one listing page.

    Fetches ``http://blog.stanis.ru/?back=<page>`` and, for every ``<img>``
    tag whose ``src`` matches the pattern ``img/*``, collects the second
    ``/``-separated component of that ``src``.

    Args:
        page: Value substituted into the ``back`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of strings, in the order the tags were found (possibly empty).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    url = 'http://blog.stanis.ru/?back={0}'.format(page)
    # NOTE(review): the purpose of this cookie is not evident from this
    # file; it is sent exactly as before.
    cookies = {'block': '951'}

    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, cookies=cookies, timeout=timeout)
    response.raise_for_status()

    # Give the parser the undecoded bytes.  Going through ``response.text``
    # depends on the charset requests guesses, and re-encoding that text as
    # cp1251 can raise UnicodeEncodeError or garble the markup.
    soup = BeautifulSoup(response.content, 'html.parser',
                         from_encoding='windows-1251')

    imagesUrl = []
    for image in soup.find_all('img', src=re.compile('img/*')):
        parts = image['src'].split('/')
        # The pattern also matches a src that contains no '/', which has no
        # second component to take.
        if len(parts) > 1:
            imagesUrl.append(parts[1])

    return imagesUrl
|
|
|
|
|
|
def imageDownload(image, timeout=30):
    """Download one image from the blog into ``DOWNLOAD_DIR``.

    The image is requested from ``https://blog.stanis.ru/imgs/<image>`` and
    streamed to ``<DOWNLOAD_DIR>/<image>``.

    Args:
        image: File name of the image.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.  No file is created in that case.
        OSError: If the target file cannot be written.
    """
    url = 'https://blog.stanis.ru/imgs/{0}'.format(image)
    # The name originates from scraped HTML, so keep only its final path
    # component to make sure the file lands inside DOWNLOAD_DIR.
    target = os.path.join(DOWNLOAD_DIR, os.path.basename(image))

    response = requests.get(url, stream=True, timeout=timeout)
    try:
        # Check the status before opening the file so that an error page is
        # never stored under an image name.
        response.raise_for_status()
        # ``raw`` bypasses requests' content decoding unless asked to apply
        # it; enable it so a compressed transfer is stored decoded.
        response.raw.decode_content = True
        with open(target, 'wb') as outImage:
            shutil.copyfileobj(response.raw, outImage)
    finally:
        # A streamed response keeps its connection until explicitly closed.
        response.close()
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: walk the listing pages starting at page 0,
    # download every image whose id is not recorded yet, and stop after the
    # first page that contains an already recorded id.

    # Ids are compared with file-name stems (strings) and later sorted
    # together, so make sure every recorded id is a string.
    resumeFiles = [str(item) for item in checkResumeFile()]

    LOOP = True
    downloadPage = 0

    while LOOP:
        imagesLinks = getImagesLinks(downloadPage)

        # A page without images never sets LOOP to False, so the loop would
        # request pages forever.  Presumably an empty page means the end of
        # the archive was reached -- TODO confirm against the site.
        if not imagesLinks:
            break

        for image in sorted(imagesLinks):
            imageId = image.split('.')[0]
            if imageId in resumeFiles:
                # Known image: finish the current page, then stop paging.
                LOOP = False
            else:
                imageDownload(image)
                resumeFiles.insert(0, imageId)

        downloadPage += 1

    # Recorded only after a complete run: an aborted run leaves the previous
    # resume file in place, so the next run starts from the same state.
    saveResume(resumeFiles)
|