Добрый день! Есть парсер сайта, который должен скачивать изображения со страниц галереи. Изначально он был написан разработчиком на Python 2.7.5. Сейчас пытаюсь адаптировать скрипт под Python 3.7.2. Не могу понять, почему он не может найти имя пользователя в адресе. Сам я не программист, адаптирую скрипт для себя. Помогите, пожалуйста. Немного поправил код, теперь выдаётся другая ошибка:
Код:
from __future__ import print_function from urllib.request import urlopen, urlretrieve import mechanize import pickle import random import sys import re import os
def dl_image(self):
    """Download this image into the current directory and report progress.

    A status line is printed twice: first with a ``[.]`` marker (ending in
    a carriage return so it can be overwritten) and, once the download has
    finished, with a ``[+]`` marker.  GIF or mature images are prefixed with
    ``' ! '``; all other images are prefixed with a tab.

    The file is saved as ``<title><n><ext>`` where ``<title>`` has every
    ``'/'`` removed and ``<n>`` is the smallest positive integer for which
    no such path exists yet in the current directory.
    """
    prefix = ' ! ' if (self.__gif or self.__mature) else '\t'

    # Build the line around the marker so that only the marker changes
    # between the "in progress" and "done" output; dots that happen to be
    # part of the title or the quality value must stay untouched.
    head = prefix + str(self.__quality) + " ["
    tail = "] " + self.__title
    print(head + '.' + tail, end='\r')

    # Use the sanitized title both for the collision check and for saving,
    # otherwise a title containing '/' never collides and gets overwritten.
    safe_title = self.__title.replace('/', '')
    mod = 1
    while os.path.exists(safe_title + str(mod) + self.__ext):
        mod += 1

    urlretrieve(self.__link, safe_title + str(mod) + self.__ext)
    print(head + '+' + tail)
def get_title(self):
    """Return the title stored on this object."""
    title = self.__title
    return title
def is_mat(self):
    """Return the mature flag stored on this object."""
    mature = self.__mature
    return mature
class NoHistory(object):
    """History stand-in whose methods accept their arguments and do nothing.

    An instance is handed to ``mechanize.Browser(history=...)`` by
    ``daSetBrowser``.
    """

    def add(self, *a, **k):
        """Accept any arguments and discard them."""
        return None

    def clear(self):
        """Do nothing; there is no stored state to clear."""
        return None
def daSetBrowser():
    """Create the module-wide ``BROWSER`` used for all site requests.

    The browser keeps no history (``NoHistory``), follows redirects, ignores
    robots.txt and sends a Referer header plus a User-Agent picked at random
    from a fixed list.
    """
    useragents = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
        'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/6.0',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)',
    )
    global BROWSER
    BROWSER = mechanize.Browser(history=NoHistory())
    BROWSER.set_handle_redirect(True)
    BROWSER.set_handle_robots(False)
    # Assign both headers in one list: two consecutive assignments to
    # ``addheaders`` would leave only the last one (the Referer was lost).
    BROWSER.addheaders = [
        ('Referer', 'http://www.deviantart.com/'),
        ('User-Agent', random.choice(useragents)),
    ]
def daLogin(username, password):
    """Try to log in through the module-wide ``BROWSER`` and report the result.

    Opens the login page, fills in the second form on it (``nr=1``) and
    submits it.  The outcome is printed; on success the global ``LOGGED_IN``
    is set to ``True``.  A failed login is not fatal: the caller continues
    anonymously.

    Args:
        username: Value for the form's ``username`` field.
        password: Value for the form's ``password`` field.

    Raises:
        SystemExit: If the request fails with an HTTP or URL error.
    """
    # The visible file-level imports only pull names out of urllib.request,
    # so ``urllib.error`` has to be bound here for the except clauses below.
    import urllib.error

    global LOGGED_IN
    data = ""
    try:
        BROWSER.open('https://www.deviantart.com/users/login',
                     "ref=http%3A%2F%2Fwww.deviantart.com%2F&remember_me=1")
        BROWSER.select_form(nr=1)
        BROWSER.form['username'] = username
        BROWSER.form['password'] = password
        BROWSER.submit()
        data = BROWSER.response().read()
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print("HTTP Error:", e.code)
        sys.exit()
    except urllib.error.URLError as e:
        print("URL Error:", e.reason)
        sys.exit()

    # On Python 3 the response body is bytes; searching it with str patterns
    # raises TypeError, so decode it before matching.
    if isinstance(data, bytes):
        data = data.decode('utf-8', errors='replace')

    if re.search("The password you entered was incorrect", data):
        print("Wrong password or username. Attempting to download anyway.")
    elif re.search("\"loggedIn\":true", data):
        print("Logged in!")
        LOGGED_IN = True
    else:
        print("Login unsuccessful. Attempting to download anyway.")
def startup():
    """Ask for an artist until a usable gallery is found, then enter its folder.

    For every name entered, the first gallery page is fetched with
    ``open_page``, the display name is taken from the page ``<title>``, a
    directory of that name is created if needed and made the current working
    directory.  Artists whose page reports "no deviations yet!" are skipped.

    Side effects:
        Changes the current working directory and sets the global ``ARTIST``.

    Returns:
        tuple: ``(gallery, source)`` - the text of the first gallery page
        and the URL it was fetched from.
    """
    global ARTIST
    print("\ngetdeviantart 1.0")
    while True:
        artist = input("Enter artist: ").lower()
        try:
            source = "http://" + artist + ".deviantart.com/gallery/?offset=0"
            gallery = open_page(source)
            artist = re.findall('<title>(.*?)&#', gallery)[0]
            print("Found", artist)
            if re.search(r"no deviations yet!", gallery):
                print(artist, "has no art.")
                continue
            # An already existing directory is fine; any other failure is
            # reported by the handler below instead of being swallowed.
            os.makedirs(artist, exist_ok=True)
            os.chdir(artist)
            gallery = open_page(source)
            break
        except Exception as exc:
            # ``Exception`` (not a bare except) keeps Ctrl+C and sys.exit()
            # working, and the real cause is shown so that porting problems
            # are not mistaken for a missing user.
            print("User not found, try again.")
            print("  (reason: {}: {})".format(type(exc).__name__, exc))
            continue
    ARTIST = artist
    return gallery, source
def count_pages(text, source):
    """Build the list of gallery page URLs from the pager in *text*.

    The number immediately before the pager's "next" item is taken as the
    number of pages.  If *text* contains no such pager, the gallery is
    treated as a single page instead of failing with ``IndexError``.

    Args:
        text: HTML of a gallery page.
        source: URL of the first gallery page; its last character is
            replaced by each page's offset.

    Returns:
        list[str]: One URL per page, with offsets 0, 24, 48, ...
    """
    match = re.search(r'(\d+)</a></li><li class="next"', text)
    page_count = int(match.group(1)) if match else 1
    base = source[:-1]
    return [base + str(offset) for offset in range(0, page_count * 24, 24)]
def menu():
    """Print the main menu and read the user's choice.

    Keeps prompting until an integer between 1 and 6 is entered;
    non-numeric input is ignored.

    Returns:
        int: The selected menu entry, 1 to 6.
    """
    # One entry per line; the text is assembled with join() so that no stray
    # line-continuation backslashes can end up in the printed menu.
    entries = (
        "1) Download all pages",
        "2) Select pages",
        "3) Select images page by page",
        "4) Search for image",
        "5) Choose different artist",
        "6) Quit",
    )
    print("\n" + "\n".join(entries))

    choice = 0
    while not 1 <= choice <= 6:
        try:
            choice = int(input("Choice> "))
        except ValueError:
            continue
    return choice
def execute(choice, all_pages, source):
    """Run the action that belongs to a menu choice.

    Only choice 1 is handled in the code shown here: it downloads every
    page, numbered from 1 up to ``len(all_pages)``.

    Args:
        choice: Menu entry number.
        all_pages: Sequence whose length is the number of gallery pages.
        source: URL of the first gallery page, passed on to ``download``.
    """
    global PAGES, ARTIST, IMG_BUFF, TITLES

    if choice == 1:
        every_page = range(1, len(all_pages) + 1)
        download(every_page, source)
def download(pages, source, sel_imgs=False):
    """Fetch the given gallery pages and hand each one to ``buff_down``.

    Args:
        pages: Iterable of 1-based page numbers (anything ``int()`` accepts).
        source: URL of the first gallery page; its last character is
            replaced by each page's offset, ``(number - 1) * 24``.
        sel_imgs: Passed through to ``buff_down`` unchanged.

    Side effects:
        Appends one ``Page`` per requested page to the global ``PAGES`` and
        prints each page's index.
    """
    global PAGES
    base = source[:-1]
    # All links are built before the first page is fetched.
    links = [base + str((int(number) - 1) * 24) for number in pages]

    for url in links:
        current = Page(url)
        PAGES.append(current)
        print("\nPage #", current.get_index())
        buff_down(current, sel_imgs)
def buff_down(page, sel_imgs):
    """Download the images of *page*, either all of them or a user selection.

    Without *sel_imgs* every image is downloaded straight away.  With
    *sel_imgs* the images are listed with a running number (mature ones are
    flagged with ``' ! '``), appended to the global ``IMG_BUFF``, and only
    the numbers returned by ``input_vals`` are downloaded; afterwards the
    buffer is reset to an empty list.

    Args:
        page: Object providing ``get_images()``.
        sel_imgs: Truthy to let the user pick images, falsy to take all.
    """
    # NOTE(review): the pasted source lost its indentation; the buffer reset
    # is assumed to belong to the selection branch - confirm against the
    # original script.
    global IMG_BUFF

    if not sel_imgs:
        for picture in page.get_images():
            picture.dl_image()
        return

    for position, picture in enumerate(page.get_images(), start=1):
        marker = ' ! ' if picture.is_mat() else '\t'
        print(marker + str(position) + ')', picture.get_title())
        IMG_BUFF.append(picture)

    for chosen in input_vals(img=True, last=len(IMG_BUFF)):
        IMG_BUFF[int(chosen) - 1].dl_image()
    IMG_BUFF = []
def input_vals(pg=False, img=False, last=0):
    """Ask the user for a selection such as ``1-3,5,6-8`` until it is valid.

    Spaces are ignored.  Each comma-separated item is either a single
    number or an inclusive ``first-last`` range.  Every selected number must
    lie between 1 and *last*; otherwise "Bad format, Try again." is printed
    and the prompt is repeated.

    Args:
        pg: Prompt for pages (takes precedence over *img*).
        img: Prompt for images.
        last: Highest number the user may select.

    Returns:
        list[str]: The selected numbers in ascending order, as strings.

    Raises:
        ValueError: If neither *pg* nor *img* is set (there is no prompt
            to show in that case).
    """
    if pg:
        prompt = "Pages to get/search (1-3,5,6-8 etc): "
    elif img:
        prompt = "Images to download (1-3,5,6-8 etc): "
    else:
        raise ValueError("input_vals() requires pg=True or img=True")

    while True:
        tokens = input(prompt).replace(' ', '').split(',')
        try:
            numbers = _parse_selection(tokens, last)
        except ValueError:
            print("Bad format, Try again.")
            continue
        return [str(number) for number in numbers]


def _parse_selection(tokens, last):
    """Expand tokens like '5' or '1-3' into sorted ints within 1..last.

    Raises ValueError for a malformed token, a reversed range, an empty
    selection or a number outside 1..last.
    """
    numbers = []
    for token in tokens:
        if token.isdigit():
            numbers.append(int(token))
        elif '-' in token:
            first_text, _, last_text = token.partition('-')
            if not (first_text.isdigit() and last_text.isdigit()):
                raise ValueError("malformed range: " + token)
            first, final = int(first_text), int(last_text)
            # Compare the bounds as entered, so "5-4" is rejected instead of
            # being accepted as an empty range.
            if first > final:
                raise ValueError("reversed range: " + token)
            numbers.extend(range(first, final + 1))
        else:
            raise ValueError("malformed item: " + token)

    # Numbers are 1-based: 0 must be rejected, because callers subtract one
    # and would otherwise index the last element with -1.
    if not numbers or min(numbers) < 1 or max(numbers) > last:
        raise ValueError("selection out of range")
    numbers.sort()
    return numbers
Сейчас этот форум просматривают: нет зарегистрированных пользователей и гости: 1
Вы не можете начинать темы. Вы не можете отвечать на сообщения. Вы не можете редактировать свои сообщения. Вы не можете удалять свои сообщения. Вы не можете добавлять вложения.