# Scrape the paginated '/loupan/' listing pages, build one NewHouse record per
# listing element, and write one line per record to the output file.
#
# NOTE(review): this script arrived with its indentation and several tokens
# missing. The nesting below is reconstructed from the statement syntax, and
# every receiver that had to be inferred is marked NOTE(review) where it is
# used. Two literals are kept exactly as received and still need real values:
#   * the output path "" -- open("") raises FileNotFoundError;
#   * the URLs '/loupan/' and '/loupan/pg{0}' -- they have no scheme/host, so
#     requests.get() will reject them until a base URL is prepended.
import re  # needed for the page-count regex; harmless if already imported

with open("", "w", encoding='utf-8') as f:
    total_page = 1
    loupan_list = []

    # First request: only used to discover how many result pages exist.
    page = '/loupan/'
    headers = create_headers()
    response = requests.get(page, timeout=10, headers=headers)
    html = response.content
    soup = BeautifulSoup(html, "lxml")

    try:
        page_box = soup.find_all('div', class_='page-box')[0]
        # NOTE(review): the call name was missing here; re.search is inferred
        # from the .group(1) use on the result.
        matches = re.search(r'.*data-total-count="(\d+)".*', str(page_box))
        total_count = int(matches.group(1))
        # Ceiling division by 10 (presumably the listings-per-page size) so a
        # partially filled last page is still visited; plain int(x / 10)
        # truncated it away.
        total_page = -(-total_count // 10)
    except (IndexError, AttributeError, ValueError) as e:
        # No page-box div, no regex match, or a non-numeric count:
        # report it and fall back to the default of a single page.
        print(e)
    # str + int raised TypeError here; format the number instead.
    print('Total pages:{0}'.format(total_page))

    headers = create_headers()
    for i in range(1, total_page + 1):
        page = '/loupan/pg{0}'.format(i)
        print(page)
        response = requests.get(page, timeout=10, headers=headers)
        html = response.content
        soup = BeautifulSoup(html, "lxml")

        house_elements = soup.find_all('li', class_="resblock-list")
        for house_elem in house_elements:
            price = house_elem.find('span', class_="number")
            desc = house_elem.find('span', class_="desc")
            total = house_elem.find('div', class_="second")
            loupan = house_elem.find('a', class_='name')

            # find() returns None when the element is absent, so a missing
            # price or description falls back to '0'.
            try:
                price = price.text.strip() + desc.text.strip()
            except AttributeError:
                price = '0'

            # NOTE(review): the receiver was missing ("= .replace(...)");
            # the anchor's text is assumed, matching how price/desc are read.
            name = loupan.text.replace("\n", "")

            try:
                # NOTE(review): the receiver was missing ("().replace(...)");
                # total.text.strip() is assumed.
                total = total.text.strip().replace(u'Total price', '')
                # The original rebound `total` to a (str, str) tuple here
                # instead of stripping the suffix.
                total = total.replace(u'/set up', '')
            except AttributeError:
                total = '0'

            record = NewHouse(name, price, total)
            # NOTE(review): the printed/written expression was reduced to "()";
            # NewHouse is assumed to expose a text() method returning the
            # line to emit -- confirm against the NewHouse definition.
            print(record.text())
            loupan_list.append(record)

    for loupan in loupan_list:
        f.write(loupan.text() + "\n")