web123456

Here is how I fetch the latest new-home price data.

# Scrape new-home ("loupan") listings and write one record per line to a file.
#
# NOTE(review): this block was recovered from a whitespace-mangled paste.
# It depends on names defined elsewhere in the project -- requests,
# BeautifulSoup, create_headers() (request-header factory) and NewHouse
# (record class with a .text() serializer) -- confirm their contracts there.
import math  # ceil: round the page count up so a partial last page is visited
import re    # extract the total listing count from the pager markup

# TODO(review): the output filename was blank ('open("")') and the listing
# URL is site-relative in the original -- fill in real values before running.
with open("loupan.txt", "w", encoding="utf-8") as f:
    total_page = 1
    loupan_list = list()
    page = '/loupan/'
    # Build request headers and fetch the first listing page to learn
    # how many pages exist.
    headers = create_headers()
    response = requests.get(page, timeout=10, headers=headers)
    html = response.content
    soup = BeautifulSoup(html, "lxml")
    # The pager carries the total listing count in data-total-count;
    # 10 listings per page.  Fall back to 1 page on any parse failure.
    try:
        page_box = soup.find_all('div', class_='page-box')[0]
        # BUG FIX: the original built a tuple ('regex', str(page_box))
        # instead of actually running the regex.
        matches = re.search(r'.*data-total-count="(\d+)".*', str(page_box))
        # BUG FIX: round up (ceil), otherwise a partial last page is skipped.
        total_page = int(math.ceil(int(matches.group(1)) / 10))
    except Exception as e:
        print(e)
    # BUG FIX: the original concatenated str + int, raising TypeError.
    print('Total pages:' + str(total_page))
    # Re-create headers and walk every listing page.
    headers = create_headers()
    for i in range(1, total_page + 1):
        page = '/loupan/pg{0}'.format(i)
        print(page)
        response = requests.get(page, timeout=10, headers=headers)
        html = response.content
        soup = BeautifulSoup(html, "lxml")
        # Each development is one <li class="resblock-list"> element.
        house_elements = soup.find_all('li', class_="resblock-list")
        for house_elem in house_elements:
            price = house_elem.find('span', class_="number")
            desc = house_elem.find('span', class_="desc")
            total = house_elem.find('div', class_="second")
            loupan = house_elem.find('a', class_='name')
            # Clean the unit price; default to '0' when the element is missing.
            try:
                price = price.text.strip() + desc.text.strip()
            except Exception:
                price = '0'
            # BUG FIX: the original dropped the receiver ("loupan = .replace(...)").
            loupan = loupan.text.replace("\n", "")
            # Strip the label text around the total price.
            # NOTE(review): the paste had these runtime strings translated to
            # English ("Total price", "/set up"); restored as the Chinese
            # strings the site emits -- confirm against the live markup.
            try:
                total = total.text.strip().replace(u'总价', '')
                total = total.replace(u'/套起', '')
            except Exception:
                total = '0'
            # Wrap the cleaned fields in the project's record type.
            loupan = NewHouse(loupan, price, total)
            # BUG FIX: the original had lost the receiver -- "print(())".
            print(loupan.text())
            loupan_list.append(loupan)
    # Persist every collected record, one serialized line each.
    for loupan in loupan_list:
        f.write(loupan.text() + "\n")