Wednesday, March 25, 2015

GUI web scrape program.

 
#Appraiser Data scraper example.
 #Written by Steve Atchison for Shawnee County March 17 2015
import urllib2
from Tkinter import *

def _extract_comparable(page, marker):
    """Pull one comparable's fields out of the page text.

    Returns a (pin, sale date, sale price) tuple of raw page slices taken at
    fixed character offsets from where marker starts, or None when marker does
    not occur in page.
    """
    start = page.find(marker)
    if start < 0:
        #find() returns -1 when the marker is absent; slicing from there
        #would return unrelated text from the start of the page.
        return None

    #Fixed offsets, relative to the start of the marker, into the page layout.
    comp = page[start:start + 21]          #marker text plus the comparable pin
    saledate = page[start + 63:start + 67]
    saleprice = page[start + 88:start + 99]
    return comp, saledate, saleprice


def getwebhtml():
    """Fetch the comparables page for the entered parcel number and display it.

    Reads the parcel number from the pinnum entry, downloads the appraiser
    page, writes the pin, sale date and sale price of comparables PID1..PID5
    into txtbox, and finally calls theweb().
    """
    #Surrounding whitespace in the entry would end up inside the URL.
    pid = pinnum.get().strip()
    req = urllib2.Request('http://www.snco.us/ap/R_prop/Comp.asp?PRCL_ID='+str(pid)+'&PRCL_CD=01&YEAR=2015')
    response = urllib2.urlopen(req)
    try:
        the_page = response.read()
    finally:
        #Release the connection even if the read fails.
        response.close()

    for number in range(1, 6):
        marker = 'PID%d=' % number
        found = _extract_comparable(the_page, marker)
        if found is None:
            txtbox.insert("1.0", marker + ' not found\n\n')
            continue

        comp1, saledate, saleprice = found
        #Every record is inserted at the top of the box, so the last
        #comparable ends up first (same ordering as the original code).
        txtbox.insert("1.0",
                      comp1 + '\n'
                      + 'Date: ' + saledate + '\n'
                      + 'Sale price:' + saleprice + '\n\n')

    theweb()  #opens up webpages for all properties.

def cleartext():
    """Empty both the parcel-number entry and the results text box."""
    pinnum.delete(0, END)
    txtbox.delete("1.0", END)

def theweb():
    """Open the appraiser listing page in the web browser for each "Owners" row.

    Iterates an arcpy search cursor over "Owners" (presumably a layer or table
    in the current ArcMap document -- confirm), prints each row's PID and
    opens the matching listing URL.
    """
    #Imported here because the file's import section does not bring in either
    #module; without them the calls below raise NameError.
    import webbrowser
    import arcpy

    for row in arcpy.SearchCursor("Owners"):
        parcelnumber = row.PID
        print(parcelnumber)

        webbrowser.open('http://www.snco.us/Ap/R_prop/Listing.asp?PRCL_ID='+str(parcelnumber))

#Build the main window and its widgets: two buttons, the parcel-number
#entry and the results text box.
root = Tk()
btn = Button(root, text="Get Comparables", command=getwebhtml)
btn2 = Button(root, text="Clear", command=cleartext)
pinnum = Entry(root)
txtbox = Text(root)

#Pack in the same order the widgets were created.
for widget in (btn, btn2, pinnum, txtbox):
    widget.pack()

#Run the Tk event loop until the window is closed.
root.mainloop()

Snapshot of the Simple Scrape Program


This is a snapshot of my simple Python scrape program.


#!/usr/bin/python
#Appraiser Data scraper
#Written by Steve Atchison for Shawnee County March 17 2015
import urllib2

#Ask the user which parcel to look up.
pid = raw_input('Enter parcel number: ')

#Download the html text of the appraiser comparables page for that parcel.
url = 'http://www.snco.us/ap/R_prop/Comp.asp?PRCL_ID=' + str(pid) + '&PRCL_CD=01&YEAR=2015'
req = urllib2.Request(url)
response = urllib2.urlopen(req)
the_page = response.read()

#Start from an empty scrape_data.txt: opening in 'w' mode truncates the file.
with open("scrape_data.txt", 'w') as f2:
    pass

def getmoredata(start):
    """Print one comparable's pin, sale price and sale date and save them.

    start is the index in the_page where a 'PIDn=' marker begins, as returned
    by the_page.find(). The three fields are sliced at fixed offsets from
    start and appended as one line to scrape_data.txt. When start is negative
    (find() did not locate the marker) nothing is read or written.
    """
    print('--------------------------------')

    start = int(start)
    if start < 0:
        #find() returns -1 for a missing marker; slicing from there would
        #save unrelated text from the beginning of the page.
        print('Comparable not found on page; nothing saved.')
        return

    #Compare PIN (the 5-character 'PIDn=' marker itself is skipped)
    thepin = the_page[start + 5:start + 21]
    print(thepin)
    #Actual sale price
    saleprice = the_page[start + 88:start + 99]
    print('Actual sales price = %s' % saleprice)
    #Sale date
    saledate = the_page[start + 63:start + 67]
    print('Sale date = %s' % saledate)

    #Append the record; the with-block closes the file even if a write fails.
    with open("scrape_data.txt", 'a') as f2:
        f2.write(thepin + '  ' + saleprice + '  ' + saledate + '\n')
   
#Finds the beginning point of each parcel number in the html file,
#and then calls the getmoredata function with that position.
for marker in ('PID1=', 'PID2=', 'PID3=', 'PID4=', 'PID5='):
    startchar = the_page.find(marker)
    getmoredata(startchar)

Python pictures

This is a snapshot of my ArcMap desktop after running the appraiser scrape program.

Followers

Blog Archive

About Me

My photo
Biking helps me to cope with life.