Scootsy Crawler / Scraper
WARNING - Contents of this page are for educational purposes only. It is strongly suggested that you do not use this
knowledge for illegal purposes!
Below is the code used to scrape (extract) information from the website.
- Libraries used in the code: sys, Selenium, pandas and datetime
- Web driver: Firefox. You can use the Chrome web driver instead.
python code view
Scraped Data view
Scootsy Scraper Code
click here to view project online
# -*- coding: utf-8 -*-
"""Scootsy restaurant-listing crawler (educational use only).

Loads the Scootsy vendor-listing page in Firefox, extracts one record per
restaurant <li> (id, city, name, cuisines, delivery time, link) and writes
the table to a timestamped .xlsx file in the working directory.

Runs on Python 2 or 3: prints use the function form with a single argument
and integer division uses ``//``.

Requires: selenium (with geckodriver on PATH) and pandas (+ an Excel
writer such as openpyxl).
"""
import sys
import datetime

import pandas
# NOTE: the original relied on `import selenium.webdriver.common.keys`
# (otherwise unused) to make `selenium.webdriver` resolvable after a bare
# `import selenium`. Import the subpackage we actually use instead.
import selenium.webdriver

URL = 'https://scootsy.com/get_vendor_listing.php'
BANNER_WIDTH = 60
COLUMNS = ['Id', 'City', 'Restaurant Name', 'Cuisines', 'Delivery Time', 'URL']


def _status(message):
    """Overwrite the current console line with a progress message."""
    sys.stdout.write('\r' + message)
    sys.stdout.flush()  # '\r' lines carry no newline, so flush explicitly


def _banner(text):
    """Return `text` centered between '*' runs, ~BANNER_WIDTH chars wide."""
    stars = '*' * ((BANNER_WIDTH - len(text)) // 2)  # // = int division on Py2 and Py3
    return '{0} {1} {2}'.format(stars, text, stars)


def _extract_row(li):
    """Build one [id, city, name, cuisines, delivery-time, url] record
    from a restaurant <li> element."""
    dish = li.find_element_by_class_name('dish_name')
    anchor = dish.find_element_by_tag_name('a')
    link = anchor.get_attribute('href')
    try:
        # City is presumably the 4th '-' token of the URL path — TODO confirm
        # against live Scootsy links; guard so one odd URL can't abort the run.
        city = link.split('/', 3)[3].split('-')[3]
    except IndexError:
        city = ''  # unexpected URL shape: keep the row, leave city blank
    return [
        li.get_attribute('id'),
        city,
        anchor.text,
        dish.find_element_by_tag_name('span').text,  # cuisines
        li.find_element_by_class_name('icn').find_element_by_tag_name('span').text,
        link,
    ]


def main():
    """Crawl the listing page and save the extracted table to .xlsx."""
    print('-' * (BANNER_WIDTH + 1))
    print(_banner('Scootsy Crawler 1.0'))
    print(_banner("Please don't change the code "))
    print(_banner('If you change it will leads to raise an error'))
    print('-' * (BANNER_WIDTH + 1))

    _status('Driver Initializing ...')
    # To use Chrome instead, pass the *chromedriver* executable path (not
    # chrome.exe, as the old commented-out line did):
    #   driver = selenium.webdriver.Chrome('/path/to/chromedriver')
    driver = selenium.webdriver.Firefox()
    try:
        _status('Navigating to url : ' + URL)
        driver.get(URL)
        _status('start crawling for Restaurant : ' + URL)
        ul = driver.find_element_by_tag_name('ul')
        items = ul.find_elements_by_tag_name('li')
        print('\rfound available restaurant :  ' + str(len(items)))

        # Build rows one finished record at a time. The original seeded
        # `[[]]` and appended an empty list every pass, leaving a trailing
        # empty row that became a spurious all-NaN line in the DataFrame.
        rows = []
        for li in items:
            row = _extract_row(li)
            rows.append(row)
            _status('Data Extracted For Restaurant : ' + row[2])
        _status('Data Extraction Finished...')
    finally:
        # quit() (unlike close()) also terminates the geckodriver process;
        # the finally block guarantees cleanup even if scraping raises.
        driver.quit()
    _status('Driver Close...')

    table = pandas.DataFrame.from_records(rows, columns=COLUMNS)
    filename = ('scootsy crawl '
                + datetime.datetime.today().strftime('%d%m%y %H%M%S')
                + '.xlsx')
    _status('File Saving TO : ' + filename)
    # Sheet name typo fixed ('Sccotsy' -> 'Scootsy'). The original also
    # passed encoding='UTF-8', which recent pandas versions reject and
    # which is meaningless for .xlsx (always UTF-8), so it is dropped.
    table.to_excel(filename, 'Scootsy', index=False)
    print('\rFile Saved @ :  ' + filename)
    # The original ended with sys.stdout.read(), which raises because
    # stdout is write-only; simply end the script instead.


if __name__ == '__main__':
    main()