Scootsy Crawler / Scraper
WARNING - Contents of this page are for educational purposes only. It is strongly suggested that you do not use this
knowledge for illegal purposes!
Below is the code used to scrape or extract information from the website.
- Libraries used in the code: sys, Selenium, pandas, and datetime.
- Uses the Firefox web driver; you can use the Chrome web driver instead.
python code view
Scrapped Data view
Scootsy Scrapper Code
click here to view project online
"""Scootsy restaurant-listing crawler.

Opens https://scootsy.com/get_vendor_listing.php in Firefox via Selenium,
extracts one record per restaurant <li> (id, city, name, cuisines,
delivery time, link) from the first <ul> on the page, and saves the
records to a timestamped Excel file in the current directory.
"""
import datetime
import sys

import pandas
import selenium.webdriver
from selenium.webdriver.common.by import By

URL = 'https://scootsy.com/get_vendor_listing.php'
COLUMNS = ['Id', 'City', 'Restaurant Name', 'Cuisines', 'Delivery Time', 'URL']
BANNER_WIDTH = 60  # console banner width in characters


def _print_banner():
    """Print the start-up banner framed by '-' rules."""
    print((BANNER_WIDTH + 1) * '-')
    for line in ('Scootsy Crawler 1.0',
                 "Please don't change the code ",
                 'If you change it will leads to raise an error'):
        stars = (BANNER_WIDTH - len(line)) // 2 * '*'
        print(stars, line, stars)
    print((BANNER_WIDTH + 1) * '-')


def _status(message):
    """Overwrite the current console line with a progress message."""
    sys.stdout.write('\r' + message)
    sys.stdout.flush()


def _scrape_restaurant(li):
    """Extract one restaurant record [id, city, name, cuisines, delivery, url]
    from a listing <li> WebElement."""
    dish = li.find_element(By.CLASS_NAME, 'dish_name')
    anchor = dish.find_element(By.TAG_NAME, 'a')
    cuisines = dish.find_element(By.TAG_NAME, 'span').text
    delivery = (li.find_element(By.CLASS_NAME, 'icn')
                  .find_element(By.TAG_NAME, 'span').text)
    link = anchor.get_attribute('href')
    # City is taken as the 4th '-'-separated token of the URL path after the
    # host, mirroring the original scraper -- assumes scootsy.com link format;
    # TODO confirm against live data.
    city = link.split('/', 3)[3].split('-')[3]
    return [li.get_attribute('id'), city, anchor.text, cuisines, delivery, link]


def main():
    """Run the crawl end-to-end: browse, scrape, save to .xlsx."""
    _print_banner()
    _status('Driver Initializing ...')
    # Firefox (geckodriver) is used here. To use Chrome instead, construct
    # selenium.webdriver.Chrome() with a *chromedriver* executable -- the
    # original commented-out line wrongly pointed at chrome.exe itself.
    driver = selenium.webdriver.Firefox()
    try:
        _status('Navigating to url : ' + URL)
        driver.get(URL)
        listing = driver.find_element(By.TAG_NAME, 'ul')
        _status('start crawling for Restaurant : ' + URL)
        items = listing.find_elements(By.TAG_NAME, 'li')
        print('\r' + 'found available restaurant : ', len(items))
        # BUG FIX: the original seeded the list with [[]] and appended a fresh
        # empty record each iteration, leaving a trailing empty row that
        # became a NaN row in the DataFrame.
        restaurants = []
        for li in items:
            record = _scrape_restaurant(li)
            restaurants.append(record)
            _status('Data Extracted For Restaurant : ' + record[2])
        _status('Data Extraction Finished...')
    finally:
        # Close the browser even if scraping fails part-way.
        driver.close()
        _status('Driver Close...')
    data_table = pandas.DataFrame.from_records(restaurants, columns=COLUMNS)
    filename = ('scootsy crawl '
                + datetime.datetime.today().strftime('%d%m%y %H%M%S')
                + '.xlsx')
    _status('File Saving TO : ' + filename)
    # BUG FIX: the original passed encoding="UTF-8", which current pandas
    # rejects (the argument was removed; .xlsx is always UTF-8).
    data_table.to_excel(filename, sheet_name='Sccotsy', index=False)
    print('\r' + 'File Saved @ : ', filename)
    # BUG FIX: the original ended with sys.stdout.read() -- stdout is not
    # readable and that call raised io.UnsupportedOperation.


if __name__ == '__main__':
    main()
Smm panel
ReplyDeletesmm panel
https://isilanlariblog.com
İnstagram Takipçi Satın Al
HİRDAVATCİ BURADA
Beyazesyateknikservisi.com.tr
servis
Jeton hilesi
Good content. You write beautiful things.
ReplyDeletevbet
mrbahis
hacklink
mrbahis
vbet
taksi
sportsbet
korsan taksi
sportsbet
Good text Write good content success. Thank you
ReplyDeletekralbet
betpark
kibris bahis siteleri
tipobet
mobil ödeme bahis
bonus veren siteler
slot siteleri
poker siteleri
başakşehir
ReplyDeletebeykoz
gölcük
kütahya
bandırma
Jİ0Fİ8
شركة مكافحة حشرات بالدمام qjaONVWUYn
ReplyDelete