# Remove every object listed by ls() from the environment this script is
# evaluated in, so the script starts from an empty workspace.
# NOTE(review): this also deletes any objects the user already had in that
# environment, and it does not unload packages or reset options; restarting
# the R session is the more reliable way to get a clean state.
rm(list = ls())

# To do:
# 1. Go through the vignette for rvest in the help page for the rvest library

# Below are two blog posts to explore to try to learn how to use rvest

# 2. https://www.r-bloggers.com/using-rvest-to-scrape-an-html-table/
#    Go through this blog post line-by-line to figure out what it is doing.

# 3. https://blog.rstudio.org/2014/11/24/rvest-easy-web-scraping-with-r/
#    Go through this blog post line-by-line to figure out what it is doing.

# Note: I will be asking for a volunteer to teach us to use rvest!

# load library
library(rvest)

# URL of a single apartment complex; the trailing number (169332) is the
# code of that complex.
url <- "http://www.rentals.com/Massachusetts/Amherst/169332/"

# Download and parse the web page.
page <- read_html(url)

# The XPath expressions below came from looking at the page's HTML in Chrome
# (and trial and error).

# Table about the apartments.
tab <- page %>%
  html_node(
    xpath = '//*[@id="page"]/div[2]/div[2]/div[2]/div/div[1]/div[2]/table'
  ) %>%
  html_table()

# Words about the apartments.
# TODO: get this into the dataset.
desc <- page %>%
  html_node(xpath = '//*[@id="property_details"]') %>%
  html_text()

# Address of the complex.
address <- page %>%
  html_node(xpath = '//*[@id="summary_address"]') %>%
  html_text()

# TODO: figure out how to get all codes (like 169332 above) from the page
# below. Note that this assignment overwrites the single-complex `url` set
# earlier; nothing in the code shown here fetches this page yet.
url <- "http://www.rentals.com/Massachusetts/Amherst/?per_page=90"