# Scrape details for one apartment listing on rentals.com using rvest.
#
# To do:
# 1. Go through the vignette for rvest in the help page for the rvest library.
#    Below are two blog posts to explore to try to learn how to use rvest.
# 2. https://www.r-bloggers.com/using-rvest-to-scrape-an-html-table/
#    Go through this blog post line-by-line to figure out what it is doing.
# 3. https://blog.rstudio.org/2014/11/24/rvest-easy-web-scraping-with-r/
#    Go through this blog post line-by-line to figure out what it is doing.
# Note: I will be asking for a volunteer to teach us to use rvest!
#
# Run this script in a fresh R session. It deliberately does not call
# rm(list = ls()): that would delete whatever the caller has in the global
# environment, yet it would not reset loaded packages or options.

# Load library ----
library(rvest)

# Single listing ----

# URL of the apartment complex
url <- "http://www.rentals.com/Massachusetts/Amherst/169332/"

# Download and parse the web page
page <- read_html(url)

# The XPath expressions below came from inspecting the HTML in Chrome
# (and trial and error), so they depend on the site's current page layout.

# Table about the apartments
tab <- page %>%
  html_node(
    xpath = '//*[@id="page"]/div[2]/div[2]/div[2]/div/div[1]/div[2]/table'
  ) %>%
  html_table()

# Words about the apartments (need to get this into the dataset)
desc <- page %>%
  html_node(xpath = '//*[@id="property_details"]') %>%
  html_text()

# Address
address <- page %>%
  html_node(xpath = '//*[@id="summary_address"]') %>%
  html_text()

# Listing index ----

# Need to figure out how to get all codes (like 169332 above) from the page
# below. Note that this overwrites `url`; `page` still holds the single
# listing parsed above.
url <- "http://www.rentals.com/Massachusetts/Amherst/?per_page=90"