# load library
library(rvest)

url <- "http://www.rentals.com/Massachusetts/Amherst/?per_page=90"

#' Extract property listing ids from a search-results page
#'
#' Reads the page line by line, finds the first line containing the
#' `"listing_ids":"` marker and pulls the comma-separated ids that follow it.
#'
#' NOTE(review): this assumes the page embeds a fragment shaped like
#' `"listing_ids":"1,2","listings_per_page":N,"spotlight_listing_ids":"3,4","tpl_source":null}}`
#' -- that layout is inferred from the markers stripped below; confirm against
#' the live page.
#'
#' @param url Anything `readLines()` accepts as `con` (URL, file path or
#'   connection).
#' @param grab_spotlight If `TRUE`, the ids listed after the
#'   `"spotlight_listing_ids":"` marker are kept in the result; if `FALSE`,
#'   that marker and everything after it are discarded.
#' @return A character vector of ids (possibly of length zero).
get_property_ids <- function(url, grab_spotlight = TRUE) {
  # markers are literal text, so they are matched with fixed = TRUE
  id_marker <- '"listing_ids":"'
  spotlight_marker <- '"spotlight_listing_ids":"'

  # read in everything from the webpage
  page_lines <- readLines(con = url, warn = FALSE)

  # get the piece of the page with listing numbers in it; matching on the full
  # marker avoids picking a line that only mentions spotlight_listing_ids
  hits <- page_lines[grepl(id_marker, page_lines, fixed = TRUE)]
  if (length(hits) == 0) {
    stop("no line containing the listing_ids marker was found", call. = FALSE)
  }

  # remove excess stuff at the beginning we don't care about
  ids <- strsplit(hits[1], id_marker, fixed = TRUE)[[1]][2]

  # remove excess stuff at the end we don't care about
  ids <- sub('","tpl_source":null}}', "", ids, fixed = TRUE)

  # remove excess stuff in the middle we don't care about; the page size is
  # matched as any run of digits instead of one hard-coded value
  ids <- sub('","listings_per_page":[0-9]+', "", ids)

  if (grab_spotlight) {
    # keep the spotlight listings: just remove their label
    ids <- sub(spotlight_marker, "", ids, fixed = TRUE)
  } else {
    # otherwise remove the spotlight label and all ids after it
    ids <- strsplit(ids, spotlight_marker, fixed = TRUE)[[1]][1]
  }

  # split into individual ids
  ids <- strsplit(ids, ",", fixed = TRUE)[[1]]

  # drop NA / empty entries left behind when one of the id lists is empty
  ids[!is.na(ids) & nzchar(ids)]
}

ids <- get_property_ids(url, grab_spotlight = FALSE)