library(rvest)
# Scrape food names and calorie text from the boohee.com food-group listing
# pages and write the combined table to a CSV file.
#
# For each of `n_groups` food groups, `n_pages` listing pages are downloaded.
# The group heading is read from the first page of the group; food names and
# calorie strings are read from every page. The result, `food_cal`, has the
# columns `food_class`, `food_name` and `cal`.
myurl <- "http://www.boohee.com/food/group/"
n_groups <- 10L # number of food groups to scrape
n_pages <- 10L  # number of listing pages scraped per group

# One data frame per group, combined once after the loop instead of growing
# `food_cal` with rbind() on every iteration.
group_tables <- vector("list", n_groups)

for (i in seq_len(n_groups)) {
  group_label <- NA_character_
  page_names <- vector("list", n_pages)
  page_cals <- vector("list", n_pages)

  for (j in seq_len(n_pages)) {
    myurlpaste <- paste0(myurl, i, "?page=", j)
    myweb <- read_html(myurlpaste)

    if (j == 1L) {
      # The group heading is taken from page 1, which is downloaded anyway;
      # this avoids requesting the same page a second time.
      heading <- myweb %>%
        html_nodes("div[class='widget-food-list pull-right'] h3") %>%
        html_text()
      heading <- sub("\n", "", heading) # drop the first newline
      if (length(heading) > 0) {
        # Use a single label even if the selector matches several nodes.
        group_label <- heading[1]
      }
    }

    # Food names on this page.
    food_name1 <- myweb %>%
      html_nodes(".food-list li div[class = 'text-box pull-left'] a") %>%
      html_text()
    # Calorie text on this page.
    cal1 <- myweb %>%
      html_nodes(".food-list li div[class = 'text-box pull-left'] p") %>%
      html_text()

    # Names and calories come from separate selectors; if their counts differ
    # the columns can no longer be paired row by row, so fail loudly rather
    # than let data.frame() recycle or misalign them.
    if (length(food_name1) != length(cal1)) {
      stop(
        "Mismatched name/calorie counts (", length(food_name1), " vs ",
        length(cal1), ") at ", myurlpaste,
        call. = FALSE
      )
    }

    page_names[[j]] <- food_name1
    page_cals[[j]] <- cal1
  }

  food_name <- as.character(unlist(page_names, use.names = FALSE))
  cal <- as.character(unlist(page_cals, use.names = FALSE))
  # Repeat the label once per scraped item instead of assuming exactly 100
  # items per group.
  food_class <- rep(group_label, length(food_name))

  group_tables[[i]] <- data.frame(food_class, food_name, cal)
}

food_cal <- do.call(rbind, group_tables)
write.csv(food_cal, file = "E:\\薄荷食物热量.csv")