library(rvest)
library(dplyr)
library(tidyr)
#' Extract the attributes of every link inside `.section-details` elements.
#'
#' Used throughout the script to list both category links and knife links
#' on ChefKnivesToGo pages.
#'
#' @param x A parsed HTML document (or node set) as returned by `read_html()`.
#' @return A tibble with one row per `<a>` element found under
#'   `.section-details` and one column per attribute present on those
#'   anchors (the rest of the script reads the `href` and `title` columns).
cktg_query <- function(x) {
  anchors <- x %>%
    html_elements(".section-details") %>%
    html_elements("a")

  # One list element (a named character vector of attributes) per anchor,
  # stored in a single list-column called "." and then spread into columns.
  attr_table <- tibble(. = html_attrs(anchors))
  unnest_wider(attr_table, ".")
}
# Phase 1: knives by steel group and maker ----
# Crawl shopbysteel.html -> steel pages -> maker pages (-> sub-type pages)
# and accumulate one row per distinct knife `href` in `df`, together with
# its price text, steel group, maker and (when present) sub-type.
df <- data.frame()
cktg <- "https://www.chefknivestogo.com/"
cktg_steels <- "shopbysteel.html"
cktg_steels_read <- read_html(paste0(cktg, cktg_steels))
steels <- cktg_query(cktg_steels_read)

for (x in seq_along(steels$href)) {
  cktg_makers_read <- read_html(paste0(cktg, steels$href[x]))
  makers <- cktg_query(cktg_makers_read)

  for (y in seq_along(makers$href)) {
    # A failure on one maker page is reported and skipped so the crawl
    # continues with the next maker.
    tryCatch(
      {
        cktg_knives_read <- read_html(paste0(cktg, makers$href[y]))

        # A page containing a `.price` node is treated as a product listing;
        # otherwise it is treated as a list of sub-type pages.
        test <- html_node(cktg_knives_read, ".price")
        if (!is.na(test)) {
          prices <- cktg_knives_read %>%
            html_nodes(".section-details") %>%
            html_nodes(".price") %>%
            html_text()
          # mutate() errors (and the page is skipped by the handler below)
          # if the number of prices does not match the number of links.
          knives <- cktg_knives_read %>%
            cktg_query() %>%
            mutate(
              Prices = prices,
              SteelGroup = steels$title[x],
              Maker = makers$title[y],
              SubType = NA_character_
            )

          # Only append knives whose href has not been collected yet.
          is_new <- !(knives$href %in% df$href)
          if (any(is_new)) {
            df <- bind_rows(df, knives[is_new, ])
          }
        } else {
          # NOTE(review): hard-coded special case carried over unchanged --
          # the 6th maker of the 3rd steel group is replaced by kohetsu.html.
          # It depends on the site's current ordering; confirm it still
          # points at the intended page.
          if (x == 3 && y == 6) {
            cktg_knives_read <- read_html(paste0(cktg, "kohetsu.html"))
          }
          sub_type <- cktg_query(cktg_knives_read)

          # Iterate over the sub-type links (rows), not over the tibble's
          # columns, which is what length(sub_type) would count.
          for (z in seq_along(sub_type$href)) {
            cktg_knives_sub_read <-
              read_html(paste0(cktg, sub_type$href[z]))
            prices <- cktg_knives_sub_read %>%
              html_nodes(".section-details") %>%
              html_nodes(".price") %>%
              html_text()
            knives <- cktg_knives_sub_read %>%
              cktg_query() %>%
              mutate(
                Prices = prices,
                SteelGroup = steels$title[x],
                Maker = makers$title[y],
                SubType = sub_type$title[z]
              )

            # Knives already collected only get their SubType filled in ...
            known <- df$href %in% knives$href
            if (any(known)) {
              df$SubType[known] <- sub_type$title[z]
            }
            # ... and unseen ones are appended once each.
            is_new <- !(knives$href %in% df$href) & !duplicated(knives$href)
            if (any(is_new)) {
              df <- bind_rows(df, knives[is_new, ])
            }
          }
        }
      },
      # The handler must be the *named* argument `error =`; written with
      # `<-` it is an unnamed positional argument, not an error handler.
      error = function(e) {
        cat("ERROR :", conditionMessage(e), "\n")
      }
    )
  }
}
# Phase 2: tag collected knives with style categories ----
# Crawl resources.html -> style pages (-> sub-style -> sub-sub-style pages)
# and, for every knife href already present in `df`, record the category
# titles of the page it was listed on. No rows are added in this phase.
# rep() keeps these assignments valid even when `df` has zero rows.
df$Style <- rep(NA_character_, nrow(df))
df$SubStyle <- rep(NA_character_, nrow(df))
df$SubSubStyle <- rep(NA_character_, nrow(df))

cktg_types <- "resources.html"
cktg_types_read <- read_html(paste0(cktg, cktg_types))
types <- cktg_types_read %>%
  html_nodes(".ysw-lp-row-item") %>%
  html_nodes("h2") %>%
  html_nodes("a") %>%
  html_attrs() %>%
  tibble() %>%
  unnest_wider(".")

# NOTE(review): entry 16 is skipped exactly as in the original; the reason
# is not visible in this file and depends on the site's current ordering.
for (x in setdiff(seq_along(types$href), 16)) {
  # A failure anywhere under one top-level category is reported and the
  # loop moves on to the next category.
  tryCatch(
    {
      cktg_styles_knives_read <- read_html(paste0(cktg, types$href[x]))

      # A page containing a `.price` node is treated as a product listing;
      # otherwise it is treated as a list of sub-category links.
      if (!is.na(html_node(cktg_styles_knives_read, ".price"))) {
        listed <- cktg_styles_knives_read %>%
          html_nodes(".section-details") %>%
          html_nodes("a") %>%
          html_attr("href")
        df$Style[df$href %in% listed] <- types$title[x]
      } else {
        styles <- cktg_query(cktg_styles_knives_read)

        for (y in seq_along(styles$href)) {
          cktg_styles_read <- read_html(paste0(cktg, styles$href[y]))

          if (!is.na(html_node(cktg_styles_read, ".price"))) {
            listed <- cktg_query(cktg_styles_read)$href
            hit <- df$href %in% listed
            df$Style[hit] <- types$title[x]
            df$SubStyle[hit] <- styles$title[y]
          } else {
            sub_type <- cktg_query(cktg_styles_read)

            # Iterate over the sub-category links (rows), not over the
            # tibble's columns, which is what length(sub_type) would count.
            for (z in seq_along(sub_type$href)) {
              cktg_knives_sub_read <-
                read_html(paste0(cktg, sub_type$href[z]))
              listed <- cktg_query(cktg_knives_sub_read)$href
              hit <- df$href %in% listed
              df$Style[hit] <- types$title[x]
              df$SubStyle[hit] <- styles$title[y]
              df$SubSubStyle[hit] <- sub_type$title[z]
            }
          }
        }
      }
    },
    # The handler must be the *named* argument `error =`; written with
    # `<-` it is an unnamed positional argument, not an error handler.
    error = function(e) {
      cat("ERROR :", conditionMessage(e), "\n")
    }
  )
}

# Persist the combined table to the working directory.
write.csv(df, file = 'CKTG_KNIFE.csv')
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkocnZlc3QpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeSh0aWR5cikNCg0KDQojIGhlbHBlciBmdW5jdGlvbjoNCiMgV2Ugd2lsbCB1c2UgdGhlIGZvbGxvd2luZyBmdW5jdGlvbiB0byBnZXQgdGhlIGxpc3RzIG9mIGNhdGVnb3JpZXMgYW5kIGtuaXZlcyBmcm9tIHRoZSBDaGVmS25pdmVzVG9HbyB3ZWJzaXRlIHNldmVyYWwgdGltZXMNCg0KY2t0Z19xdWVyeSA8LSBmdW5jdGlvbih4KSB7DQogIGh0bWxfZWxlbWVudHMoeCwgIi5zZWN0aW9uLWRldGFpbHMiKSAlPiUNCiAgICBodG1sX2VsZW1lbnRzKCJhIikgJT4lDQogICAgaHRtbF9hdHRycygpICU+JQ0KICAgIHRpYmJsZSgpICU+JQ0KICAgIHVubmVzdF93aWRlcigiLiIpDQp9DQoNCg0KZmxhZyA8LSBUUlVFDQptYXQgPC0gbWF0cml4KG5jb2wgPSAwLCBucm93ID0gMCkNCmRmIDwtIGRhdGEuZnJhbWUobWF0KQ0KY2t0ZyA8LSAnaHR0cHM6Ly93d3cuY2hlZmtuaXZlc3RvZ28uY29tLycNCmNrdGdfc3RlZWxzIDwtICdzaG9wYnlzdGVlbC5odG1sJw0KY2t0Z19zdGVlbHNfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0ZywgY2t0Z19zdGVlbHMsIHNlcCA9ICcnKSkNCnN0ZWVscyA8LSBja3RnX3N0ZWVsc19yZWFkICU+JQ0KICBja3RnX3F1ZXJ5KCkNCg0KZm9yICh4IGluIDE6bGVuZ3RoKHN0ZWVscyRocmVmKSkgew0KICBja3RnX21ha2Vyc19yZWFkIDwtIHJlYWRfaHRtbChwYXN0ZShja3RnLCBzdGVlbHMkaHJlZlt4XSwgc2VwID0gJycpKQ0KICANCiAgbWFrZXJzIDwtIGNrdGdfbWFrZXJzX3JlYWQgJT4lDQogICAgY2t0Z19xdWVyeSgpDQogIA0KICANCiAgZm9yICh5IGluIDE6bGVuZ3RoKG1ha2VycyRocmVmKSkgew0KICAgIHRyeUNhdGNoKHsNCiAgICAgIGNrdGdfa25pdmVzX3JlYWQgPC0NCiAgICAgICAgcmVhZF9odG1sKHBhc3RlKGNrdGcsIG1ha2VycyRocmVmW3ldLCBzZXAgPSAnJykpDQogICAgICANCiAgICAgIHRlc3QgPC0gY2t0Z19rbml2ZXNfcmVhZCAlPiUgaHRtbF9ub2RlKCIucHJpY2UiKQ0KICAgICAgaWYgKCFpcy5uYSh0ZXN0KSkgew0KICAgICAgICBwcmljZXMgPC0gY2t0Z19rbml2ZXNfcmVhZCAlPiUNCiAgICAgICAgICBodG1sX25vZGVzKCcuc2VjdGlvbi1kZXRhaWxzJykgJT4lDQogICAgICAgICAgaHRtbF9ub2RlcygnLnByaWNlJykgJT4lDQogICAgICAgICAgaHRtbF90ZXh0KCkNCiAgICAgICAga25pdmVzIDwtIGNrdGdfa25pdmVzX3JlYWQgJT4lDQogICAgICAgICAgY2t0Z19xdWVyeSgpICU+JQ0KICAgICAgICAgIG11dGF0ZSgNCiAgICAgICAgICAgIFByaWNlcyA9IHByaWNlcywNCiAgICAgICAgICAgIFN0ZWVsR3JvdXAgPSBzdGVlbHMkdGl0bGVbeF0sDQogICAgICAgICAgICBNYWtlciA9IG1ha2VycyR0aXRsZVt5XSwNCiAgICAgICAgICAgIFN1YlR5cGUgPSBOQQ0KICAgICAgICAgICkNCiAgICAgICAgaWYgKGZs
YWcgPT0gVFJVRSkgew0KICAgICAgICAgIGRmIDwtIGJpbmRfcm93cyhkZiwga25pdmVzKQ0KICAgICAgICAgIGZsYWcgPC0gRkFMU0UNCiAgICAgICAgfQ0KICAgICAgICBlbHNlIHsNCiAgICAgICAgICBpZiAoc3VtKCFrbml2ZXMkaHJlZiAlaW4lIGRmJGhyZWYpID4gMCkgew0KICAgICAgICAgICAgZGYgPC0gZGYgICU+JSBiaW5kX3Jvd3Moa25pdmVzW3doaWNoKCFrbml2ZXMkaHJlZiAlaW4lIGRmJGhyZWYpLF0pDQogICAgICAgICAgfQ0KICAgICAgICB9DQogICAgICB9DQogICAgICANCiAgICAgIGVsc2Ugew0KICAgICAgICBpZiAoeCA9PSAzICYgeSA9PSA2KSB7DQogICAgICAgICAgY2t0Z19rbml2ZXNfcmVhZCA8LQ0KICAgICAgICAgICAgcmVhZF9odG1sKHBhc3RlKGNrdGcsICJrb2hldHN1Lmh0bWwiLCBzZXAgPSAnJykpDQogICAgICAgIH0NCiAgICAgICAgc3ViX3R5cGUgPC0gY2t0Z19rbml2ZXNfcmVhZCAlPiUNCiAgICAgICAgICBja3RnX3F1ZXJ5KCkNCiAgICAgICAgDQogICAgICAgIGZvciAoeiBpbiAxOmxlbmd0aChzdWJfdHlwZSkpIHsNCiAgICAgICAgICBja3RnX2tuaXZlc19zdWJfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0Zywgc3ViX3R5cGUkaHJlZlt6XSwgc2VwID0gIiIpKQ0KICAgICAgICAgIHByaWNlcyA8LSBja3RnX2tuaXZlc19zdWJfcmVhZCAlPiUNCiAgICAgICAgICAgIGh0bWxfbm9kZXMoJy5zZWN0aW9uLWRldGFpbHMnKSAlPiUNCiAgICAgICAgICAgIGh0bWxfbm9kZXMoJy5wcmljZScpICU+JQ0KICAgICAgICAgICAgaHRtbF90ZXh0KCkNCiAgICAgICAgICBrbml2ZXMgPC0gY2t0Z19rbml2ZXNfc3ViX3JlYWQgJT4lDQogICAgICAgICAgICBja3RnX3F1ZXJ5KCkgJT4lDQogICAgICAgICAgICBtdXRhdGUoDQogICAgICAgICAgICAgIFByaWNlcyA9IHByaWNlcywNCiAgICAgICAgICAgICAgU3RlZWxHcm91cCA9IHN0ZWVscyR0aXRsZVt4XSwNCiAgICAgICAgICAgICAgTWFrZXIgPSBtYWtlcnMkdGl0bGVbeV0sDQogICAgICAgICAgICAgIFN1YlR5cGUgPSBzdWJfdHlwZSR0aXRsZVt6XQ0KICAgICAgICAgICAgKQ0KICAgICAgICAgIGZvciAodCBpbiAxOmxlbmd0aChrbml2ZXMkaHJlZikpIHsNCiAgICAgICAgICAgIGlmIChrbml2ZXMkaHJlZlt0XSAlaW4lIGRmJGhyZWYpIHsNCiAgICAgICAgICAgICAgZGYkU3ViVHlwZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA9IGtuaXZlcyRTdWJUeXBlW3RdDQogICAgICAgICAgICB9DQogICAgICAgICAgICBlbHNlIHsNCiAgICAgICAgICAgICAgZGYgPC0gYmluZF9yb3dzKGRmLCBrbml2ZXNbdCwgXSkNCiAgICAgICAgICAgIH0NCiAgICAgICAgICB9DQogICAgICAgIH0NCiAgICAgIH0NCiAgICB9LA0KICAgIGVycm9yIDwtIGZ1bmN0aW9uKGUpIHsNCiAgICAgIGNhdCgiRVJST1IgOiIsIGNvbmRpdGlvbk1lc3NhZ2UoZSksICJcbiIpDQogICAgfSkNCiAgfQ0KfQ0KDQpgYGANCg0KYGBge3J9DQpkZiRTdHlsZSA8LSBO
QQ0KZGYkU3ViU3R5bGUgPC0gTkENCmRmJFN1YlN1YlN0eWxlIDwtIE5BDQpja3RnX3R5cGVzIDwtICdyZXNvdXJjZXMuaHRtbCcNCmNrdGdfdHlwZXNfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0ZywgY2t0Z190eXBlcywgc2VwID0gJycpKQ0KdHlwZXMgPC0gY2t0Z190eXBlc19yZWFkICU+JQ0KICBodG1sX25vZGVzKCcueXN3LWxwLXJvdy1pdGVtJykgJT4lDQogIGh0bWxfbm9kZXMoJ2gyJykgJT4lDQogIGh0bWxfbm9kZXMoImEiKSAlPiUNCiAgaHRtbF9hdHRycygpICU+JQ0KICB0aWJibGUoKSAlPiUNCiAgdW5uZXN0X3dpZGVyKCcuJykNCg0KZm9yICh4IGluIGMoMTpsZW5ndGgodHlwZXMkaHJlZikpWy0xNl0pIHsNCiAgdHJ5Q2F0Y2goew0KICAgIGNrdGdfc3R5bGVzX2tuaXZlc19yZWFkIDwtIHJlYWRfaHRtbChwYXN0ZShja3RnLCB0eXBlcyRocmVmW3hdLCBzZXAgPSAiIikpDQogICAgdGVzdCA8LSBja3RnX3N0eWxlc19rbml2ZXNfcmVhZCAlPiUgaHRtbF9ub2RlKCIucHJpY2UiKQ0KICAgIGlmICghaXMubmEodGVzdCkpIHsNCiAgICAgIGtuaXZlcyA8LSBja3RnX3N0eWxlc19rbml2ZXNfcmVhZCAlPiUNCiAgICAgICAgaHRtbF9ub2RlcygnLnNlY3Rpb24tZGV0YWlscycpICU+JQ0KICAgICAgICBodG1sX25vZGVzKCdhJykgJT4lDQogICAgICAgIGh0bWxfYXR0cigiaHJlZiIpICU+JQ0KICAgICAgICB0aWJibGUoKSAlPiUNCiAgICAgICAgdW5uZXN0X3dpZGVyKCcuJykgJT4lDQogICAgICAgIG11dGF0ZShTdHlsZSA9IHR5cGVzJHRpdGxlW3hdKSAlPiUNCiAgICAgICAgcmVuYW1lKGhyZWYgPSAuLi4xKQ0KICAgICAgZm9yICh0IGluIDE6bGVuZ3RoKGtuaXZlcyRocmVmKSkgew0KICAgICAgICBpZiAoa25pdmVzJGhyZWZbdF0gJWluJSBkZiRocmVmKSB7DQogICAgICAgICAgZGYkU3R5bGVbd2hpY2goZGYkaHJlZiAlaW4lIGtuaXZlcyRocmVmW3RdKV0gPC0ga25pdmVzJFN0eWxlW3RdDQogICAgICAgIH0NCiAgICAgIH0NCiAgICAgIA0KICAgIH0NCiAgICBlbHNlIHsNCiAgICAgIHN0eWxlcyA8LSBja3RnX3N0eWxlc19rbml2ZXNfcmVhZCAlPiUNCiAgICAgICAgY2t0Z19xdWVyeSgpDQogICAgICANCiAgICAgIGZvciAoeSBpbiAxOmxlbmd0aChzdHlsZXMkaHJlZikpIHsNCiAgICAgICAgY2t0Z19zdHlsZXNfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0Zywgc3R5bGVzJGhyZWZbeV0sIHNlcCA9ICIiKSkNCiAgICAgICAgdGVzdCA8LSBja3RnX3N0eWxlc19yZWFkICU+JSBodG1sX25vZGUoIi5wcmljZSIpDQogICAgICAgIA0KICAgICAgICBpZiAoIWlzLm5hKHRlc3QpKSB7DQogICAgICAgICAga25pdmVzIDwtICBja3RnX3N0eWxlc19yZWFkICU+JQ0KICAgICAgICAgICAgY2t0Z19xdWVyeSgpICU+JQ0KICAgICAgICAgICAgbXV0YXRlKFN0eWxlID0gdHlwZXMkdGl0bGVbeF0sDQogICAgICAgICAgICAgICAgICAgU3ViU3R5bGUgPSBzdHlsZXMkdGl0bGVbeV0pDQogICAgICAgICAgDQogICAgICAgICAg
Zm9yICh0IGluIDE6bGVuZ3RoKGtuaXZlcyRocmVmKSkgew0KICAgICAgICAgICAgaWYgKGtuaXZlcyRocmVmW3RdICVpbiUgZGYkaHJlZikgew0KICAgICAgICAgICAgICBkZiRTdHlsZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA8LSBrbml2ZXMkU3R5bGVbdF0NCiAgICAgICAgICAgICAgZGYkU3ViU3R5bGVbd2hpY2goZGYkaHJlZiAlaW4lIGtuaXZlcyRocmVmW3RdKV0gPC0ga25pdmVzJFN1YlN0eWxlW3RdDQogICAgICAgICAgICB9DQogICAgICAgICAgfQ0KICAgICAgICB9DQogICAgICAgIGVsc2Ugew0KICAgICAgICAgIHN1Yl90eXBlIDwtIGNrdGdfc3R5bGVzX3JlYWQgJT4lDQogICAgICAgICAgICBja3RnX3F1ZXJ5KCkNCiAgICAgICAgICBmb3IgKHogaW4gMTpsZW5ndGgoc3ViX3R5cGUpKSB7DQogICAgICAgICAgICBja3RnX2tuaXZlc19zdWJfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0Zywgc3ViX3R5cGUkaHJlZlt6XSwgc2VwID0gIiIpKQ0KICAgICAgICAgICAgDQogICAgICAgICAgICBrbml2ZXMgPC0gY2t0Z19rbml2ZXNfc3ViX3JlYWQgJT4lDQogICAgICAgICAgICAgIGNrdGdfcXVlcnkoKSAlPiUNCiAgICAgICAgICAgICAgbXV0YXRlKA0KICAgICAgICAgICAgICAgIFN0eWxlID0gdHlwZXMkdGl0bGVbeF0sDQogICAgICAgICAgICAgICAgU3ViU3R5bGUgPSBzdHlsZXMkdGl0bGVbeV0sDQogICAgICAgICAgICAgICAgU3ViU3ViU3R5bGUgPSBzdWJfdHlwZSR0aXRsZVt6XQ0KICAgICAgICAgICAgICApDQogICAgICAgICAgICBmb3IgKHQgaW4gMTpsZW5ndGgoa25pdmVzJGhyZWYpKSB7DQogICAgICAgICAgICAgIGlmIChrbml2ZXMkaHJlZlt0XSAlaW4lIGRmJGhyZWYpIHsNCiAgICAgICAgICAgICAgICBkZiRTdHlsZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA8LSBrbml2ZXMkU3R5bGVbdF0NCiAgICAgICAgICAgICAgICBkZiRTdWJTdHlsZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA8LSBrbml2ZXMkU3ViU3R5bGVbdF0NCiAgICAgICAgICAgICAgICBkZiRTdWJTdWJTdHlsZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA8LSBrbml2ZXMkU3ViU3ViU3R5bGVbdF0NCiAgICAgICAgICAgICAgfQ0KICAgICAgICAgICAgfQ0KICAgICAgICAgIH0NCiAgICAgICAgfQ0KICAgICAgICANCiAgICAgIH0NCiAgICB9DQogIH0sDQogIGVycm9yIDwtIGZ1bmN0aW9uKGUpIHsNCiAgICBjYXQoIkVSUk9SIDoiLCBjb25kaXRpb25NZXNzYWdlKGUpLCAiXG4iKQ0KICB9KQ0KfQ0KDQp3cml0ZS5jc3YoZGYsIGZpbGUgPSAnQ0tUR19LTklGRS5jc3YnKQ0KDQoNCmBgYA0KDQo=