library(rvest)
library(dplyr)
library(tidyr)


# Helper: collect the attributes of every link inside the ".section-details"
# elements of a parsed page.
#
# x: a parsed HTML document (or node) as returned by read_html().
# Returns a tibble with one row per <a> element and one column per attribute
# name found on those anchors. The rest of this script reads its `href` and
# `title` columns.
# The same query is reused for steel, maker, sub-type and style listing pages.

cktg_query <- function(x) {
  detail_links <- html_elements(html_elements(x, ".section-details"), "a")
  link_attrs <- html_attrs(detail_links)
  # The list column is named "." to match what `%>% tibble()` produced before.
  unnest_wider(tibble(. = link_attrs), ".")
}


# ---- Scrape knives by steel group and maker ---------------------------------
# Crawl order: shopbysteel.html -> one page per steel -> one page per maker
# -> (when needed) one page per maker sub-type. A page is treated as a knife
# listing when it contains at least one ".price" element; otherwise its links
# are followed one level further as sub-type pages.
#
# Result: `df`, one row per unique knife `href`, holding the link attributes
# returned by cktg_query() plus Prices, SteelGroup, Maker and SubType.
# NOTE(review): assumes the listing anchors carry `href` and `title`
# attributes and that the hrefs are relative to `cktg` -- confirm on the site.

# Build the knife table for one listing page: link attributes, price text and
# the steel / maker / sub-type labels supplied by the caller.
# mutate() raises an error when the number of prices does not match the number
# of links; the tryCatch() in the crawl loop logs it and skips that maker.
scrape_listing <- function(page, steel, maker, sub_type = NA_character_) {
  prices <- page %>%
    html_elements(".section-details") %>%
    html_elements(".price") %>%
    html_text()

  page %>%
    cktg_query() %>%
    mutate(
      Prices = prices,
      SteelGroup = steel,
      Maker = maker,
      SubType = sub_type
    )
}

df <- data.frame()
cktg <- "https://www.chefknivestogo.com/"
cktg_steels <- "shopbysteel.html"
cktg_steels_read <- read_html(paste0(cktg, cktg_steels))
steels <- cktg_steels_read %>%
  cktg_query()

for (x in seq_along(steels$href)) {
  cktg_makers_read <- read_html(paste0(cktg, steels$href[x]))

  makers <- cktg_makers_read %>%
    cktg_query()

  for (y in seq_along(makers$href)) {
    # The handler must be passed as the *named* argument `error =`; an unnamed
    # handler makes tryCatch() itself fail before the body runs.
    tryCatch(
      {
        cktg_knives_read <- read_html(paste0(cktg, makers$href[y]))

        has_prices <- length(html_elements(cktg_knives_read, ".price")) > 0

        if (has_prices) {
          # Maker page lists knives directly.
          knives <- scrape_listing(
            cktg_knives_read,
            steel = steels$title[x],
            maker = makers$title[y]
          )
          # Only append knives not seen under an earlier steel / maker.
          # `%in%` against an empty `df` is all FALSE, so the first page is
          # appended in full without any special-casing.
          df <- bind_rows(df, knives[!knives$href %in% df$href, ])
        } else {
          # NOTE(review): hard-coded override for the 6th maker of the 3rd
          # steel, which is re-read from "kohetsu.html". Presumably the link
          # on the steel page does not lead to a usable listing -- confirm it
          # is still needed, since it depends on the site's current ordering.
          if (x == 3 && y == 6) {
            cktg_knives_read <- read_html(paste0(cktg, "kohetsu.html"))
          }

          sub_type <- cktg_knives_read %>%
            cktg_query()

          # Iterate over sub-type links (rows), not over the tibble's columns.
          for (z in seq_along(sub_type$href)) {
            cktg_knives_sub_read <- read_html(paste0(cktg, sub_type$href[z]))

            knives <- scrape_listing(
              cktg_knives_sub_read,
              steel = steels$title[x],
              maker = makers$title[y],
              sub_type = sub_type$title[z]
            )

            # Knives already collected only get their SubType filled in;
            # unseen knives are appended once each.
            known <- knives$href %in% df$href
            if (any(known)) {
              df$SubType[df$href %in% knives$href] <- sub_type$title[z]
            }
            df <- bind_rows(df, knives[!known & !duplicated(knives$href), ])
          }
        }
      },
      error = function(e) {
        cat("ERROR :", conditionMessage(e), "\n")
      }
    )
  }
}
# ---- Annotate knives with style categories ----------------------------------
# Crawl order: resources.html -> one page per knife type -> (when needed) style
# pages -> (when needed) sub-style pages. No rows are added here: knives already
# in `df` are matched by `href` and get Style / SubStyle / SubSubStyle filled
# in from the titles of the category links that lead to them.
# A page is treated as a knife listing when it has at least one ".price"
# element; otherwise its links are followed one level deeper.

# rep() keeps these assignments valid even when `df` has zero rows.
df$Style <- rep(NA_character_, nrow(df))
df$SubStyle <- rep(NA_character_, nrow(df))
df$SubSubStyle <- rep(NA_character_, nrow(df))

# hrefs of all links inside the ".section-details" elements of a page.
listing_hrefs <- function(page) {
  page %>%
    html_elements(".section-details") %>%
    html_elements("a") %>%
    html_attr("href")
}

cktg_types <- "resources.html"
cktg_types_read <- read_html(paste0(cktg, cktg_types))
types <- cktg_types_read %>%
  html_elements(".ysw-lp-row-item") %>%
  html_elements("h2") %>%
  html_elements("a") %>%
  html_attrs() %>%
  tibble() %>%
  unnest_wider(".")

# NOTE(review): the 16th type link is skipped by position. Presumably it is
# not a knife category -- confirm, since this depends on the page's ordering.
for (x in setdiff(seq_along(types$href), 16L)) {
  # The handler must be passed as the *named* argument `error =`; an unnamed
  # handler makes tryCatch() itself fail before the body runs.
  tryCatch(
    {
      cktg_styles_knives_read <- read_html(paste0(cktg, types$href[x]))

      if (length(html_elements(cktg_styles_knives_read, ".price")) > 0) {
        # Type page lists knives directly: only Style is known.
        hit <- df$href %in% listing_hrefs(cktg_styles_knives_read)
        df$Style[hit] <- types$title[x]
      } else {
        styles <- cktg_styles_knives_read %>%
          cktg_query()

        for (y in seq_along(styles$href)) {
          cktg_styles_read <- read_html(paste0(cktg, styles$href[y]))

          if (length(html_elements(cktg_styles_read, ".price")) > 0) {
            # Style page lists knives: Style and SubStyle are known.
            hit <- df$href %in% listing_hrefs(cktg_styles_read)
            df$Style[hit] <- types$title[x]
            df$SubStyle[hit] <- styles$title[y]
          } else {
            sub_type <- cktg_styles_read %>%
              cktg_query()

            # Iterate over sub-style links (rows), not the tibble's columns.
            for (z in seq_along(sub_type$href)) {
              cktg_knives_sub_read <-
                read_html(paste0(cktg, sub_type$href[z]))

              hit <- df$href %in% listing_hrefs(cktg_knives_sub_read)
              df$Style[hit] <- types$title[x]
              df$SubStyle[hit] <- styles$title[y]
              df$SubSubStyle[hit] <- sub_type$title[z]
            }
          }
        }
      }
    },
    error = function(e) {
      cat("ERROR :", conditionMessage(e), "\n")
    }
  )
}

# Save the assembled knife table to the working directory. Row names are
# written as the first column (write.csv default).
write.csv(x = df, file = "CKTG_KNIFE.csv")
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkocnZlc3QpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeSh0aWR5cikNCg0KDQojIGhlbHBlciBmdW5jdGlvbjoNCiMgV2Ugd2lsbCB1c2UgdGhlIGZvbGxvd2luZyBmdW5jdGlvbiB0byBnZXQgdGhlIGxpc3RzIG9mIGNhdGVnb3JpZXMgYW5kIGtuaXZlcyBmcm9tIHRoZSBDaGVmS25pdmVzVG9HbyB3ZWJzaXRlIHNldmVyYWwgdGltZXMNCg0KY2t0Z19xdWVyeSA8LSBmdW5jdGlvbih4KSB7DQogIGh0bWxfZWxlbWVudHMoeCwgIi5zZWN0aW9uLWRldGFpbHMiKSAlPiUNCiAgICBodG1sX2VsZW1lbnRzKCJhIikgJT4lDQogICAgaHRtbF9hdHRycygpICU+JQ0KICAgIHRpYmJsZSgpICU+JQ0KICAgIHVubmVzdF93aWRlcigiLiIpDQp9DQoNCg0KZmxhZyA8LSBUUlVFDQptYXQgPC0gbWF0cml4KG5jb2wgPSAwLCBucm93ID0gMCkNCmRmIDwtIGRhdGEuZnJhbWUobWF0KQ0KY2t0ZyA8LSAnaHR0cHM6Ly93d3cuY2hlZmtuaXZlc3RvZ28uY29tLycNCmNrdGdfc3RlZWxzIDwtICdzaG9wYnlzdGVlbC5odG1sJw0KY2t0Z19zdGVlbHNfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0ZywgY2t0Z19zdGVlbHMsIHNlcCA9ICcnKSkNCnN0ZWVscyA8LSBja3RnX3N0ZWVsc19yZWFkICU+JQ0KICBja3RnX3F1ZXJ5KCkNCg0KZm9yICh4IGluIDE6bGVuZ3RoKHN0ZWVscyRocmVmKSkgew0KICBja3RnX21ha2Vyc19yZWFkIDwtIHJlYWRfaHRtbChwYXN0ZShja3RnLCBzdGVlbHMkaHJlZlt4XSwgc2VwID0gJycpKQ0KICANCiAgbWFrZXJzIDwtIGNrdGdfbWFrZXJzX3JlYWQgJT4lDQogICAgY2t0Z19xdWVyeSgpDQogIA0KICANCiAgZm9yICh5IGluIDE6bGVuZ3RoKG1ha2VycyRocmVmKSkgew0KICAgIHRyeUNhdGNoKHsNCiAgICAgIGNrdGdfa25pdmVzX3JlYWQgPC0NCiAgICAgICAgcmVhZF9odG1sKHBhc3RlKGNrdGcsIG1ha2VycyRocmVmW3ldLCBzZXAgPSAnJykpDQogICAgICANCiAgICAgIHRlc3QgPC0gY2t0Z19rbml2ZXNfcmVhZCAlPiUgaHRtbF9ub2RlKCIucHJpY2UiKQ0KICAgICAgaWYgKCFpcy5uYSh0ZXN0KSkgew0KICAgICAgICBwcmljZXMgPC0gY2t0Z19rbml2ZXNfcmVhZCAlPiUNCiAgICAgICAgICBodG1sX25vZGVzKCcuc2VjdGlvbi1kZXRhaWxzJykgJT4lDQogICAgICAgICAgaHRtbF9ub2RlcygnLnByaWNlJykgJT4lDQogICAgICAgICAgaHRtbF90ZXh0KCkNCiAgICAgICAga25pdmVzIDwtIGNrdGdfa25pdmVzX3JlYWQgJT4lDQogICAgICAgICAgY2t0Z19xdWVyeSgpICU+JQ0KICAgICAgICAgIG11dGF0ZSgNCiAgICAgICAgICAgIFByaWNlcyA9IHByaWNlcywNCiAgICAgICAgICAgIFN0ZWVsR3JvdXAgPSBzdGVlbHMkdGl0bGVbeF0sDQogICAgICAgICAgICBNYWtlciA9IG1ha2VycyR0aXRsZVt5XSwNCiAgICAgICAgICAgIFN1YlR5cGUgPSBOQQ0KICAgICAgICAgICkNCiAgICAgICAgaWYgKGZs
YWcgPT0gVFJVRSkgew0KICAgICAgICAgIGRmIDwtIGJpbmRfcm93cyhkZiwga25pdmVzKQ0KICAgICAgICAgIGZsYWcgPC0gRkFMU0UNCiAgICAgICAgfQ0KICAgICAgICBlbHNlIHsNCiAgICAgICAgICBpZiAoc3VtKCFrbml2ZXMkaHJlZiAlaW4lIGRmJGhyZWYpID4gMCkgew0KICAgICAgICAgICAgZGYgPC0gZGYgICU+JSBiaW5kX3Jvd3Moa25pdmVzW3doaWNoKCFrbml2ZXMkaHJlZiAlaW4lIGRmJGhyZWYpLF0pDQogICAgICAgICAgfQ0KICAgICAgICB9DQogICAgICB9DQogICAgICANCiAgICAgIGVsc2Ugew0KICAgICAgICBpZiAoeCA9PSAzICYgeSA9PSA2KSB7DQogICAgICAgICAgY2t0Z19rbml2ZXNfcmVhZCA8LQ0KICAgICAgICAgICAgcmVhZF9odG1sKHBhc3RlKGNrdGcsICJrb2hldHN1Lmh0bWwiLCBzZXAgPSAnJykpDQogICAgICAgIH0NCiAgICAgICAgc3ViX3R5cGUgPC0gY2t0Z19rbml2ZXNfcmVhZCAlPiUNCiAgICAgICAgICBja3RnX3F1ZXJ5KCkNCiAgICAgICAgDQogICAgICAgIGZvciAoeiBpbiAxOmxlbmd0aChzdWJfdHlwZSkpIHsNCiAgICAgICAgICBja3RnX2tuaXZlc19zdWJfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0Zywgc3ViX3R5cGUkaHJlZlt6XSwgc2VwID0gIiIpKQ0KICAgICAgICAgIHByaWNlcyA8LSBja3RnX2tuaXZlc19zdWJfcmVhZCAlPiUNCiAgICAgICAgICAgIGh0bWxfbm9kZXMoJy5zZWN0aW9uLWRldGFpbHMnKSAlPiUNCiAgICAgICAgICAgIGh0bWxfbm9kZXMoJy5wcmljZScpICU+JQ0KICAgICAgICAgICAgaHRtbF90ZXh0KCkNCiAgICAgICAgICBrbml2ZXMgPC0gY2t0Z19rbml2ZXNfc3ViX3JlYWQgJT4lDQogICAgICAgICAgICBja3RnX3F1ZXJ5KCkgJT4lDQogICAgICAgICAgICBtdXRhdGUoDQogICAgICAgICAgICAgIFByaWNlcyA9IHByaWNlcywNCiAgICAgICAgICAgICAgU3RlZWxHcm91cCA9IHN0ZWVscyR0aXRsZVt4XSwNCiAgICAgICAgICAgICAgTWFrZXIgPSBtYWtlcnMkdGl0bGVbeV0sDQogICAgICAgICAgICAgIFN1YlR5cGUgPSBzdWJfdHlwZSR0aXRsZVt6XQ0KICAgICAgICAgICAgKQ0KICAgICAgICAgIGZvciAodCBpbiAxOmxlbmd0aChrbml2ZXMkaHJlZikpIHsNCiAgICAgICAgICAgIGlmIChrbml2ZXMkaHJlZlt0XSAlaW4lIGRmJGhyZWYpIHsNCiAgICAgICAgICAgICAgZGYkU3ViVHlwZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA9IGtuaXZlcyRTdWJUeXBlW3RdDQogICAgICAgICAgICB9DQogICAgICAgICAgICBlbHNlIHsNCiAgICAgICAgICAgICAgZGYgPC0gYmluZF9yb3dzKGRmLCBrbml2ZXNbdCwgXSkNCiAgICAgICAgICAgIH0NCiAgICAgICAgICB9DQogICAgICAgIH0NCiAgICAgIH0NCiAgICB9LA0KICAgIGVycm9yIDwtIGZ1bmN0aW9uKGUpIHsNCiAgICAgIGNhdCgiRVJST1IgOiIsIGNvbmRpdGlvbk1lc3NhZ2UoZSksICJcbiIpDQogICAgfSkNCiAgfQ0KfQ0KDQpgYGANCg0KYGBge3J9DQpkZiRTdHlsZSA8LSBO
QQ0KZGYkU3ViU3R5bGUgPC0gTkENCmRmJFN1YlN1YlN0eWxlIDwtIE5BDQpja3RnX3R5cGVzIDwtICdyZXNvdXJjZXMuaHRtbCcNCmNrdGdfdHlwZXNfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0ZywgY2t0Z190eXBlcywgc2VwID0gJycpKQ0KdHlwZXMgPC0gY2t0Z190eXBlc19yZWFkICU+JQ0KICBodG1sX25vZGVzKCcueXN3LWxwLXJvdy1pdGVtJykgJT4lDQogIGh0bWxfbm9kZXMoJ2gyJykgJT4lDQogIGh0bWxfbm9kZXMoImEiKSAlPiUNCiAgaHRtbF9hdHRycygpICU+JQ0KICB0aWJibGUoKSAlPiUNCiAgdW5uZXN0X3dpZGVyKCcuJykNCg0KZm9yICh4IGluIGMoMTpsZW5ndGgodHlwZXMkaHJlZikpWy0xNl0pIHsNCiAgdHJ5Q2F0Y2goew0KICAgIGNrdGdfc3R5bGVzX2tuaXZlc19yZWFkIDwtIHJlYWRfaHRtbChwYXN0ZShja3RnLCB0eXBlcyRocmVmW3hdLCBzZXAgPSAiIikpDQogICAgdGVzdCA8LSBja3RnX3N0eWxlc19rbml2ZXNfcmVhZCAlPiUgaHRtbF9ub2RlKCIucHJpY2UiKQ0KICAgIGlmICghaXMubmEodGVzdCkpIHsNCiAgICAgIGtuaXZlcyA8LSBja3RnX3N0eWxlc19rbml2ZXNfcmVhZCAlPiUNCiAgICAgICAgaHRtbF9ub2RlcygnLnNlY3Rpb24tZGV0YWlscycpICU+JQ0KICAgICAgICBodG1sX25vZGVzKCdhJykgJT4lDQogICAgICAgIGh0bWxfYXR0cigiaHJlZiIpICU+JQ0KICAgICAgICB0aWJibGUoKSAlPiUNCiAgICAgICAgdW5uZXN0X3dpZGVyKCcuJykgJT4lDQogICAgICAgIG11dGF0ZShTdHlsZSA9IHR5cGVzJHRpdGxlW3hdKSAlPiUNCiAgICAgICAgcmVuYW1lKGhyZWYgPSAuLi4xKQ0KICAgICAgZm9yICh0IGluIDE6bGVuZ3RoKGtuaXZlcyRocmVmKSkgew0KICAgICAgICBpZiAoa25pdmVzJGhyZWZbdF0gJWluJSBkZiRocmVmKSB7DQogICAgICAgICAgZGYkU3R5bGVbd2hpY2goZGYkaHJlZiAlaW4lIGtuaXZlcyRocmVmW3RdKV0gPC0ga25pdmVzJFN0eWxlW3RdDQogICAgICAgIH0NCiAgICAgIH0NCiAgICAgIA0KICAgIH0NCiAgICBlbHNlIHsNCiAgICAgIHN0eWxlcyA8LSBja3RnX3N0eWxlc19rbml2ZXNfcmVhZCAlPiUNCiAgICAgICAgY2t0Z19xdWVyeSgpDQogICAgICANCiAgICAgIGZvciAoeSBpbiAxOmxlbmd0aChzdHlsZXMkaHJlZikpIHsNCiAgICAgICAgY2t0Z19zdHlsZXNfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0Zywgc3R5bGVzJGhyZWZbeV0sIHNlcCA9ICIiKSkNCiAgICAgICAgdGVzdCA8LSBja3RnX3N0eWxlc19yZWFkICU+JSBodG1sX25vZGUoIi5wcmljZSIpDQogICAgICAgIA0KICAgICAgICBpZiAoIWlzLm5hKHRlc3QpKSB7DQogICAgICAgICAga25pdmVzIDwtICBja3RnX3N0eWxlc19yZWFkICU+JQ0KICAgICAgICAgICAgY2t0Z19xdWVyeSgpICU+JQ0KICAgICAgICAgICAgbXV0YXRlKFN0eWxlID0gdHlwZXMkdGl0bGVbeF0sDQogICAgICAgICAgICAgICAgICAgU3ViU3R5bGUgPSBzdHlsZXMkdGl0bGVbeV0pDQogICAgICAgICAgDQogICAgICAgICAg
Zm9yICh0IGluIDE6bGVuZ3RoKGtuaXZlcyRocmVmKSkgew0KICAgICAgICAgICAgaWYgKGtuaXZlcyRocmVmW3RdICVpbiUgZGYkaHJlZikgew0KICAgICAgICAgICAgICBkZiRTdHlsZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA8LSBrbml2ZXMkU3R5bGVbdF0NCiAgICAgICAgICAgICAgZGYkU3ViU3R5bGVbd2hpY2goZGYkaHJlZiAlaW4lIGtuaXZlcyRocmVmW3RdKV0gPC0ga25pdmVzJFN1YlN0eWxlW3RdDQogICAgICAgICAgICB9DQogICAgICAgICAgfQ0KICAgICAgICB9DQogICAgICAgIGVsc2Ugew0KICAgICAgICAgIHN1Yl90eXBlIDwtIGNrdGdfc3R5bGVzX3JlYWQgJT4lDQogICAgICAgICAgICBja3RnX3F1ZXJ5KCkNCiAgICAgICAgICBmb3IgKHogaW4gMTpsZW5ndGgoc3ViX3R5cGUpKSB7DQogICAgICAgICAgICBja3RnX2tuaXZlc19zdWJfcmVhZCA8LSByZWFkX2h0bWwocGFzdGUoY2t0Zywgc3ViX3R5cGUkaHJlZlt6XSwgc2VwID0gIiIpKQ0KICAgICAgICAgICAgDQogICAgICAgICAgICBrbml2ZXMgPC0gY2t0Z19rbml2ZXNfc3ViX3JlYWQgJT4lDQogICAgICAgICAgICAgIGNrdGdfcXVlcnkoKSAlPiUNCiAgICAgICAgICAgICAgbXV0YXRlKA0KICAgICAgICAgICAgICAgIFN0eWxlID0gdHlwZXMkdGl0bGVbeF0sDQogICAgICAgICAgICAgICAgU3ViU3R5bGUgPSBzdHlsZXMkdGl0bGVbeV0sDQogICAgICAgICAgICAgICAgU3ViU3ViU3R5bGUgPSBzdWJfdHlwZSR0aXRsZVt6XQ0KICAgICAgICAgICAgICApDQogICAgICAgICAgICBmb3IgKHQgaW4gMTpsZW5ndGgoa25pdmVzJGhyZWYpKSB7DQogICAgICAgICAgICAgIGlmIChrbml2ZXMkaHJlZlt0XSAlaW4lIGRmJGhyZWYpIHsNCiAgICAgICAgICAgICAgICBkZiRTdHlsZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA8LSBrbml2ZXMkU3R5bGVbdF0NCiAgICAgICAgICAgICAgICBkZiRTdWJTdHlsZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA8LSBrbml2ZXMkU3ViU3R5bGVbdF0NCiAgICAgICAgICAgICAgICBkZiRTdWJTdWJTdHlsZVt3aGljaChkZiRocmVmICVpbiUga25pdmVzJGhyZWZbdF0pXSA8LSBrbml2ZXMkU3ViU3ViU3R5bGVbdF0NCiAgICAgICAgICAgICAgfQ0KICAgICAgICAgICAgfQ0KICAgICAgICAgIH0NCiAgICAgICAgfQ0KICAgICAgICANCiAgICAgIH0NCiAgICB9DQogIH0sDQogIGVycm9yIDwtIGZ1bmN0aW9uKGUpIHsNCiAgICBjYXQoIkVSUk9SIDoiLCBjb25kaXRpb25NZXNzYWdlKGUpLCAiXG4iKQ0KICB9KQ0KfQ0KDQp3cml0ZS5jc3YoZGYsIGZpbGUgPSAnQ0tUR19LTklGRS5jc3YnKQ0KDQoNCmBgYA0KDQo=