This report is designed for screens of at least 1920x1080 resolution.

Server Setup:

  • Windows Server 2012 R2 Datacenter, without Meltdown / Spectre patch
  • Intel Core i7-7700HQ (3.8 GHz single-thread turbo boost, 3.4 GHz all-core turbo boost) + NVIDIA 1050 Ti (1.75 GHz)
  • 32GB RAM 2400 MHz DDR4
  • SSD with 3 GB/s read speed
  • R 3.4.3, Visual Studio 2015, MinGW 4.9
  • xgboost: commit 98be9ae (Jan 28, 2018, 2:06 AM GMT+1)

1 Setup

1.1 CSS Injection

# Emit an inline <style> block into the rendered document: widens the main
# container, narrows the TOC column on wide screens, and sets the font used
# by dygraph labels.
# Fix: `!important` must precede the terminating `;` of a declaration; the
# original placed it after the semicolon, leaving a stray invalid token.
cat("
<style>
body .main-container {
    max-width: 100% !important;
}
body .main-container/*, .toc-content*/ {
  padding-left: 5px !important;
  padding-right: 5px !important;
}
@media (min-width: 1919px) {
  .col-md-3 {
    width: 15% !important;
  }
  .col-md-9 {
    width: 85% !important;
  }
  .toc-content {
    padding-left: 10px !important;
    padding-right: 10px !important;
  }
}
.dygraph-label, .dygraph-title, .dygraph-xlabel, .dygraph-ylabel, .dygraph-y2label, .dygraph-axis-label, .dygraph-axis-label-x, .dygraph-axis-label-y, .dygraph-axis-label-y2 {
  font-family: \"Roboto Condensed\" !important;
}
</style>
")

1.2 Load Libraries

library(extrafont)
## Registering fonts with R
library(ggplot2)
library(data.table)
library(dygraphs)

1.3 Load Data

# Read both benchmark CSVs, then keep only the four columns used by the
# plotting functions (selected by name, hence `with = FALSE`).
xgb_gpu_roam <- fread("xgb_bench_gpu_fast_142.csv")
xgb_gpu_roam <- xgb_gpu_roam[
  ,
  c("Threads", "Time", "Efficiency", "Normalized"),
  with = FALSE
]

xgb_gpu_pin <- fread("xgb_bench_gpu_fast_142_pin.csv")
xgb_gpu_pin <- xgb_gpu_pin[
  ,
  c("Threads", "Time", "Efficiency", "Normalized"),
  with = FALSE
]

1.4 Settings

# ---- Hardware description (used in plot titles and subtitles) ----

# CPU label shown at the start of every subtitle.
cpu_name <- "Core i7-7700HQ"

# Printed in titles as "<[1]> Socket, <[2]> Cores, <[3]> Threads".
n_sockets <- c(1, 4, 2)

# Number of x-axis breaks / index labels; shown as "<n_cores>T" in subtitles.
n_cores <- 8

# Frequencies in GHz; the subtitles print max() and min() of this table.
freq_table <- c(3.8, 3.6, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4)

# Odd indices first, then even ones. Not referenced by the plotting
# functions visible in this file.
core_priority <- c(1, 3, 5, 7, 2, 4, 6, 8)

# ---- Plot appearance ----

# Value mapped to the alpha aesthetic of lines and bars.
element_alpha <- 0.5

# Size / vertical justification of the numeric value labels.
text_size <- 8
text_vjust <- -0.35

# Size / vertical justification of the large index labels.
core_size <- 14
core_vjust <- 2

# Base font size handed to theme_bw().
base_size <- 27

# Multiplier applied to y for the trailing empty-label text layer.
y_mult <- 1.05

1.5 Plotting Functions

# Bar + line chart of `Time` against `Threads`.
#
# input:  table with numeric columns `Threads` and `Time`.
# titles: text placed at the start of the plot title.
# Returns the ggplot object built inside suppressWarnings().
#
# Reads the globals cpu_name, n_sockets, n_cores, freq_table, element_alpha,
# text_size, text_vjust, core_size, core_vjust, base_size and y_mult.
make_plot_single_per_thread <- function(input, titles) {
  # Scalar pluralisation helper (plain if/else instead of ifelse()).
  plural <- function(n) if (n > 1) "s" else ""

  total_cores <- n_sockets[1] * n_sockets[2]
  plot_title <- paste0(
    titles, " Timings (seconds): ",
    n_sockets[1], " Socket", plural(n_sockets[1]), ", ",
    n_sockets[2], " Core", plural(n_sockets[2]), ", ",
    n_sockets[3], " Thread", plural(n_sockets[3]),
    " => ", total_cores, "C/", total_cores * n_sockets[3], "T"
  )
  plot_subtitle <- paste0(
    cpu_name, "@", sprintf("%.01f", max(freq_table)), "/",
    sprintf("%.01f", min(freq_table)),
    "GHz (total: ", n_cores / 2, "C/", n_cores, "T, approx ",
    sprintf("%.1f", 0.5 * n_cores * min(freq_table)), "GHz)"
  )

  suppressWarnings(
    ggplot(data = input, aes(x = Threads, y = Time)) +
      geom_line(aes(alpha = element_alpha), color = "red") +
      geom_bar(
        aes(fill = Time, alpha = element_alpha),
        color = "black", stat = "identity"
      ) +
      # Numeric value printed just above each bar.
      geom_text(
        aes(label = sprintf("%.02f", Time)),
        vjust = text_vjust, size = text_size
      ) +
      # Large running index inside each bar. Derived from the data rather
      # than `1:n_cores`, so inputs whose row count differs from n_cores no
      # longer fail with an aesthetic-length error.
      geom_text(
        aes(label = seq_along(Threads)),
        vjust = core_vjust, size = core_size
      ) +
      scale_fill_gradient(low = "lightgreen", high = "red") +
      scale_alpha(guide = "none") +
      scale_x_continuous(breaks = seq_len(n_cores), expand = c(0, 1)) +
      scale_y_continuous(expand = c(0, 0)) +
      theme_bw(base_family = "Roboto Condensed", base_size = base_size) +
      labs(
        title = plot_title,
        subtitle = plot_subtitle,
        x = "Number of Threads",
        y = "Time (seconds)"
      ) +
      # Empty labels placed at y * y_mult; presumably there to extend the
      # y range above the tallest bar so value labels fit -- confirm.
      geom_text(aes(y = Time * y_mult, label = ""))
  )
}

# Bar + line chart of `Efficiency` against `Threads`, with light gray
# reference segments and a percent-formatted y axis.
#
# input:  table with numeric columns `Threads` and `Efficiency`.
# titles: text placed at the start of the plot title.
# Returns the ggplot object built inside suppressWarnings().
#
# Reads the globals cpu_name, n_sockets, n_cores, freq_table, element_alpha,
# text_size, text_vjust, core_size, core_vjust, base_size and y_mult.
make_plot_single_per_efficiency <- function(input, titles) {
  # Scalar pluralisation helper (plain if/else instead of ifelse()).
  plural <- function(n) if (n > 1) "s" else ""

  total_cores <- n_sockets[1] * n_sockets[2]
  min_freq <- sprintf("%.01f", min(freq_table))
  plot_title <- paste0(
    titles, " Efficiency vs 1 Thread @", min_freq, "GHz: ",
    n_sockets[1], " Socket", plural(n_sockets[1]), ", ",
    n_sockets[2], " Core", plural(n_sockets[2]), ", ",
    n_sockets[3], " Thread", plural(n_sockets[3]),
    " => ", total_cores, "C/", total_cores * n_sockets[3], "T"
  )
  plot_subtitle <- paste0(
    cpu_name, "@", sprintf("%.01f", max(freq_table)), "/", min_freq,
    "GHz (total: ", n_cores / 2, "C/", n_cores, "T, approx ",
    sprintf("%.1f", 0.5 * n_cores * min(freq_table)), "GHz)"
  )
  y_title <- paste0("Efficiency (100% versus 1 thread @", min_freq, "GHz)")

  suppressWarnings(
    ggplot(data = input, aes(x = Threads, y = Efficiency)) +
      geom_line(aes(alpha = element_alpha), color = "red") +
      geom_bar(
        aes(fill = Efficiency, alpha = element_alpha),
        color = "black", stat = "identity"
      ) +
      # Reference line: diagonal from (0, 0) to (n_cores, n_cores) ...
      geom_segment(
        x = 0, y = 0, xend = n_cores, yend = n_cores,
        alpha = 0.5, size = 3, group = 1, color = "lightgray",
        lineend = "round", linetype = 3
      ) +
      # ... continued horizontally at y = n_cores.
      geom_segment(
        x = n_cores, y = n_cores, xend = n_cores * 2 + 1, yend = n_cores,
        alpha = 0.5, size = 3, group = 1, color = "lightgray",
        lineend = "round", linetype = 3
      ) +
      # Numeric value printed just above each bar.
      geom_text(
        aes(label = sprintf("%.02f", Efficiency)),
        vjust = text_vjust, size = text_size
      ) +
      # Large running index inside each bar. Derived from the data rather
      # than `1:n_cores`, so inputs whose row count differs from n_cores no
      # longer fail with an aesthetic-length error.
      geom_text(
        aes(label = seq_along(Threads)),
        vjust = core_vjust, size = core_size
      ) +
      scale_fill_gradient(
        low = "red", high = "lightgreen", labels = scales::percent
      ) +
      scale_size(guide = "none") +
      scale_alpha(guide = "none") +
      scale_x_continuous(breaks = seq_len(n_cores), expand = c(0, 1)) +
      scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
      theme_bw(base_family = "Roboto Condensed", base_size = base_size) +
      labs(
        title = plot_title,
        subtitle = plot_subtitle,
        x = "Number of Threads",
        y = y_title
      ) +
      # Empty labels placed at y * y_mult; presumably there to extend the
      # y range above the tallest bar so value labels fit -- confirm.
      geom_text(aes(y = Efficiency * y_mult, label = ""))
  )
}

# Bar + line chart of `Normalized` against `Threads`, with a light gray
# reference line at y = 1 and a percent-formatted y axis.
#
# input:  table with numeric columns `Threads` and `Normalized`.
# titles: text placed at the start of the plot title.
# Returns the ggplot object built inside suppressWarnings().
#
# Reads the globals cpu_name, n_sockets, n_cores, freq_table, element_alpha,
# text_size, text_vjust, core_size, core_vjust, base_size and y_mult.
make_plot_single_per_normalized <- function(input, titles) {
  # Scalar pluralisation helper (plain if/else instead of ifelse()).
  plural <- function(n) if (n > 1) "s" else ""

  total_cores <- n_sockets[1] * n_sockets[2]
  min_freq <- sprintf("%.01f", min(freq_table))
  plot_title <- paste0(
    titles, " Normalized vs 1 Thread @", min_freq, "GHz: ",
    n_sockets[1], " Socket", plural(n_sockets[1]), ", ",
    n_sockets[2], " Core", plural(n_sockets[2]), ", ",
    n_sockets[3], " Thread", plural(n_sockets[3]),
    " => ", total_cores, "C/", total_cores * n_sockets[3], "T"
  )
  plot_subtitle <- paste0(
    cpu_name, "@", sprintf("%.01f", max(freq_table)), "/", min_freq,
    "GHz (total: ", n_cores / 2, "C/", n_cores, "T, approx ",
    sprintf("%.1f", 0.5 * n_cores * min(freq_table)), "GHz)"
  )
  y_title <- paste0(
    "Normalized Efficiency (% versus 1 thread @", min_freq, "GHz)"
  )

  suppressWarnings(
    ggplot(data = input, aes(x = Threads, y = Normalized)) +
      geom_line(aes(alpha = element_alpha), color = "red") +
      geom_bar(
        aes(fill = Normalized, alpha = element_alpha),
        color = "black", stat = "identity"
      ) +
      # Horizontal reference line at y = 1.
      geom_segment(
        x = 0, y = 1, xend = n_cores * 2 + 1, yend = 1,
        alpha = 0.5, size = 3, group = 1, color = "lightgray",
        lineend = "round", linetype = 3
      ) +
      # Value label is scaled by 100 before formatting.
      geom_text(
        aes(label = sprintf("%.02f", 100 * Normalized)),
        vjust = text_vjust, size = text_size
      ) +
      # Large running index inside each bar. Derived from the data rather
      # than `1:n_cores`, so inputs whose row count differs from n_cores no
      # longer fail with an aesthetic-length error.
      geom_text(
        aes(label = seq_along(Threads)),
        vjust = core_vjust, size = core_size
      ) +
      scale_fill_gradientn(
        colors = c("red", "orange", "lightgreen"), labels = scales::percent
      ) +
      scale_size(guide = "none") +
      scale_alpha(guide = "none") +
      scale_x_continuous(breaks = seq_len(n_cores), expand = c(0, 1)) +
      scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
      theme_bw(base_family = "Roboto Condensed", base_size = base_size) +
      labs(
        title = plot_title,
        subtitle = plot_subtitle,
        x = "Number of Threads",
        y = y_title
      ) +
      # Empty labels placed at y * y_mult; presumably there to extend the
      # y range above the tallest bar so value labels fit -- confirm.
      geom_text(aes(y = Normalized * y_mult, label = ""))
  )
}

2 Plot Original Data

2.1 Thread Performance

2.1.1 xgboost

2.1.1.1 xgboost CPU Scheduler-based (Roaming)

# Timing plot for the roaming (scheduler-based) run.
# NOTE(review): the title tag says "VS 2017" while the setup section lists
# Visual Studio 2015 -- confirm which toolchain produced this data.
make_plot_single_per_thread(
  input = xgb_gpu_roam,
  titles = "[Roaming CPU, VS 2017] GPU xgboost"
)

2.1.1.2 xgboost CPU Affinity-based (Pinning)

# Timing plot for the pinned (affinity-based) run.
# NOTE(review): the title tag says "VS 4.9"; the setup section lists
# MinGW 4.9 and Visual Studio 2015 -- confirm the intended label.
make_plot_single_per_thread(
  input = xgb_gpu_pin,
  titles = "[Pinned CPU, VS 4.9] GPU xgboost"
)

2.2 Efficiency Performance

2.2.1 xgboost

2.2.1.1 xgboost CPU Scheduler-based (Roaming)

# Efficiency plot for the roaming (scheduler-based) run.
# NOTE(review): the title tag says "VS 2017" while the setup section lists
# Visual Studio 2015 -- confirm which toolchain produced this data.
make_plot_single_per_efficiency(
  input = xgb_gpu_roam,
  titles = "[Roaming CPU, VS 2017] GPU xgboost"
)

2.2.1.2 xgboost CPU Affinity-based (Pinning)

# Efficiency plot for the pinned (affinity-based) run.
# NOTE(review): the title tag says "VS 4.9"; the setup section lists
# MinGW 4.9 and Visual Studio 2015 -- confirm the intended label.
make_plot_single_per_efficiency(
  input = xgb_gpu_pin,
  titles = "[Pinned CPU, VS 4.9] GPU xgboost"
)

2.3 Normalized Performance

2.3.1 xgboost

2.3.1.1 xgboost CPU Scheduler-based (Roaming)

# Normalized plot for the roaming (scheduler-based) run.
# NOTE(review): the title tag says "VS 2017" while the setup section lists
# Visual Studio 2015 -- confirm which toolchain produced this data.
make_plot_single_per_normalized(
  input = xgb_gpu_roam,
  titles = "[Roaming CPU, VS 2017] GPU xgboost"
)

2.3.1.2 xgboost CPU Affinity-based (Pinning)

# Normalized plot for the pinned (affinity-based) run.
# NOTE(review): the title tag says "VS 4.9"; the setup section lists
# MinGW 4.9 and Visual Studio 2015 -- confirm the intended label.
make_plot_single_per_normalized(
  input = xgb_gpu_pin,
  titles = "[Pinned CPU, VS 4.9] GPU xgboost"
)