diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2310713..40be128 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required (VERSION 3.8 FATAL_ERROR)
 project (SlabHash)
 
-find_package(CUDA 8.0 REQUIRED)
+find_package(CUDA 12.1 REQUIRED)  # minimum supported CUDA toolkit version
 
 option(CMAKE_VERBOSE_MAKEFILE ON)
 option(DGTEST,  "DGTEST"  ON)
@@ -9,6 +9,8 @@ option(DGTEST,  "DGTEST"  ON)
 set(CUDA_NVCC_FLAGS -std=c++11)
 set (CMAKE_CXX_STANDARD 11)
 
+# Default nvcc host compiler; override with -DCMAKE_CUDA_HOST_COMPILER=<path>.
+set(CMAKE_CUDA_HOST_COMPILER /usr/bin/gcc-10 CACHE FILEPATH "nvcc host compiler")
 if (CUDA_VERBOSE_PTXAS)
   set(VERBOSE_PTXAS --ptxas-options=-v)
 endif (CUDA_VERBOSE_PTXAS)
@@ -35,9 +37,17 @@ set(GENCODE_SM71
   -gencode=arch=compute_71,code=sm_71 -gencode=arch=compute_71,code=compute_71)
 set(GENCODE_SM75
 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_75,code=compute_75)
+set(GENCODE_SM86
+  -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86)
+set(GENCODE_SM87
+  -gencode=arch=compute_87,code=sm_87 -gencode=arch=compute_87,code=compute_87)
+set(GENCODE_SM89
+  -gencode=arch=compute_89,code=sm_89 -gencode=arch=compute_89,code=compute_89)
+set(GENCODE_SM90
+  -gencode=arch=compute_90,code=sm_90 -gencode=arch=compute_90,code=compute_90)
 
 option(SLABHASH_GENCODE_SM30 "GENCODE_SM30" OFF)
-option(SLABHASH_GENCODE_SM35 "GENCODE_SM35" ON)
+option(SLABHASH_GENCODE_SM35 "GENCODE_SM35" OFF)
 option(SLABHASH_GENCODE_SM37 "GENCODE_SM37" OFF)
 option(SLABHASH_GENCODE_SM50 "GENCODE_SM50" OFF)
 option(SLABHASH_GENCODE_SM60 "GENCODE_SM60" OFF)
@@ -45,6 +55,10 @@ option(SLABHASH_GENCODE_SM61 "GENCODE_SM61" OFF)
 option(SLABHASH_GENCODE_SM70 "GENCODE_SM70" OFF)
 option(SLABHASH_GENCODE_SM71 "GENCODE_SM71" OFF)
 option(SLABHASH_GENCODE_SM75 "GENCODE_SM75" OFF)
+option(SLABHASH_GENCODE_SM86 "GENCODE_SM86" ON)  # the only one of the new architectures enabled by default
+option(SLABHASH_GENCODE_SM87 "GENCODE_SM87" OFF)
+option(SLABHASH_GENCODE_SM89 "GENCODE_SM89" OFF)
+option(SLABHASH_GENCODE_SM90 "GENCODE_SM90" OFF)
 
 if (SLABHASH_GENCODE_SM30)
   set(GENCODE ${GENCODE} ${GENCODE_SM30})
@@ -82,6 +96,13 @@ if(SLABHASH_GENCODE_SM75)
   set(GENCODE ${GENCODE} ${GENCODE_SM75})
 endif(SLABHASH_GENCODE_SM75)
 
+# Append the gencode flags of each enabled SM86/SM87/SM89/SM90 option to GENCODE.
+foreach(sm_version 86 87 89 90)
+  if(SLABHASH_GENCODE_SM${sm_version})
+    set(GENCODE ${GENCODE} ${GENCODE_SM${sm_version}})
+  endif()
+endforeach()
+
 include_directories(SlabAlloc/src)
 include_directories(src src/concurrent)
 include_directories(ThirdParty/rapidjson/include)
diff --git a/README.md b/README.md
index 2c8c171..09bed68 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ There are a few variations of GpuSlabHash class. The most complete one at the mo
 This class partially owns all the memory allocated on the GPU to actually store all the contents, side by side all units allocated by the dynamic memory allocator. 
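-There is another class, named [https://github.com/owensgroup/SlabHash/blob/master/src/concurrent_map/cmap_class.cuh#L26](`GpuSlabHashContext`), which does not own any memory but has all the related member functions to use the data structure itself. The context class is the one which is used by GPU threads on the device. Here's an example of the way to use it for a [https://github.com/owensgroup/SlabHash/blob/master/src/concurrent_map/device/search_kernel.cuh](search kernel):
+There is another class, named [`GpuSlabHashContext`](https://github.com/owensgroup/SlabHash/blob/master/src/concurrent_map/cmap_class.cuh#L26), which does not own any memory but has all the related member functions to use the data structure itself. The context class is the one which is used by GPU threads on the device. Here's an example of the way to use it for a [search kernel](https://github.com/owensgroup/SlabHash/blob/master/src/concurrent_map/device/search_kernel.cuh):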
 
-```
+```c++
 template <typename KeyT, typename ValueT>
 __global__ void search_table(
     KeyT* d_queries,
diff --git a/src/concurrent_map/cmap_class.cuh b/src/concurrent_map/cmap_class.cuh
index 1329740..5bb43f2 100644
--- a/src/concurrent_map/cmap_class.cuh
+++ b/src/concurrent_map/cmap_class.cuh
@@ -179,7 +179,7 @@ template <typename KeyT, typename ValueT>
 class GpuSlabHash<KeyT, ValueT, SlabHashTypeT::ConcurrentMap> {
  private:
   // fixed known parameters:
-  static constexpr uint32_t BLOCKSIZE_ = 128;
+  static constexpr uint32_t BLOCKSIZE_ = 512;
   static constexpr uint32_t WARP_WIDTH_ = 32;
   static constexpr uint32_t PRIME_DIVISOR_ = 4294967291u;