diff --git a/rllm/llama-cpp-low/Cargo.toml b/rllm/llama-cpp-low/Cargo.toml
index 399ae68a..8dac2412 100644
--- a/rllm/llama-cpp-low/Cargo.toml
+++ b/rllm/llama-cpp-low/Cargo.toml
@@ -16,3 +16,6 @@ cmake = "0.1.50"
 [features]
 default = []
 cuda = []
+sycl = []
+sycl_fp16 = []
+sycl_nvidia = []
diff --git a/rllm/llama-cpp-low/build.rs b/rllm/llama-cpp-low/build.rs
index 2ee2cab6..acdc1d16 100644
--- a/rllm/llama-cpp-low/build.rs
+++ b/rllm/llama-cpp-low/build.rs
@@ -5,7 +5,14 @@ const SUBMODULE_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/llama.cpp");
 
 fn main() {
     let ccache = true;
-    let cuda = std::env::var("CARGO_FEATURE_CUDA").unwrap_or(String::new());
+    let flag_cuda = std::env::var("CARGO_FEATURE_CUDA").unwrap_or(String::new()) == "1";
+    let flag_sycl = std::env::var("CARGO_FEATURE_SYCL").unwrap_or(String::new()) == "1";
+    let flag_sycl_fp16 = std::env::var("CARGO_FEATURE_SYCL_FP16").unwrap_or(String::new()) == "1";
+    let flag_sycl_nvidia = std::env::var("CARGO_FEATURE_SYCL_NVIDIA").unwrap_or(String::new()) == "1";
+
+    // oneAPI environment variables
+    let mkl_root = std::env::var("MKLROOT");
+    let cmplr_root = std::env::var("CMPLR_ROOT");
 
     let submodule_dir = &PathBuf::from(SUBMODULE_DIR);
     let header_path = submodule_dir.join("llama.h");
@@ -29,15 +36,66 @@ fn main() {
             .configure_arg("-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache");
     }
 
-    if cuda == "1" {
+    if flag_cuda && flag_sycl {
+        panic!("The `cuda` and `sycl` features cannot be enabled at the same time!");
+    }
+    if flag_cuda {
         cmake.configure_arg("-DLLAMA_CUBLAS=ON");
         println!("cargo:rustc-link-search=/usr/local/cuda/lib64");
         println!("cargo:rustc-link-lib=cuda");
         println!("cargo:rustc-link-lib=cudart");
         println!("cargo:rustc-link-lib=cublas");
         println!("cargo:rustc-link-lib=cupti");
-    }
+    } else if flag_sycl {
+        assert!(
+            mkl_root.is_ok(),
+            "MKLROOT is not set (please `source /opt/intel/oneapi/setvars.sh` if oneAPI is installed)"
+        );
+        assert!(
+            cmplr_root.is_ok(),
+            "CMPLR_ROOT is not set (please `source /opt/intel/oneapi/setvars.sh` if oneAPI is installed)"
+        );
+        let mkl_root_str = mkl_root.unwrap();
+        //let cmplr_root_str = cmplr_root.unwrap();
+
+        cmake
+            .define("LLAMA_SYCL", "ON")
+            .define("CMAKE_C_COMPILER", "icx")
+            .define("CMAKE_CXX_COMPILER", "icpx");
+        println!("cargo:rustc-link-arg=-fiopenmp");
+        println!("cargo:rustc-link-arg=-fopenmp-targets=spir64_gen");
+        println!("cargo:rustc-link-arg=-fsycl");
+        println!("cargo:rustc-link-arg=-Wl,--no-as-needed");
+        println!("cargo:rustc-link-arg=-Wno-narrowing");
+        println!("cargo:rustc-link-arg=-O3");
+        //println!("cargo:rustc-link-search=native={}/lib", cmplr_root_str);
+        println!("cargo:rustc-link-search=native={}/lib", mkl_root_str);
+        println!("cargo:rustc-link-lib=svml");
+        println!("cargo:rustc-link-lib=mkl_sycl_blas");
+        println!("cargo:rustc-link-lib=mkl_sycl_lapack");
+        println!("cargo:rustc-link-lib=mkl_sycl_dft");
+        println!("cargo:rustc-link-lib=mkl_sycl_sparse");
+        println!("cargo:rustc-link-lib=mkl_sycl_vm");
+        println!("cargo:rustc-link-lib=mkl_sycl_rng");
+        println!("cargo:rustc-link-lib=mkl_sycl_stats");
+        println!("cargo:rustc-link-lib=mkl_sycl_data_fitting");
+        println!("cargo:rustc-link-lib=mkl_intel_ilp64");
+        println!("cargo:rustc-link-lib=mkl_intel_thread");
+        println!("cargo:rustc-link-lib=mkl_tbb_thread");
+        println!("cargo:rustc-link-lib=mkl_core");
+        println!("cargo:rustc-link-lib=iomp5");
+        println!("cargo:rustc-link-lib=sycl");
+        println!("cargo:rustc-link-lib=pthread");
+        println!("cargo:rustc-link-lib=m");
+        println!("cargo:rustc-link-lib=dl");
+        println!("cargo:rustc-link-lib=intlc");
+        println!("cargo:rustc-link-lib=imf");
+        //println!("cargo:rustc-link-lib=static=ggml_sycl");
+        //println!("cargo:rustc-link-arg=")
+    }
+    if flag_sycl_fp16 {
+        cmake.configure_arg("-DLLAMA_SYCL_F16=ON");
+    }
+    if flag_sycl_nvidia {
+        cmake.configure_arg("-DLLAMA_SYCL_TARGET=NVIDIA");
+    }
+    cmake.very_verbose(true);
+
     let dst = cmake.build();
     println!("cargo:rustc-link-search=native={}/lib", dst.display());
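With the `sycl` feature enabled, build.rs drives the llama.cpp CMake build with Intel's icx/icpx compilers, resolves the oneMKL libraries from $MKLROOT/lib, and relies on the environment exported by setvars.sh to locate the compiler runtime libraries (svml, intlc, imf, iomp5, sycl). A minimal sketch of exercising the new features by hand, assuming a default oneAPI installation under /opt/intel/oneapi (the feature combinations are illustrative):

    # exports MKLROOT and CMPLR_ROOT, which build.rs asserts on
    source /opt/intel/oneapi/setvars.sh
    cd rllm/llama-cpp-low
    cargo build --release --features sycl            # FP32 SYCL kernels
    cargo build --release --features sycl,sycl_fp16  # also enable FP16 kernels
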
//println!("cargo:rustc-link-lib=static=ggml_sycl"); + //println!("cargo:rustc-link-arg=") + } + if flag_sycl_fp16 { + cmake.configure_arg("-DLLAMA_SYCL_F16=ON"); + } + if flag_sycl_nvidia { + cmake.configure_arg("-DLLAMA_SYCL_TARGET=NVIDIA"); + } + cmake.very_verbose(true); + let dst = cmake.build(); println!("cargo:rustc-link-search=native={}/lib", dst.display()); diff --git a/rllm/rllm-cuda/server.sh b/rllm/rllm-cuda/server.sh index 3c6ca131..a8dd7b14 100755 --- a/rllm/rllm-cuda/server.sh +++ b/rllm/rllm-cuda/server.sh @@ -41,6 +41,42 @@ while [ "$1" != "" ] ; do exit 1 fi ;; + --sycl ) + if [ "$CPP" = 1 ] ; then + VER="$VER --features sycl" + ADD_ARGS="--gpu-layers 1000" + else + echo "--sycl only valid for llama.cpp" + exit 1 + fi + ;; + --sycl-fp16 ) + if [ "$CPP" = 1 ] ; then + VER="$VER --features sycl,sycl_fp16" + ADD_ARGS="--gpu-layers 1000" + else + echo "--sycl-fp16 only valid for llama.cpp" + exit 1 + fi + ;; + --sycl-nvidia ) + if [ "$CPP" = 1 ] ; then + VER="$VER --features sycl,sycl_nvidia" + ADD_ARGS="--gpu-layers 1000" + else + echo "--sycl-nvidia only valid for llama.cpp" + exit 1 + fi + ;; + --sycl-nvidia-fp16 ) + if [ "$CPP" = 1 ] ; then + VER="$VER --features sycl,sycl_nvidia,sycl_fp16" + ADD_ARGS="--gpu-layers 1000" + else + echo "--sycl-nvidia-fp16 only valid for llama.cpp" + exit 1 + fi + ;; --trace ) R_LOG=info,tokenizers=error,rllm=trace,aicirt=info,llama_cpp_low=trace ;; @@ -84,7 +120,7 @@ if [ "$CPP" = 1 ] ; then * ) SELF="server.sh" cat <