Did somebody say templates?
#include <cstddef>
#include <iostream>
#include <type_traits>
// Copies the bytes at offsets [Begin, Begin + Len) from `src` to `dst` using
// straight-line code generated by template recursion.
//
// Internal building block of memcpy_helper. The range is split in half at
// every level, so the template instantiation depth grows with log2(Len)
// instead of Len. A one-byte-per-level recursion exceeds the compiler's
// instantiation depth limit (implementation-defined, commonly around a
// thousand levels) for fairly small buffers.
template <std::size_t Begin, std::size_t Len>
struct memcpy_range {
    static void
    do_(char* dst, const char* src) {
        // Left half first, then right half: bytes are written in ascending order.
        memcpy_range<Begin, Len / 2>::do_(dst, src);
        memcpy_range<Begin + Len / 2, Len - Len / 2>::do_(dst, src);
    }
};

// Recursion leaf: a range of exactly one byte.
template <std::size_t Begin>
struct memcpy_range<Begin, 1> {
    static void
    do_(char* dst, const char* src) {
        dst[Begin] = src[Begin];
    }
};

// Recursion leaf: an empty range copies nothing.
template <std::size_t Begin>
struct memcpy_range<Begin, 0> {
    static void
    do_(char*, const char*) { }
};

// Copies the bytes at offsets [I, N) from `src` to `dst`.
//
// I - offset of the first byte to copy.
// N - offset one past the last byte to copy; I must not exceed N.
//
// memcpy_helper<N, N>::do_ is a no-op.
template <std::size_t I, std::size_t N>
struct memcpy_helper {
    static_assert(I <= N, "memcpy_helper: start offset I must not exceed end offset N");

    static void
    do_(char* dst, const char* src) {
        // The length is clamped so that a failed static_assert above is the only
        // diagnostic, rather than a runaway instantiation of a wrapped-around range.
        memcpy_range<I, (I <= N ? N - I : 0)>::do_(dst, src);
    }
};

// Copies N objects of type T from `src` to `dst`, byte by byte, with the copy
// expanded at compile time.
//
// N   - number of T objects to copy (explicit template argument).
// dst - destination; must point to at least N objects of type T.
// src - source; must point to at least N objects of type T. May point to const
//       data.
//
// T must be trivially copyable, because the objects are duplicated through
// their byte representation.
template <std::size_t N, typename T>
void
memcpy(T* dst, const T* src) {
    static_assert(std::is_trivially_copyable<T>::value,
                  "memcpy: T must be trivially copyable to be copied bytewise");
    memcpy_helper<0, N * sizeof(T)>::do_(reinterpret_cast<char*>(dst),
                                         reinterpret_cast<const char*>(src));
}
// Demo driver: copies a three-element int array with the compile-time memcpy
// and prints each copied element on its own line.
int
main() {
    int source[]{1, 2, 3};
    int target[3];

    memcpy<3>(target, source);

    // Walk the destination array with a pointer and print one value per line.
    for (const int* cursor = target; cursor != target + 3; ++cursor) {
        std::cout << *cursor << '\n';
    }

    return 0;
}
It's fast because the byte copies are expanded by template recursion at compile time, so there is no run-time loop!