Add more architecture-detection heuristics for the prefetch macros in common.h; hopefully fixes #50
diff --git a/include/common.h b/include/common.h
index 5dcd72b..1fd3904 100644
--- a/include/common.h
+++ b/include/common.h
@@ -74,15 +74,15 @@ static void write_neutral_s32(u8 * data, s32 value) {
#if defined(HAS_BUILTIN_PREFECTCH)
#define prefetch(address) __builtin_prefetch((const void *)(address), 0, 0)
#define prefetchw(address) __builtin_prefetch((const void *)(address), 1, 0)
-#elif defined(_M_IX86) || defined(_M_AMD64)
+#elif defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(i386) || defined(__i386__) || defined(__i386)
#include <intrin.h>
#define prefetch(address) _mm_prefetch((const void *)(address), _MM_HINT_NTA)
#define prefetchw(address) _m_prefetchw((const void *)(address))
-#elif defined(_M_ARM)
+#elif defined(_M_ARM) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__)
#include <intrin.h>
#define prefetch(address) __prefetch((const void *)(address))
#define prefetchw(address) __prefetchw((const void *)(address))
-#elif defined(_M_ARM64)
+#elif defined(_M_ARM64) || defined(__aarch64__)
#include <intrin.h>
#define prefetch(address) __prefetch2((const void *)(address), 1)
#define prefetchw(address) __prefetch2((const void *)(address), 17)
