: "cc", "memory");
}
-void *
-memcpy(void *d1, const void *s1, size_t len)
+noinline void *  // Copy len bytes from s1 to d1 using x86 "rep movs" string instructions; returns d1.
+__memcpy(void *d1, const void *s1, size_t len)
{
- u8 *d = (u8*)d1, *s = (u8*)s1;
- while (len--)
- *d++ = *s++;
+ void *d = d1;  // working copy: the asm updates d, while d1 is kept intact for the return value
+ if (((u32)d1 | (u32)s1 | len) & 3) {  // true when dest, source or length has either of its low two bits set
+ // Non-aligned case: something is not a multiple of 4, so copy one byte at a time with "rep movsb".
+ asm volatile(  // NOTE(review): no "cld" here - presumably the direction flag is already clear on entry; confirm
+ "rep movsb (%%esi),%%es:(%%edi)\n"
+ : "+c"(len), "+S"(s1), "+D"(d)  // ecx = count, esi = source, edi = dest; all are read-write operands
+ : : "cc", "memory");
+ return d1;
+ }
+ // Common case - dest, source and length are all multiples of 4, so use a 4-byte copy ("rep movsl").
+ len /= 4;  // convert the byte count into a count of 4-byte units
+ asm volatile(
+ "rep movsl (%%esi),%%es:(%%edi)\n"
+ : "+c"(len), "+S"(s1), "+D"(d)  // same operand layout as the byte copy above
+ : : "cc", "memory");
return d1;
}
size_t strlen(const char *s);
int strcmp(const char *s1, const char *s2);
void *memset(void *s, int c, size_t n);
-void *memcpy(void *d1, const void *s1, size_t len);
+void *__memcpy(void *d1, const void *s1, size_t len);  // out-of-line copy routine that the memcpy() macro falls back to
+#define memcpy(d1, s1, len) ( \
+ (__builtin_constant_p(len) && (len) <= 20) \
+ ? __builtin_memcpy((d1), (s1), (len)) \
+ : __memcpy((d1), (s1), (len)))  // compile-time-constant len of at most 20 -> compiler builtin; anything else -> __memcpy()
inline void memcpy_far(u16 d_seg, void *d_far
, u16 s_seg, const void *s_far, size_t len);
void *memmove(void *d, const void *s, size_t len);