Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 22 additions & 15 deletions src/libc/memcpy.src
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,31 @@
.global _memcpy
.type _memcpy, @function

.ifdef PREFER_OS_LIBC

.set _memcpy, 0x0000A4

.else
; Note: TiOS memcpy works fine, but our implementation is faster
; .set _memcpy, 0x0000A4

_memcpy:
ld iy, -1
; size > 0 : 25F + 15R + 1 + LDIR
; size >= 65536 : 32F + 16R + 3 + LDIR (only when the low 16 bits are zero)
; size == 0 : 26F + 13R + 2

ld iy, 0
add iy, sp
ld bc, (iy + 10) ; Load count
sbc hl, hl
add hl, bc
jr nc, .L.zero
ld de, (iy + 4) ; Load destination
ld hl, (iy + 7) ; Load source
ld bc, (iy + 9) ; Load count
ld a, c
or a, b
ld de, (iy + 3) ; Load destination
jr z, .L.maybe_zero
.L.not_zero:
ld hl, (iy + 6) ; Load source
ldir
.L.zero:
ld hl, (iy + 4) ; Return the destination pointer
ld hl, (iy + 3) ; Return the destination pointer
ret

.endif
.L.maybe_zero:
; low 16 bits are zero
or a, (iy + 11) ; test upper 8 bits
jr nz, .L.not_zero ; size >= 65536
; size == 0
ex de, hl
ret
115 changes: 63 additions & 52 deletions src/libc/memmove.src
Original file line number Diff line number Diff line change
Expand Up @@ -5,97 +5,108 @@
.global _memmove
.type _memmove, @function

.ifdef PREFER_OS_LIBC

.set _memmove, 0x0000A8

.else
; Note: TiOS memmove works fine, but our implementation is faster
; .set _memmove, 0x0000A8

.if 1

; Optimized for when src != dst
_memmove:
; src > dst | LDIR | 32F + 15R + 1
; src < dst | LDDR | 35F + 12R + 2
; src = dst | LDDR | 35F + 12R + 2
; zero size | | 24F + 12R + 2
; src > dst | LDIR | 31F + 15R + 2
; src < dst | LDDR | 33F + 12R + 1
; src = dst | LDIR | 31F + 15R + 2
; zero size | | 26F + 10R + 2
; size >= 65536 + 7F + 1R + 2 (only when the low 16 bits are zero)

ld iy, -1
ld iy, 0
add iy, sp
ld bc, (iy + 10)
sbc hl, hl
add hl, bc
jr nc, .L.zero
ld hl, (iy + 7)
ld de, (iy + 4)
ld bc, (iy + 9)
ld a, c
or a, b
ld de, (iy + 3)
jr z, .L.maybe_zero
.L.not_zero:
ld hl, (iy + 6)
sbc hl, de
; src <= dst
jr c, .L.copy_backwards
; src > dst
; .copy_forwards:
add hl, de
inc hl
ldir
.L.zero:
ld hl, (iy + 4)
ret

.L.copy_backwards:
; src >= dst
jr nc, .L.copy_forwards
; src < dst
; .L.copy_backwards:
; move HL and DE to the end
dec de ; DE = dst - 1
ex de, hl
add hl, bc
ex de, hl ; HL = src - dst - 1, DE = dst + size
add hl, de ; HL = src + size - 1
dec de ; DE = dst + size - 1
add hl, bc ; HL = dst + size - 1, DE = src - dst
ex de, hl
add hl, de ; HL = src + size - 1, DE = dst + size - 1
lddr
ex de, hl
inc hl
ret

.L.copy_forwards:
add hl, de
ldir
ld hl, (iy + 3)
ret

.L.maybe_zero:
; low 16 bits are zero
or a, (iy + 11) ; test upper 8 bits
jr nz, .L.not_zero ; size >= 65536
; size == 0
ex de, hl
ret

.else

; Optimized for when src == dst
_memmove:
; src > dst | LDIR | 33F + 15R + 2
; src < dst | LDDR | 36F + 12R + 2
; src = dst | | 29F + 12R + 2
; zero size | | 24F + 12R + 2
; src > dst | LDIR | 31F + 15R + 2
; src < dst | LDDR | 34F + 12R + 2
; src = dst | | 27F + 12R + 2
; zero size | | 26F + 10R + 2
; size >= 65536 + 7F + 1R + 2 (only when the low 16 bits are zero)

ld iy, -1
ld iy, 0
add iy, sp
ld bc, (iy + 10)
sbc hl, hl
add hl, bc
jr nc, .L.zero
ld de, (iy + 4)
ld hl, (iy + 7)
or a, a
ld bc, (iy + 9)
ld a, c
or a, b
ld de, (iy + 3)
jr z, .L.maybe_zero
.L.not_zero:
ld hl, (iy + 6)
sbc hl, de
; src < dst
jr c, .copy_backwards
jr c, .L.copy_backwards
; src >= dst
; .L.copy_forwards:
add hl, de
; src == dst
ret z ; skips LDIR when src == dst
ret z ; skips LDIR when src == dst
; src > dst
ldir
.L.zero:
ld hl, (iy + 4)
ld hl, (iy + 3)
ret

.L.copy_backwards:
; move HL and DE to the end
dec de ; DE = dst - 1
dec de ; DE = dst - 1
ex de, hl
add hl, bc ; HL = dst + size - 1, DE = src - dst
add hl, bc ; HL = dst + size - 1, DE = src - dst
ex de, hl
add hl, de ; HL = src + size - 1, DE = dst + size - 1
add hl, de ; HL = src + size - 1, DE = dst + size - 1
lddr
ex de, hl
inc hl
ret

.endif
.L.maybe_zero:
; low 16 bits are zero
or a, (iy + 11) ; test upper 8 bits
jr nz, .L.not_zero ; size >= 65536
; size == 0
ex de, hl
ret

.endif
22 changes: 0 additions & 22 deletions src/libc/mempcpy.src
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,6 @@
.global _mempcpy
.type _mempcpy, @function

.if 0

; faster when count is zero
_mempcpy:
ld iy, -1
add iy, sp
ld bc, (iy + 10) ; Load count
sbc hl, hl
add hl, bc
ld hl, (iy + 4) ; Load destination
ret nc ; zero bytes to copy
ld de, (iy + 7) ; Load source
ex de, hl
ldir
ex de, hl
ret

.else

; faster in full execution case by 0F + 1 clock cycles
_mempcpy:
ld iy, -1
add iy, sp
Expand All @@ -38,5 +18,3 @@ _mempcpy:
.L.zero_byte_copy:
ex de, hl
ret

.endif
26 changes: 26 additions & 0 deletions test/standalone/asprintf_fprintf/src/fill_mem32.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
.assume adl = 1

.section .text

.global _fill_mem32

;-----------------------------------------------------------------------
; void fill_mem32(void *dst, size_t bytes, uint32_t pattern)
;
; Fills `bytes` bytes starting at `dst` with the 4 bytes of `pattern`,
; repeated in the order they are stored in the argument slot.
;
; In:    arguments are read from the stack at IY+3 (dst), IY+6 (bytes)
;        and IY+9 (pattern) after IY is set to SP.
; Out:   nothing (void).
; Uses:  IY, HL, DE, BC and flags are overwritten.
;
; If bytes <= 4 the routine returns without writing anything.
; NOTE(review): this means bytes == 4 does not store the pattern even
; though it would fit exactly - confirm that this is intended.
; When `bytes` is not a multiple of 4 the final repetition is partial.
;-----------------------------------------------------------------------
_fill_mem32:
; IY = SP, so the arguments can be addressed as (iy + n).
ld iy, 0
add iy, sp
; DE = dst (destination of the first LDIR)
ld de, (iy + 3)
; HL = bytes
ld hl, (iy + 6)
; BC = pattern size; also the byte count of the first LDIR below
ld bc, 4
; HL = bytes - 4. SBC also subtracts the carry flag; it is expected to
; be clear here because the preceding add of SP to zero cannot carry.
sbc hl, bc
; return if bytes <= pattern_size (carry: bytes < 4, zero: bytes == 4)
ret c
ret z
; save the remaining byte count (bytes - 4) across the first copy
push hl
; copy pattern once: HL = address of the `pattern` argument on the
; stack, BC = 4, DE = dst
lea hl, iy + 9
ldir
; BC = bytes - 4; DE was advanced by LDIR to dst + 4
pop bc
; now copy (bytes - pattern_size)
; The source (dst) trails the destination (dst + 4) by exactly one
; pattern length, so this forward overlapping copy re-reads bytes it
; has just written and thereby repeats the pattern through the buffer.
ld hl, (iy + 3)
ldir
ret
54 changes: 54 additions & 0 deletions test/standalone/asprintf_fprintf/src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ void *T_memmove(void *dest, const void *src, size_t n)
void *T_mempcpy(void *__restrict dest, const void *__restrict src, size_t n)
__attribute__((nonnull(1, 2)));

void *T_memchr(const void *s, int c, size_t n)
__attribute__((nonnull(1)));

void *T_memrchr(const void *s, int c, size_t n)
__attribute__((nonnull(1)));

Expand Down Expand Up @@ -187,6 +190,7 @@ char *T_strtok_r(char *__restrict s, const char *__restrict delim, char **__rest
#define T_memmem memmem
#define T_memmove memmove
#define T_mempcpy mempcpy
#define T_memchr memchr
#define T_memrchr memrchr
#define T_memrmem memrmem
#define T_memset memset
Expand Down Expand Up @@ -1465,6 +1469,53 @@ int strtok_test(void) {

C(T_memcmp(str, truth_str, sizeof(truth_str)) == 0);
}
return 0;
}

/*
 * Exercises memcpy and memmove with sizes that are non-zero multiples of
 * 65536, i.e. counts whose low 16 bits are all zero.
 *
 * Returns 0 when the end of the function is reached. Each check is wrapped
 * in the C(...) macro, which is defined elsewhere in this file; presumably
 * it makes the test fail when the condition is false - confirm against its
 * definition.
 */
int mem65536_test(void) {
/* helper implemented in assembly (fill_mem32.s) */
void fill_mem32(void *dst, size_t bytes, uint32_t pattern);

/*
 * Scratch region of screen_size (320 * 240 * 2 = 153600) bytes at the fixed
 * address 0xD40000, cleared before use.
 * NOTE(review): judging by the name `screen_size` this is presumably the
 * LCD frame buffer - confirm.
 */
uint8_t * const dst = (uint8_t*)0xD40000;
const size_t screen_size = 320 * 240 * 2;
memset(dst, 0, screen_size);
const size_t B16 = 65536;  /* 1 << 16: low 16 bits are zero */
const size_t B17 = 131072; /* 1 << 17: low 16 bits are zero */

/* test return values */
/* every call must return its destination argument; SINK is defined elsewhere */

C(T_memcpy(SINK, SINK, B16) == SINK);
C(T_memcpy(SINK, SINK, B17) == SINK);

/* src == dst */
C(T_memmove(SINK, SINK, B16) == SINK);
C(T_memmove(SINK, SINK, B17) == SINK);

/* dst above src */
C(T_memmove(SINK + 16, SINK, B16) == SINK + 16);
C(T_memmove(SINK + 16, SINK, B17) == SINK + 16);

/* dst below src */
C(T_memmove(SINK, SINK + 16, B16) == SINK);
C(T_memmove(SINK, SINK + 16, B17) == SINK);

/* test memcpy and memmove when size is a non-zero multiple of 65536 */

/*
 * Fill the last 64 KiB of the cleared region with the pattern, then copy
 * those 64 KiB to dst + 32 (source and destination do not overlap).
 */
fill_mem32(dst + screen_size - B16, B16, 0x78563412);
C(T_memcpy(dst + 32, dst + screen_size - B16, B16) == dst + 32);
/* the 32 bytes in front of the copy must still be zero */
C(T_memchr(dst, 0x00, 32) == dst);
C(T_memchr(dst, 0x12, 32) == NULL_ptr);
/* the first copied byte is expected to be 0x12, the low byte of the pattern */
C(T_memchr(dst, 0x12, 33) == dst + 32);
/*
 * the last copied byte is expected to be 0x78 and the 32 bytes after the
 * copy must not contain 0x78 (they are still zero from the memset)
 */
C(T_memrchr(dst, 0x78, 32 + B16 + 32) == dst + 32 + B16 - 1);
const uint32_t pattern_1 = 0xA3A0A1A0;
const uint32_t pattern_2 = 0xFECDAB89;
/* pattern_1 in the first 32 bytes, pattern_2 in 64 KiB starting at offset 24576 */
fill_mem32(dst, 32, pattern_1);
fill_mem32(dst + 24576, B16, pattern_2);

/*
 * Overlapping move with the destination 61 bytes above the source: the
 * original 32 bytes of pattern_1 stay at dst and a copy of them lands at
 * dst + 61, so the last occurrence of pattern_1 is at dst + 61 + 32 - 4.
 */
C(T_memmove(dst + 61, dst, B16) == dst + 61);
C(T_memmem(dst, B17, &pattern_1, sizeof(pattern_1)) == dst);
C(T_memrmem(dst, B17, &pattern_1, sizeof(pattern_1)) == dst + 61 - 4 + 32);
/*
 * Second overlapping move, destination 24578 bytes above the source; the
 * expected positions below are the earlier ones shifted by that distance.
 */
C(T_memmove(dst + 24578, dst, B16) == dst + 24578);
C(T_memmem(dst, B16, &pattern_1, sizeof(pattern_1)) == dst + 0);
C(T_memrmem(dst, B16, &pattern_1, sizeof(pattern_1)) == dst + 24578 + 61 + 32 - 4);
C(T_memmem(dst, B16, &pattern_2, sizeof(pattern_2)) == dst + 24576 + 24578 + 61);
/*
 * (24578u - 24576u) - 61u wraps around as unsigned arithmetic; only its
 * remainder modulo 4 (the pattern length) is used to locate the last
 * complete occurrence of pattern_2 inside the first 64 KiB.
 */
C(T_memrmem(dst, B16, &pattern_2, sizeof(pattern_2)) == dst + B16 - 4u - (((24578u - 24576u) - 61u) % 4u));

return 0;
}
Expand Down Expand Up @@ -1509,6 +1560,9 @@ int run_tests(void) {
TEST(strchrnul_test());
TEST(strtok_test());

TEST(mem65536_test());
os_ClrHome();

return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion tools/convbin
Submodule convbin updated 10 files
+0 −1 Makefile
+64 −198 src/convert.c
+0 −671 src/elf.c
+0 −55 src/elf.h
+54 −61 src/input.c
+1 −5 src/input.h
+1 −12 src/options.c
+0 −2 src/output.c
+1 −1 src/output.h
+4 −13 src/ti8x.h
Loading