Skip to content

Commit f7678c7

Browse files
authored
Merge branch 'libretro:master' into master
2 parents 8de4fa7 + e87127f commit f7678c7

File tree

24 files changed

+199
-71
lines changed

24 files changed

+199
-71
lines changed

deps/lightrec/.gitrepo

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
[subrepo]
77
remote = https://github.com/pcercuei/lightrec.git
88
branch = master
9-
commit = bd0b82792284f22566bbfc78d8882e1e91b10516
10-
parent = 1229a4ea3dea3e1e47c46cd7afed38860fd91a57
9+
commit = 601afca8e889bdda7040ff5c64f7bbd20d1d5f2c
10+
parent = 459f02ad03fa10b5c403fed724d47fe5adfd5fb1
1111
method = merge
1212
cmdver = 0.4.6

deps/lightrec/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/L
8585
option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON)
8686
option(OPT_PRELOAD_PC "(optimization) Preload PC value into register" ON)
8787

88+
if (CMAKE_SYSTEM_PROCESSOR MATCHES "SH4|sh4")
89+
option(OPT_SH4_USE_GBR "(SH4 optimization) Use GBR register for the state pointer" OFF)
90+
endif()
91+
8892
target_include_directories(lightrec PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
8993

9094
if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")

deps/lightrec/arch.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2+
/*
3+
* Copyright (C) 2024 Paul Cercueil <paul@crapouillou.net>
4+
*/
5+
6+
#ifndef __LIGHTREC_ARCH_H__
7+
#define __LIGHTREC_ARCH_H__
8+
9+
#include <lightning.h>
10+
#include <stdbool.h>
11+
12+
/*
 * Tell whether the host architecture handles an immediate AND with a
 * contiguous low/high bit-mask efficiently, i.e. whether masking with an
 * immediate operand should be preferred over ANDing with a mask that was
 * first loaded into a register.
 *
 * Returns true on MIPS32 >= r2, PowerPC, AArch64 and x86 (32-bit and
 * 64-bit); false on every other host.
 *
 * Declared "static inline" because it lives in a header: translation units
 * that include the header without calling the function must not get an
 * unused-function warning.
 */
static inline bool arch_has_fast_mask(void)
{
#if defined(__mips_isa_rev) && __mips_isa_rev >= 2
	/* On MIPS32 >= r2, we can use extr / ins instructions */
	return true;
#elif defined(__powerpc__)
	/* On PowerPC, we can use the RLWINM instruction */
	return true;
#elif defined(__aarch64__)
	/* Aarch64 can use the UBFX instruction */
	return true;
#elif defined(__i386__) || defined(__x86__) || defined(__x86_64__)
	/* x86 doesn't have enough registers, so using cached values makes
	 * little sense. Using jit_andi() will give a better result as it will
	 * use bit-shifts for low/high masks.
	 *
	 * GCC and Clang predefine __i386__ (not __x86__) on 32-bit x86, so
	 * test for it explicitly; __x86__ is kept for toolchains that do
	 * define it. */
	return true;
#else
	return false;
#endif
}
35+
36+
#endif /* __LIGHTREC_ARCH_H__ */

deps/lightrec/emitter.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
44
*/
55

6+
#include "arch.h"
67
#include "blockcache.h"
78
#include "debug.h"
89
#include "disassembler.h"
@@ -103,7 +104,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
103104

104105
if (cycles && update_cycles) {
105106
jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
106-
pr_debug("EOB: %u cycles\n", cycles);
107+
pr_debug("EOB: %"PRIu32" cycles\n", cycles);
107108
}
108109

109110
if (has_ds && op_flag_load_delay(ds->flags)
@@ -247,11 +248,11 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
247248
struct lightrec_branch *branch;
248249
const struct opcode *op = &block->opcode_list[offset],
249250
*ds = get_delay_slot(block->opcode_list, offset);
250-
jit_node_t *addr;
251251
bool is_forward = (s16)op->i.imm >= 0;
252252
int op_cycles = lightrec_cycles_of_opcode(state->state, op->c);
253253
u32 target_offset, cycles = state->cycles + op_cycles;
254254
bool no_indirection = false;
255+
jit_node_t *addr = NULL;
255256
u32 next_pc;
256257
u8 rs, rt;
257258

@@ -308,7 +309,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
308309

309310
target_offset = offset + 1 + (s16)op->i.imm
310311
- !!op_flag_no_ds(op->flags);
311-
pr_debug("Adding local branch to offset 0x%x\n",
312+
pr_debug("Adding local branch to offset 0x%"PRIx32"\n",
312313
target_offset << 2);
313314
branch = &state->local_branches[
314315
state->nb_local_branches++];
@@ -941,7 +942,7 @@ static void rec_alu_mult(struct lightrec_cstate *state,
941942
u8 reg_lo = get_mult_div_lo(c);
942943
u8 reg_hi = get_mult_div_hi(c);
943944
jit_state_t *_jit = block->_jit;
944-
u8 lo, hi, rs, rt, rflags = 0;
945+
u8 lo, hi = 0, rs, rt, rflags = 0;
945946
bool no_lo = op_flag_no_lo(flags);
946947
bool no_hi = op_flag_no_hi(flags);
947948

@@ -1276,10 +1277,16 @@ static void rec_and_mask(struct lightrec_cstate *cstate,
12761277
struct regcache *reg_cache = cstate->reg_cache;
12771278
u8 reg_imm;
12781279

1279-
reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, mask);
1280-
jit_andr(reg_out, reg_in, reg_imm);
1280+
if (arch_has_fast_mask()
1281+
&& (is_low_mask(mask) || is_high_mask(mask))) {
1282+
jit_andi(reg_out, reg_in, mask);
1283+
} else {
1284+
reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit,
1285+
mask);
1286+
jit_andr(reg_out, reg_in, reg_imm);
12811287

1282-
lightrec_free_reg(reg_cache, reg_imm);
1288+
lightrec_free_reg(reg_cache, reg_imm);
1289+
}
12831290
}
12841291

12851292
static void rec_store_memory(struct lightrec_cstate *cstate,

deps/lightrec/lightning-wrapper.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#include <lightning.h>
1010

11+
#include "lightrec-config.h"
12+
1113
#if __WORDSIZE == 32
1214

1315
#define jit_ldxi_ui(u,v,w) jit_ldxi_i(u,v,w)
@@ -21,6 +23,14 @@
2123

2224
#define jit_b() jit_beqr(0, 0)
2325

26+
/*
 * jit_add_state(u, v): emit code computing u = v + LIGHTREC_REG_STATE.
 *
 * When building for SH with OPT_SH4_USE_GBR enabled, the value of
 * LIGHTREC_REG_STATE is first copied into _R0 and the addition is then
 * performed against _R0 (so _R0 is overwritten).
 * NOTE(review): presumably because the state register cannot be used
 * directly as an operand of the add in that configuration - confirm.
 *
 * In every other configuration this is a plain jit_addr() with
 * LIGHTREC_REG_STATE as the second source operand.
 */
#if defined(__sh__) && OPT_SH4_USE_GBR
27+
#define jit_add_state(u,v) \
28+
do { \
29+
jit_new_node_ww(jit_code_movr,_R0,LIGHTREC_REG_STATE); \
30+
jit_new_node_www(jit_code_addr,u,v,_R0); \
31+
} while (0)
32+
#else
2433
#define jit_add_state(u,v) jit_addr(u,v,LIGHTREC_REG_STATE)
34+
#endif
2535

2636
#endif /* __LIGHTNING_WRAPPER_H__ */

deps/lightrec/lightrec-config.h.cmakein

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,7 @@
2525
#cmakedefine01 OPT_EARLY_UNLOAD
2626
#cmakedefine01 OPT_PRELOAD_PC
2727

28+
#cmakedefine01 OPT_SH4_USE_GBR
29+
2830
#endif /* __LIGHTREC_CONFIG_H__ */
2931

deps/lightrec/lightrec-private.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,16 @@ static inline _Bool can_zero_extend(u32 value, u8 order)
372372
return (value >> order) == 0;
373373
}
374374

375+
/*
 * Check whether 'imm' is a "low mask": bit 0 is set and, assuming
 * popcount32() returns the number of set bits, imm + 1 has at most one
 * bit set (e.g. 0x1, 0xff, 0xffff; all-ones wraps to 0 and qualifies too).
 */
static inline _Bool is_low_mask(u32 imm)
{
	/* A low mask always has its least-significant bit set */
	if (!(imm & 1))
		return 0;

	return popcount32(imm + 1) <= 1;
}
379+
380+
/*
 * Check whether 'imm' is a "high mask": it is non-zero and adding the
 * value BIT(ctz32(imm)) to it yields a value for which popcount32()
 * reports zero.
 * NOTE(review): assumes ctz32() gives the index of the lowest set bit,
 * BIT(n) is a single-bit value, and popcount32() counts set bits in the
 * low 32 bits - i.e. the set bits must run contiguously up to bit 31
 * (e.g. 0xffff0000, 0x80000000). Confirm against their definitions.
 */
static inline _Bool is_high_mask(u32 imm)
{
	/* Zero has no set bit to start from, so it is not a mask */
	if (!imm)
		return 0;

	return popcount32(imm + BIT(ctz32(imm))) == 0;
}
384+
375385
static inline const struct opcode *
376386
get_delay_slot(const struct opcode *list, u16 i)
377387
{

deps/lightrec/lightrec.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -959,7 +959,7 @@ static struct block * generate_wrapper(struct lightrec_state *state)
959959
jit_tramp(256);
960960

961961
/* Load pointer to C wrapper */
962-
jit_addr(JIT_R1, JIT_R1, LIGHTREC_REG_STATE);
962+
jit_add_state(JIT_R1, JIT_R1);
963963
jit_ldxi(JIT_R1, JIT_R1, lightrec_offset(c_wrappers));
964964

965965
jit_epilog();
@@ -1046,7 +1046,7 @@ static u32 lightrec_memset(struct lightrec_state *state)
10461046
return 0;
10471047
}
10481048

1049-
pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %u bytes\n",
1049+
pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %"PRIu32" bytes\n",
10501050
kunseg_pc, (uintptr_t)host, length);
10511051
memset(host, 0, length);
10521052

@@ -1624,7 +1624,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
16241624
for (i = 0; i < cstate->nb_local_branches; i++) {
16251625
struct lightrec_branch *branch = &cstate->local_branches[i];
16261626

1627-
pr_debug("Patch local branch to offset 0x%x\n",
1627+
pr_debug("Patch local branch to offset 0x%"PRIx32"\n",
16281628
branch->target << 2);
16291629

16301630
if (branch->target == 0) {

deps/lightrec/optimizer.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,7 +1172,7 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
11721172
break;
11731173
}
11741174

1175-
pr_debug("Multiply by power-of-two: %u\n",
1175+
pr_debug("Multiply by power-of-two: %"PRIu32"\n",
11761176
v[op->r.rt].value);
11771177

11781178
if (op->r.op == OP_SPECIAL_MULT)
@@ -1440,14 +1440,12 @@ static int lightrec_swap_load_delays(struct lightrec_state *state,
14401440
switch (next.i.op) {
14411441
case OP_LWL:
14421442
case OP_LWR:
1443-
case OP_REGIMM:
1444-
case OP_BEQ:
1445-
case OP_BNE:
1446-
case OP_BLEZ:
1447-
case OP_BGTZ:
14481443
continue;
14491444
}
14501445

1446+
if (has_delay_slot(next))
1447+
continue;
1448+
14511449
if (opcode_reads_register(next, c.i.rt)
14521450
&& !opcode_writes_register(next, c.i.rs)) {
14531451
pr_debug("Swapping opcodes at offset 0x%x to "
@@ -1481,7 +1479,7 @@ static int lightrec_local_branches(struct lightrec_state *state, struct block *b
14811479

14821480
offset = i + 1 + (s16)list->c.i.imm;
14831481

1484-
pr_debug("Found local branch to offset 0x%x\n", offset << 2);
1482+
pr_debug("Found local branch to offset 0x%"PRIx32"\n", offset << 2);
14851483

14861484
ds = get_delay_slot(block->opcode_list, i);
14871485
if (op_flag_load_delay(ds->flags) && opcode_is_load(ds->c)) {

deps/lightrec/regcache.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,15 @@
77
#define __REGCACHE_H__
88

99
#include "lightning-wrapper.h"
10+
#include "lightrec-config.h"
1011

11-
#define NUM_REGS (JIT_V_NUM - 1)
12-
#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
12+
#if defined(__sh__) && OPT_SH4_USE_GBR
13+
# define NUM_REGS JIT_V_NUM
14+
# define LIGHTREC_REG_STATE _GBR
15+
#else
16+
# define NUM_REGS (JIT_V_NUM - 1)
17+
# define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
18+
#endif
1319

1420
#if defined(__powerpc__)
1521
# define NUM_TEMPS JIT_R_NUM

0 commit comments

Comments
 (0)