root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.43
Committed: 2008-01-01T09:40:35Z by gbeauche
Branch: MAIN
CVS Tags: HEAD
Changes since 1.42: +1 -1 lines
Log Message:
Happy New Year!

1 /*
2 * compiler/compemu_support.cpp - Core dynamic translation engine
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2008 Christian Bauer
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28 #endif
29
30 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 #endif
33
34 /* NOTE: support for AMD64 assumes translation cache and other code
35 * buffers are allocated into a 32-bit address space because (i) B2/JIT
36 * code is not 64-bit clean and (ii) it's faster to resolve branches
37 * that way.
38 */
39 #if !defined(__i386__) && !defined(__x86_64__)
40 #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41 #endif
42
43 #define USE_MATCH 0
44
45 /* kludge for Brian, so he can compile under MSVC++ */
46 #define USE_NORMAL_CALLING_CONVENTION 0
47
48 #ifndef WIN32
49 #include <unistd.h>
50 #include <sys/types.h>
51 #include <sys/mman.h>
52 #endif
53
54 #include <stdlib.h>
55 #include <fcntl.h>
56 #include <errno.h>
57
58 #include "sysdeps.h"
59 #include "cpu_emulation.h"
60 #include "main.h"
61 #include "prefs.h"
62 #include "user_strings.h"
63 #include "vm_alloc.h"
64
65 #include "m68k.h"
66 #include "memory.h"
67 #include "readcpu.h"
68 #include "newcpu.h"
69 #include "comptbl.h"
70 #include "compiler/compemu.h"
71 #include "fpu/fpu.h"
72 #include "fpu/flags.h"
73
74 #define DEBUG 1
75 #include "debug.h"
76
77 #ifdef ENABLE_MON
78 #include "mon.h"
79 #endif
80
81 #ifndef WIN32
82 #define PROFILE_COMPILE_TIME 1
83 #define PROFILE_UNTRANSLATED_INSNS 1
84 #endif
85
86 #if defined(__x86_64__) && 0
87 #define RECORD_REGISTER_USAGE 1
88 #endif
89
90 #ifdef WIN32
91 #undef write_log
92 #define write_log dummy_write_log
93 static void dummy_write_log(const char *, ...) { }
94 #endif
95
96 #if JIT_DEBUG
97 #undef abort
98 #define abort() do { \
99 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100 exit(EXIT_FAILURE); \
101 } while (0)
102 #endif
103
104 #if RECORD_REGISTER_USAGE
105 static uint64 reg_count[16];
106 static int reg_count_local[16];
107
108 static int reg_count_compare(const void *ap, const void *bp)
109 {
110 const int a = *((int *)ap);
111 const int b = *((int *)bp);
112 return (reg_count[a] < reg_count[b]) - (reg_count[a] > reg_count[b]); /* sort descending by count; plain subtraction could overflow int */
113 }
114 #endif
115
116 #if PROFILE_COMPILE_TIME
117 #include <time.h>
118 static uae_u32 compile_count = 0;
119 static clock_t compile_time = 0;
120 static clock_t emul_start_time = 0;
121 static clock_t emul_end_time = 0;
122 #endif
123
124 #if PROFILE_UNTRANSLATED_INSNS
125 const int untranslated_top_ten = 20; /* number of entries reported (the name is historical) */
126 static uae_u32 raw_cputbl_count[65536] = { 0, };
127 static uae_u16 opcode_nums[65536];
128
129 static int untranslated_compfn(const void *e1, const void *e2)
130 {
131 const uae_u32 a = raw_cputbl_count[*(const uae_u16 *)e1], b = raw_cputbl_count[*(const uae_u16 *)e2]; return (a < b) - (a > b); /* sort descending; qsort needs a negative/zero/positive result */
132 }
133 #endif
134
135 static compop_func *compfunctbl[65536];
136 static compop_func *nfcompfunctbl[65536];
137 static cpuop_func *nfcpufunctbl[65536];
138 uae_u8* comp_pc_p;
139
140 // From newcpu.cpp
141 extern bool quit_program;
142
143 // gb-- Extra data for Basilisk II/JIT
144 #if JIT_DEBUG
145 static bool JITDebug = false; // Enable runtime disassemblers through mon?
146 #else
147 const bool JITDebug = false; // Don't use JIT debug mode at all
148 #endif
149 #if USE_INLINING
150 static bool follow_const_jumps = true; // Flag: translation through constant jumps
151 #else
152 const bool follow_const_jumps = false;
153 #endif
154
155 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size, in KB (1 MB)
156 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
157 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
158 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
159 static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions ?
160 static bool have_cmov = false; // target has CMOV instructions ?
161 static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
162 static bool have_rat_stall = true; // target has partial register stalls ?
163 const bool tune_alignment = true; // Tune code alignments for running CPU ?
164 const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
165 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
166 static int align_loops = 32; // Align the start of loops
167 static int align_jumps = 32; // Align the start of jumps
168 static int optcount[10] = {
169 10, // How often a block has to be executed before it is translated
170 0, // How often to use naive translation
171 0, 0, 0, 0,
172 -1, -1, -1, -1
173 };
174
175 struct op_properties {
176 uae_u8 use_flags;
177 uae_u8 set_flags;
178 uae_u8 is_addx;
179 uae_u8 cflow;
180 };
181 static op_properties prop[65536];
182
183 static inline int end_block(uae_u32 opcode)
184 {
185 return (prop[opcode].cflow & fl_end_block);
186 }
187
188 static inline bool is_const_jump(uae_u32 opcode)
189 {
190 return (prop[opcode].cflow == fl_const_jump);
191 }
192
193 static inline bool may_trap(uae_u32 opcode)
194 {
195 return (prop[opcode].cflow & fl_trap);
196 }
197
198 static inline unsigned int cft_map (unsigned int f)
199 {
200 #ifndef HAVE_GET_WORD_UNSWAPPED
201 return f;
202 #else
203 return ((f >> 8) & 255) | ((f & 255) << 8);
204 #endif
205 }
206
207 uae_u8* start_pc_p;
208 uae_u32 start_pc;
209 uae_u32 current_block_pc_p;
210 static uintptr current_block_start_target;
211 uae_u32 needed_flags;
212 static uintptr next_pc_p;
213 static uintptr taken_pc_p;
214 static int branch_cc;
215 static int redo_current_block;
216
217 int segvcount=0;
218 int soft_flush_count=0;
219 int hard_flush_count=0;
220 int checksum_count=0;
221 static uae_u8* current_compile_p=NULL;
222 static uae_u8* max_compile_start;
223 static uae_u8* compiled_code=NULL;
224 static uae_s32 reg_alloc_run;
225 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
226 static uae_u8* popallspace=NULL;
227
228 void* pushall_call_handler=NULL;
229 static void* popall_do_nothing=NULL;
230 static void* popall_exec_nostats=NULL;
231 static void* popall_execute_normal=NULL;
232 static void* popall_cache_miss=NULL;
233 static void* popall_recompile_block=NULL;
234 static void* popall_check_checksum=NULL;
235
236 /* The 68k only ever executes from even addresses, so right now we
237 * would waste half the entries in this array.
238 * UPDATE: We now use those entries to store the start of the linked
239 * lists that we maintain for each hash result.
240 */
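/* A sketch of the layout implied by the cl/cl+1 pairs used in the list
 * functions further below: for a given hash result cl,
 *
 *   cache_tags[cl].handler : code pointer executed for that cacheline
 *   cache_tags[cl+1].bi    : head of the blockinfo chain for that cacheline
 */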
241 cacheline cache_tags[TAGSIZE];
242 int letit=0;
243 blockinfo* hold_bi[MAX_HOLD_BI];
244 blockinfo* active;
245 blockinfo* dormant;
246
247 /* 68040 */
248 extern struct cputbl op_smalltbl_0_nf[];
249 extern struct comptbl op_smalltbl_0_comp_nf[];
250 extern struct comptbl op_smalltbl_0_comp_ff[];
251
252 /* 68020 + 68881 */
253 extern struct cputbl op_smalltbl_1_nf[];
254
255 /* 68020 */
256 extern struct cputbl op_smalltbl_2_nf[];
257
258 /* 68010 */
259 extern struct cputbl op_smalltbl_3_nf[];
260
261 /* 68000 */
262 extern struct cputbl op_smalltbl_4_nf[];
263
264 /* 68000 slow but compatible. */
265 extern struct cputbl op_smalltbl_5_nf[];
266
267 static void flush_icache_hard(int n);
268 static void flush_icache_lazy(int n);
269 static void flush_icache_none(int n);
270 void (*flush_icache)(int n) = flush_icache_none;
271
272
273
274 bigstate live;
275 smallstate empty_ss;
276 smallstate default_ss;
277 static int optlev;
278
279 static int writereg(int r, int size);
280 static void unlock2(int r);
281 static void setlock(int r);
282 static int readreg_specific(int r, int size, int spec);
283 static int writereg_specific(int r, int size, int spec);
284 static void prepare_for_call_1(void);
285 static void prepare_for_call_2(void);
286 static void align_target(uae_u32 a);
287
288 static uae_s32 nextused[VREGS];
289
290 uae_u32 m68k_pc_offset;
291
292 /* Some arithmetic operations can be optimized away if the operands
293 * are known to be constant. But that's only a good idea when the
294 * side effects they would have on the flags are not important. This
295 * variable indicates whether we need the side effects or not.
296 */
297 uae_u32 needflags=0;
298
299 /* Flag handling is complicated.
300 *
301 * x86 instructions create flags, which quite often are exactly what we
302 * want. So at times, the "68k" flags are actually in the x86 flags.
303 *
304 * Then again, sometimes we do x86 instructions that clobber the x86
305 * flags, but don't represent a corresponding m68k instruction. In that
306 * case, we have to save them.
307 *
308 * We used to save them to the stack, but now store them back directly
309 * into the regflags.cznv of the traditional emulation. Thus some odd
310 * names.
311 *
312 * So flags can be in either of two places (used to be three; boy, were
313 * things complicated back then!), and either place can contain either
314 * valid flags or invalid trash (and on the stack, there was also the
315 * option of "nothing at all", now gone). A couple of variables keep
316 * track of the respective states.
317 *
318 * To make things worse, we might or might not be interested in the flags.
319 * By default, we are, but a call to dont_care_flags can change that
320 * until the next call to live_flags. If we are not, pretty much whatever
321 * is in the register and/or the native flags is seen as valid.
322 */
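/* A sketch of the bookkeeping this implies, using the VALID/TRASH
 * states referenced throughout this file:
 *
 *   live.flags_in_flags : do the native x86 flags currently hold the
 *                         68k flags?
 *   live.flags_on_stack : does regflags.cznv hold them? (the "stack"
 *                         name is historical, as noted above)
 *
 * clobber_flags() below spills the x86 flags to regflags.cznv when they
 * are the only valid copy and then marks flags_in_flags as TRASH;
 * make_flags_live_internal() reloads the x86 flags from regflags.cznv.
 */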
323
324 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
325 {
326 return cache_tags[cl+1].bi;
327 }
328
329 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
330 {
331 blockinfo* bi=get_blockinfo(cacheline(addr));
332
333 while (bi) {
334 if (bi->pc_p==addr)
335 return bi;
336 bi=bi->next_same_cl;
337 }
338 return NULL;
339 }
340
341
342 /*******************************************************************
343 * All sorts of list related functions for all of the lists *
344 *******************************************************************/
345
346 static __inline__ void remove_from_cl_list(blockinfo* bi)
347 {
348 uae_u32 cl=cacheline(bi->pc_p);
349
350 if (bi->prev_same_cl_p)
351 *(bi->prev_same_cl_p)=bi->next_same_cl;
352 if (bi->next_same_cl)
353 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
354 if (cache_tags[cl+1].bi)
355 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
356 else
357 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
358 }
359
360 static __inline__ void remove_from_list(blockinfo* bi)
361 {
362 if (bi->prev_p)
363 *(bi->prev_p)=bi->next;
364 if (bi->next)
365 bi->next->prev_p=bi->prev_p;
366 }
367
368 static __inline__ void remove_from_lists(blockinfo* bi)
369 {
370 remove_from_list(bi);
371 remove_from_cl_list(bi);
372 }
373
374 static __inline__ void add_to_cl_list(blockinfo* bi)
375 {
376 uae_u32 cl=cacheline(bi->pc_p);
377
378 if (cache_tags[cl+1].bi)
379 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
380 bi->next_same_cl=cache_tags[cl+1].bi;
381
382 cache_tags[cl+1].bi=bi;
383 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
384
385 cache_tags[cl].handler=bi->handler_to_use;
386 }
387
388 static __inline__ void raise_in_cl_list(blockinfo* bi)
389 {
390 remove_from_cl_list(bi);
391 add_to_cl_list(bi);
392 }
393
394 static __inline__ void add_to_active(blockinfo* bi)
395 {
396 if (active)
397 active->prev_p=&(bi->next);
398 bi->next=active;
399
400 active=bi;
401 bi->prev_p=&active;
402 }
403
404 static __inline__ void add_to_dormant(blockinfo* bi)
405 {
406 if (dormant)
407 dormant->prev_p=&(bi->next);
408 bi->next=dormant;
409
410 dormant=bi;
411 bi->prev_p=&dormant;
412 }
413
414 static __inline__ void remove_dep(dependency* d)
415 {
416 if (d->prev_p)
417 *(d->prev_p)=d->next;
418 if (d->next)
419 d->next->prev_p=d->prev_p;
420 d->prev_p=NULL;
421 d->next=NULL;
422 }
423
424 /* This block's code is about to be thrown away, so it no longer
425 depends on anything else */
426 static __inline__ void remove_deps(blockinfo* bi)
427 {
428 remove_dep(&(bi->dep[0]));
429 remove_dep(&(bi->dep[1]));
430 }
431
432 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
433 {
434 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4); /* patch the 32-bit relative displacement: target minus the end of the 4-byte offset field */
435 }
436
437 /********************************************************************
438 * Soft flush handling support functions *
439 ********************************************************************/
440
441 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
442 {
443 //write_log("bi is %p\n",bi);
444 if (dh!=bi->direct_handler_to_use) {
445 dependency* x=bi->deplist;
446 //write_log("bi->deplist=%p\n",bi->deplist);
447 while (x) {
448 //write_log("x is %p\n",x);
449 //write_log("x->next is %p\n",x->next);
450 //write_log("x->prev_p is %p\n",x->prev_p);
451
452 if (x->jmp_off) {
453 adjust_jmpdep(x,dh);
454 }
455 x=x->next;
456 }
457 bi->direct_handler_to_use=dh;
458 }
459 }
460
461 static __inline__ void invalidate_block(blockinfo* bi)
462 {
463 int i;
464
465 bi->optlevel=0;
466 bi->count=optcount[0]-1;
467 bi->handler=NULL;
468 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
469 bi->direct_handler=NULL;
470 set_dhtu(bi,bi->direct_pen);
471 bi->needed_flags=0xff;
472 bi->status=BI_INVALID;
473 for (i=0;i<2;i++) {
474 bi->dep[i].jmp_off=NULL;
475 bi->dep[i].target=NULL;
476 }
477 remove_deps(bi);
478 }
479
480 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
481 {
482 blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
483
484 Dif(!tbi) {
485 write_log("Could not create jmpdep!\n");
486 abort();
487 }
488 bi->dep[i].jmp_off=jmpaddr;
489 bi->dep[i].source=bi;
490 bi->dep[i].target=tbi;
491 bi->dep[i].next=tbi->deplist;
492 if (bi->dep[i].next)
493 bi->dep[i].next->prev_p=&(bi->dep[i].next);
494 bi->dep[i].prev_p=&(tbi->deplist);
495 tbi->deplist=&(bi->dep[i]);
496 }
497
498 static __inline__ void block_need_recompile(blockinfo * bi)
499 {
500 uae_u32 cl = cacheline(bi->pc_p);
501
502 set_dhtu(bi, bi->direct_pen);
503 bi->direct_handler = bi->direct_pen;
504
505 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
506 bi->handler = (cpuop_func *)popall_execute_normal;
507 if (bi == cache_tags[cl + 1].bi)
508 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
509 bi->status = BI_NEED_RECOMP;
510 }
511
512 static __inline__ void mark_callers_recompile(blockinfo * bi)
513 {
514 dependency *x = bi->deplist;
515
516 while (x) {
517 dependency *next = x->next; /* This disappears when we mark for
518 * recompilation and thus remove the
519 * blocks from the lists */
520 if (x->jmp_off) {
521 blockinfo *cbi = x->source;
522
523 Dif(cbi->status == BI_INVALID) {
524 // write_log("invalid block in dependency list\n"); // FIXME?
525 // abort();
526 }
527 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
528 block_need_recompile(cbi);
529 mark_callers_recompile(cbi);
530 }
531 else if (cbi->status == BI_COMPILING) {
532 redo_current_block = 1;
533 }
534 else if (cbi->status == BI_NEED_RECOMP) {
535 /* nothing */
536 }
537 else {
538 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
539 }
540 }
541 x = next;
542 }
543 }
544
545 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
546 {
547 blockinfo* bi=get_blockinfo_addr(addr);
548 int i;
549
550 if (!bi) {
551 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
552 if (hold_bi[i]) {
553 uae_u32 cl=cacheline(addr);
554
555 bi=hold_bi[i];
556 hold_bi[i]=NULL;
557 bi->pc_p=(uae_u8 *)addr;
558 invalidate_block(bi);
559 add_to_active(bi);
560 add_to_cl_list(bi);
561
562 }
563 }
564 }
565 if (!bi) {
566 write_log("Looking for blockinfo, can't find free one\n");
567 abort();
568 }
569 return bi;
570 }
571
572 static void prepare_block(blockinfo* bi);
573
574 /* Management of blockinfos.
575
576 A blockinfo struct is allocated whenever a new block has to be
577 compiled. If the list of free blockinfos is empty, we allocate a new
578 pool of blockinfos and link the newly created blockinfos together
579 into the list of free blockinfos. Otherwise, we simply pop a structure
580 off the free list.
581
582 Blockinfos are lazily deallocated, i.e. chained together into the
583 list of free blockinfos whenever a translation cache flush (hard or
584 soft) request occurs.
585 */
586
587 template< class T >
588 class LazyBlockAllocator
589 {
590 enum {
591 kPoolSize = 1 + 4096 / sizeof(T)
592 };
593 struct Pool {
594 T chunk[kPoolSize];
595 Pool * next;
596 };
597 Pool * mPools;
598 T * mChunks;
599 public:
600 LazyBlockAllocator() : mPools(0), mChunks(0) { }
601 ~LazyBlockAllocator();
602 T * acquire();
603 void release(T * const);
604 };
605
606 template< class T >
607 LazyBlockAllocator<T>::~LazyBlockAllocator()
608 {
609 Pool * currentPool = mPools;
610 while (currentPool) {
611 Pool * deadPool = currentPool;
612 currentPool = currentPool->next;
613 free(deadPool);
614 }
615 }
616
617 template< class T >
618 T * LazyBlockAllocator<T>::acquire()
619 {
620 if (!mChunks) {
621 // There is no chunk left, allocate a new pool and link the
622 // chunks into the free list
623 Pool * newPool = (Pool *)malloc(sizeof(Pool));
624 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
625 chunk->next = mChunks;
626 mChunks = chunk;
627 }
628 newPool->next = mPools;
629 mPools = newPool;
630 }
631 T * chunk = mChunks;
632 mChunks = chunk->next;
633 return chunk;
634 }
635
636 template< class T >
637 void LazyBlockAllocator<T>::release(T * const chunk)
638 {
639 chunk->next = mChunks;
640 mChunks = chunk;
641 }
642
643 template< class T >
644 class HardBlockAllocator
645 {
646 public:
647 T * acquire() {
648 T * data = (T *)current_compile_p;
649 current_compile_p += sizeof(T);
650 return data;
651 }
652
653 void release(T * const chunk) {
654 // Deallocated on invalidation
655 }
656 };
657
658 #if USE_SEPARATE_BIA
659 static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
660 static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
661 #else
662 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
663 static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
664 #endif
665
666 static __inline__ checksum_info *alloc_checksum_info(void)
667 {
668 checksum_info *csi = ChecksumInfoAllocator.acquire();
669 csi->next = NULL;
670 return csi;
671 }
672
673 static __inline__ void free_checksum_info(checksum_info *csi)
674 {
675 csi->next = NULL;
676 ChecksumInfoAllocator.release(csi);
677 }
678
679 static __inline__ void free_checksum_info_chain(checksum_info *csi)
680 {
681 while (csi != NULL) {
682 checksum_info *csi2 = csi->next;
683 free_checksum_info(csi);
684 csi = csi2;
685 }
686 }
687
688 static __inline__ blockinfo *alloc_blockinfo(void)
689 {
690 blockinfo *bi = BlockInfoAllocator.acquire();
691 #if USE_CHECKSUM_INFO
692 bi->csi = NULL;
693 #endif
694 return bi;
695 }
696
697 static __inline__ void free_blockinfo(blockinfo *bi)
698 {
699 #if USE_CHECKSUM_INFO
700 free_checksum_info_chain(bi->csi);
701 bi->csi = NULL;
702 #endif
703 BlockInfoAllocator.release(bi);
704 }
705
706 static __inline__ void alloc_blockinfos(void)
707 {
708 int i;
709 blockinfo* bi;
710
711 for (i=0;i<MAX_HOLD_BI;i++) {
712 if (hold_bi[i])
713 return;
714 bi=hold_bi[i]=alloc_blockinfo();
715 prepare_block(bi);
716 }
717 }
718
719 /********************************************************************
720 * Functions to emit data into memory, and other general support *
721 ********************************************************************/
722
723 static uae_u8* target;
724
725 static void emit_init(void)
726 {
727 }
728
729 static __inline__ void emit_byte(uae_u8 x)
730 {
731 *target++=x;
732 }
733
734 static __inline__ void emit_word(uae_u16 x)
735 {
736 *((uae_u16*)target)=x;
737 target+=2;
738 }
739
740 static __inline__ void emit_long(uae_u32 x)
741 {
742 *((uae_u32*)target)=x;
743 target+=4;
744 }
745
746 static __inline__ void emit_quad(uae_u64 x)
747 {
748 *((uae_u64*)target)=x;
749 target+=8;
750 }
751
752 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
753 {
754 memcpy((uae_u8 *)target,block,blocklen);
755 target+=blocklen;
756 }
757
758 static __inline__ uae_u32 reverse32(uae_u32 v)
759 {
760 #if 1
761 // gb-- We have specialized byteswapping functions, just use them
762 return do_byteswap_32(v);
763 #else
764 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
765 #endif
766 }
767
768 /********************************************************************
769 * Getting the information about the target CPU *
770 ********************************************************************/
771
772 #include "codegen_x86.cpp"
773
774 void set_target(uae_u8* t)
775 {
776 target=t;
777 }
778
779 static __inline__ uae_u8* get_target_noopt(void)
780 {
781 return target;
782 }
783
784 __inline__ uae_u8* get_target(void)
785 {
786 return get_target_noopt();
787 }
788
789
790 /********************************************************************
791 * Flags status handling. EMIT TIME! *
792 ********************************************************************/
793
794 static void bt_l_ri_noclobber(R4 r, IMM i);
795
796 static void make_flags_live_internal(void)
797 {
798 if (live.flags_in_flags==VALID)
799 return;
800 Dif (live.flags_on_stack==TRASH) {
801 write_log("Want flags, got something on stack, but it is TRASH\n");
802 abort();
803 }
804 if (live.flags_on_stack==VALID) {
805 int tmp;
806 tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
807 raw_reg_to_flags(tmp);
808 unlock2(tmp);
809
810 live.flags_in_flags=VALID;
811 return;
812 }
813 write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
814 live.flags_in_flags,live.flags_on_stack);
815 abort();
816 }
817
818 static void flags_to_stack(void)
819 {
820 if (live.flags_on_stack==VALID)
821 return;
822 if (!live.flags_are_important) {
823 live.flags_on_stack=VALID;
824 return;
825 }
826 Dif (live.flags_in_flags!=VALID)
827 abort();
828 else {
829 int tmp;
830 tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
831 raw_flags_to_reg(tmp);
832 unlock2(tmp);
833 }
834 live.flags_on_stack=VALID;
835 }
836
837 static __inline__ void clobber_flags(void)
838 {
839 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
840 flags_to_stack();
841 live.flags_in_flags=TRASH;
842 }
843
844 /* Prepare for leaving the compiled stuff */
845 static __inline__ void flush_flags(void)
846 {
847 flags_to_stack();
848 return;
849 }
850
851 int touchcnt;
852
853 /********************************************************************
854 * Partial register flushing for optimized calls *
855 ********************************************************************/
856
857 struct regusage {
858 uae_u16 rmask;
859 uae_u16 wmask;
860 };
861
862 static inline void ru_set(uae_u16 *mask, int reg)
863 {
864 #if USE_OPTIMIZED_CALLS
865 *mask |= 1 << reg;
866 #endif
867 }
868
869 static inline bool ru_get(const uae_u16 *mask, int reg)
870 {
871 #if USE_OPTIMIZED_CALLS
872 return (*mask & (1 << reg));
873 #else
874 /* Default: the instruction reads and writes the register */
875 return true;
876 #endif
877 }
878
879 static inline void ru_set_read(regusage *ru, int reg)
880 {
881 ru_set(&ru->rmask, reg);
882 }
883
884 static inline void ru_set_write(regusage *ru, int reg)
885 {
886 ru_set(&ru->wmask, reg);
887 }
888
889 static inline bool ru_read_p(const regusage *ru, int reg)
890 {
891 return ru_get(&ru->rmask, reg);
892 }
893
894 static inline bool ru_write_p(const regusage *ru, int reg)
895 {
896 return ru_get(&ru->wmask, reg);
897 }
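/* A sketch of the mask layout implied by ru_fill_ea() below (bits are
 * only actually set when USE_OPTIMIZED_CALLS is enabled): bit n of
 * rmask/wmask stands for Dn for n = 0..7 and for A(n-8) for n = 8..15
 * (note the reg += 8 in the Areg case). For example, MOVEA.L D1,A0
 * would yield rmask = 0x0002 (D1) and wmask = 0x0100 (A0). */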
898
899 static void ru_fill_ea(regusage *ru, int reg, amodes mode,
900 wordsizes size, int write_mode)
901 {
902 switch (mode) {
903 case Areg:
904 reg += 8;
905 /* fall through */
906 case Dreg:
907 ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
908 break;
909 case Ad16:
910 /* skip displacement */
911 m68k_pc_offset += 2; /* fall through */
912 case Aind:
913 case Aipi:
914 case Apdi:
915 ru_set_read(ru, reg+8);
916 break;
917 case Ad8r:
918 ru_set_read(ru, reg+8);
919 /* fall through */
920 case PC8r: {
921 uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
922 reg = (dp >> 12) & 15;
923 ru_set_read(ru, reg);
924 if (dp & 0x100)
925 m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
926 break;
927 }
928 case PC16:
929 case absw:
930 case imm0:
931 case imm1:
932 m68k_pc_offset += 2;
933 break;
934 case absl:
935 case imm2:
936 m68k_pc_offset += 4;
937 break;
938 case immi:
939 m68k_pc_offset += (size == sz_long) ? 4 : 2;
940 break;
941 }
942 }
943
944 /* TODO: split into a static initialization part and a dynamic one
945 (instructions depending on extension words) */
946 static void ru_fill(regusage *ru, uae_u32 opcode)
947 {
948 m68k_pc_offset += 2;
949
950 /* Default: no register is used or written to */
951 ru->rmask = 0;
952 ru->wmask = 0;
953
954 uae_u32 real_opcode = cft_map(opcode);
955 struct instr *dp = &table68k[real_opcode];
956
957 bool rw_dest = true;
958 bool handled = false;
959
960 /* Handle some instructions specifically */
961 uae_u16 reg, ext;
962 switch (dp->mnemo) {
963 case i_BFCHG:
964 case i_BFCLR:
965 case i_BFEXTS:
966 case i_BFEXTU:
967 case i_BFFFO:
968 case i_BFINS:
969 case i_BFSET:
970 case i_BFTST:
971 ext = comp_get_iword((m68k_pc_offset+=2)-2);
972 if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
973 if (ext & 0x020) ru_set_read(ru, ext & 7);
974 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
975 if (dp->dmode == Dreg)
976 ru_set_read(ru, dp->dreg);
977 switch (dp->mnemo) {
978 case i_BFEXTS:
979 case i_BFEXTU:
980 case i_BFFFO:
981 ru_set_write(ru, (ext >> 12) & 7);
982 break;
983 case i_BFINS:
984 ru_set_read(ru, (ext >> 12) & 7);
985 /* fall through */
986 case i_BFCHG:
987 case i_BFCLR:
988 case i_BFSET:
989 if (dp->dmode == Dreg)
990 ru_set_write(ru, dp->dreg);
991 break;
992 }
993 handled = true;
994 rw_dest = false;
995 break;
996
997 case i_BTST:
998 rw_dest = false;
999 break;
1000
1001 case i_CAS:
1002 {
1003 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1004 int Du = ext & 7;
1005 ru_set_read(ru, Du);
1006 int Dc = (ext >> 6) & 7;
1007 ru_set_read(ru, Dc);
1008 ru_set_write(ru, Dc);
1009 break;
1010 }
1011 case i_CAS2:
1012 {
1013 int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1014 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1015 Rn1 = (ext >> 12) & 15;
1016 Du1 = (ext >> 6) & 7;
1017 Dc1 = ext & 7;
1018 ru_set_read(ru, Rn1);
1019 ru_set_read(ru, Du1);
1020 ru_set_read(ru, Dc1);
1021 ru_set_write(ru, Dc1);
1022 ext = comp_get_iword((m68k_pc_offset+=2)-2);
1023 Rn2 = (ext >> 12) & 15;
1024 Du2 = (ext >> 6) & 7;
1025 Dc2 = ext & 7;
1026 ru_set_read(ru, Rn2);
1027 ru_set_read(ru, Du2);
1028 ru_set_write(ru, Dc2);
1029 break;
1030 }
1031 case i_DIVL: case i_MULL:
1032 m68k_pc_offset += 2;
1033 break;
1034 case i_LEA:
1035 case i_MOVE: case i_MOVEA: case i_MOVE16:
1036 rw_dest = false;
1037 break;
1038 case i_PACK: case i_UNPK:
1039 rw_dest = false;
1040 m68k_pc_offset += 2;
1041 break;
1042 case i_TRAPcc:
1043 m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1044 break;
1045 case i_RTR:
1046 /* do nothing, just for coverage debugging */
1047 break;
1048 /* TODO: handle EXG instruction */
1049 }
1050
1051 /* Handle A-Traps better */
1052 if ((real_opcode & 0xf000) == 0xa000) {
1053 handled = true;
1054 }
1055
1056 /* Handle EmulOps better */
1057 if ((real_opcode & 0xff00) == 0x7100) {
1058 handled = true;
1059 ru->rmask = 0xffff;
1060 ru->wmask = 0;
1061 }
1062
1063 if (dp->suse && !handled)
1064 ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1065
1066 if (dp->duse && !handled)
1067 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1068
1069 if (rw_dest)
1070 ru->rmask |= ru->wmask;
1071
1072 handled = handled || dp->suse || dp->duse;
1073
1074 /* Mark all registers as used/written if the instruction may trap */
1075 if (may_trap(opcode)) {
1076 handled = true;
1077 ru->rmask = 0xffff;
1078 ru->wmask = 0xffff;
1079 }
1080
1081 if (!handled) {
1082 write_log("ru_fill: %04x = { %04x, %04x }\n",
1083 real_opcode, ru->rmask, ru->wmask);
1084 abort();
1085 }
1086 }
1087
1088 /********************************************************************
1089 * register allocation per block logging *
1090 ********************************************************************/
1091
1092 static uae_s8 vstate[VREGS];
1093 static uae_s8 vwritten[VREGS];
1094 static uae_s8 nstate[N_REGS];
1095
1096 #define L_UNKNOWN -127 /* state not yet determined */
1097 #define L_UNAVAIL -1 /* nreg was used as a temporary: no match possible */
1098 #define L_NEEDED -2 /* vreg was read: its value is needed on entry */
1099 #define L_UNNEEDED -3 /* vreg was overwritten before any read */
1100
1101 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1102 {
1103 int i;
1104
1105 for (i = 0; i < VREGS; i++)
1106 s->virt[i] = vstate[i];
1107 for (i = 0; i < N_REGS; i++)
1108 s->nat[i] = nstate[i];
1109 }
1110
1111 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1112 {
1113 int i;
1114 int reverse = 0;
1115
1116 for (i = 0; i < VREGS; i++) {
1117 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1118 return 1;
1119 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1120 reverse++;
1121 }
1122 for (i = 0; i < N_REGS; i++) {
1123 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1124 return 1;
1125 if (nstate[i] < 0 && s->nat[i] >= 0)
1126 reverse++;
1127 }
1128 if (reverse >= 2 && USE_MATCH)
1129 return 1; /* In this case, it might be worth recompiling the
1130 * callers */
1131 return 0;
1132 }
1133
1134 static __inline__ void log_startblock(void)
1135 {
1136 int i;
1137
1138 for (i = 0; i < VREGS; i++) {
1139 vstate[i] = L_UNKNOWN;
1140 vwritten[i] = 0;
1141 }
1142 for (i = 0; i < N_REGS; i++)
1143 nstate[i] = L_UNKNOWN;
1144 }
1145
1146 /* Using an n-reg for a temp variable */
1147 static __inline__ void log_isused(int n)
1148 {
1149 if (nstate[n] == L_UNKNOWN)
1150 nstate[n] = L_UNAVAIL;
1151 }
1152
1153 static __inline__ void log_visused(int r)
1154 {
1155 if (vstate[r] == L_UNKNOWN)
1156 vstate[r] = L_NEEDED;
1157 }
1158
1159 static __inline__ void do_load_reg(int n, int r)
1160 {
1161 if (r == FLAGTMP)
1162 raw_load_flagreg(n, r);
1163 else if (r == FLAGX)
1164 raw_load_flagx(n, r);
1165 else
1166 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1167 }
1168
1169 static __inline__ void check_load_reg(int n, int r)
1170 {
1171 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1172 }
1173
1174 static __inline__ void log_vwrite(int r)
1175 {
1176 vwritten[r] = 1;
1177 }
1178
1179 /* Using an n-reg to hold a v-reg */
1180 static __inline__ void log_isreg(int n, int r)
1181 {
1182 static int count = 0;
1183
1184 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1185 nstate[n] = r;
1186 else {
1187 do_load_reg(n, r);
1188 if (nstate[n] == L_UNKNOWN)
1189 nstate[n] = L_UNAVAIL;
1190 }
1191 if (vstate[r] == L_UNKNOWN)
1192 vstate[r] = L_NEEDED;
1193 }
1194
1195 static __inline__ void log_clobberreg(int r)
1196 {
1197 if (vstate[r] == L_UNKNOWN)
1198 vstate[r] = L_UNNEEDED;
1199 }
1200
1201 /* This ends all possibility of clever register allocation */
1202
1203 static __inline__ void log_flush(void)
1204 {
1205 int i;
1206
1207 for (i = 0; i < VREGS; i++)
1208 if (vstate[i] == L_UNKNOWN)
1209 vstate[i] = L_NEEDED;
1210 for (i = 0; i < N_REGS; i++)
1211 if (nstate[i] == L_UNKNOWN)
1212 nstate[i] = L_UNAVAIL;
1213 }
1214
1215 static __inline__ void log_dump(void)
1216 {
1217 int i;
1218
1219 return; /* dump disabled */
1220
1221 write_log("----------------------\n");
1222 for (i = 0; i < N_REGS; i++) {
1223 switch (nstate[i]) {
1224 case L_UNKNOWN:
1225 write_log("Nat %d : UNKNOWN\n", i);
1226 break;
1227 case L_UNAVAIL:
1228 write_log("Nat %d : UNAVAIL\n", i);
1229 break;
1230 default:
1231 write_log("Nat %d : %d\n", i, nstate[i]);
1232 break;
1233 }
1234 }
1235 for (i = 0; i < VREGS; i++) {
1236 if (vstate[i] == L_UNNEEDED)
1237 write_log("Virt %d: UNNEEDED\n", i);
1238 }
1239 }
1240
1241 /********************************************************************
1242 * register status handling. EMIT TIME! *
1243 ********************************************************************/
1244
1245 static __inline__ void set_status(int r, int status)
1246 {
1247 if (status == ISCONST)
1248 log_clobberreg(r);
1249 live.state[r].status=status;
1250 }
1251
1252 static __inline__ int isinreg(int r)
1253 {
1254 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1255 }
1256
1257 static __inline__ void adjust_nreg(int r, uae_u32 val)
1258 {
1259 if (!val)
1260 return;
1261 raw_lea_l_brr(r,r,val);
1262 }
1263
1264 static void tomem(int r)
1265 {
1266 int rr=live.state[r].realreg;
1267
1268 if (isinreg(r)) {
1269 if (live.state[r].val && live.nat[rr].nholds==1
1270 && !live.nat[rr].locked) {
1271 // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1272 // live.state[r].val,r,rr,target);
1273 adjust_nreg(rr,live.state[r].val);
1274 live.state[r].val=0;
1275 live.state[r].dirtysize=4;
1276 set_status(r,DIRTY);
1277 }
1278 }
1279
1280 if (live.state[r].status==DIRTY) {
1281 switch (live.state[r].dirtysize) {
1282 case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
1283 case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
1284 case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
1285 default: abort();
1286 }
1287 log_vwrite(r);
1288 set_status(r,CLEAN);
1289 live.state[r].dirtysize=0;
1290 }
1291 }
1292
1293 static __inline__ int isconst(int r)
1294 {
1295 return live.state[r].status==ISCONST;
1296 }
1297
1298 int is_const(int r)
1299 {
1300 return isconst(r);
1301 }
1302
1303 static __inline__ void writeback_const(int r)
1304 {
1305 if (!isconst(r))
1306 return;
1307 Dif (live.state[r].needflush==NF_HANDLER) {
1308 write_log("Trying to write back constant NF_HANDLER!\n");
1309 abort();
1310 }
1311
1312 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1313 log_vwrite(r);
1314 live.state[r].val=0;
1315 set_status(r,INMEM);
1316 }
1317
1318 static __inline__ void tomem_c(int r)
1319 {
1320 if (isconst(r)) {
1321 writeback_const(r);
1322 }
1323 else
1324 tomem(r);
1325 }
1326
1327 static void evict(int r)
1328 {
1329 int rr;
1330
1331 if (!isinreg(r))
1332 return;
1333 tomem(r);
1334 rr=live.state[r].realreg;
1335
1336 Dif (live.nat[rr].locked &&
1337 live.nat[rr].nholds==1) {
1338 write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1339 abort();
1340 }
1341
1342 live.nat[rr].nholds--;
1343 if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1344 int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1345 int thisind=live.state[r].realind;
1346
1347 live.nat[rr].holds[thisind]=topreg;
1348 live.state[topreg].realind=thisind;
1349 }
1350 live.state[r].realreg=-1;
1351 set_status(r,INMEM);
1352 }
1353
1354 static __inline__ void free_nreg(int r)
1355 {
1356 int i=live.nat[r].nholds;
1357
1358 while (i) {
1359 int vr;
1360
1361 --i;
1362 vr=live.nat[r].holds[i];
1363 evict(vr);
1364 }
1365 Dif (live.nat[r].nholds!=0) {
1366 write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1367 abort();
1368 }
1369 }
1370
1371 /* Use with care! */
1372 static __inline__ void isclean(int r)
1373 {
1374 if (!isinreg(r))
1375 return;
1376 live.state[r].validsize=4;
1377 live.state[r].dirtysize=0;
1378 live.state[r].val=0;
1379 set_status(r,CLEAN);
1380 }
1381
1382 static __inline__ void disassociate(int r)
1383 {
1384 isclean(r);
1385 evict(r);
1386 }
1387
1388 static __inline__ void set_const(int r, uae_u32 val)
1389 {
1390 disassociate(r);
1391 live.state[r].val=val;
1392 set_status(r,ISCONST);
1393 }
1394
1395 static __inline__ uae_u32 get_offset(int r)
1396 {
1397 return live.state[r].val;
1398 }
1399
1400 static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1401 {
1402 int bestreg;
1403 uae_s32 when;
1404 int i;
1405 uae_s32 badness=0; /* to shut up gcc */
1406 bestreg=-1;
1407 when=2000000000;
1408
1409 /* XXX use a regalloc_order table? */
1410 for (i=0;i<N_REGS;i++) {
1411 badness=live.nat[i].touched;
1412 if (live.nat[i].nholds==0)
1413 badness=0;
1414 if (i==hint)
1415 badness-=200000000;
1416 if (!live.nat[i].locked && badness<when) {
1417 if ((size==1 && live.nat[i].canbyte) ||
1418 (size==2 && live.nat[i].canword) ||
1419 (size==4)) {
1420 bestreg=i;
1421 when=badness;
1422 if (live.nat[i].nholds==0 && hint<0)
1423 break;
1424 if (i==hint)
1425 break;
1426 }
1427 }
1428 }
1429 Dif (bestreg==-1)
1430 abort();
1431
1432 if (live.nat[bestreg].nholds>0) {
1433 free_nreg(bestreg);
1434 }
1435 if (isinreg(r)) {
1436 int rr=live.state[r].realreg;
1437 /* This will happen if we read a partially dirty register at a
1438 bigger size */
1439 Dif (willclobber || live.state[r].validsize>=size)
1440 abort();
1441 Dif (live.nat[rr].nholds!=1)
1442 abort();
1443 if (size==4 && live.state[r].validsize==2) {
1444 log_isused(bestreg);
1445 log_visused(r);
1446 raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem); /* fetch the full value from memory */
1447 raw_bswap_32(bestreg);
1448 raw_zero_extend_16_rr(rr,rr); /* keep only the valid low word already in rr */
1449 raw_zero_extend_16_rr(bestreg,bestreg); /* keep only the (byte-swapped) high word */
1450 raw_bswap_32(bestreg); /* swap it back into the high half; low half is now zero */
1451 raw_lea_l_brr_indexed(rr,rr,bestreg,1,0); /* combine: valid low word from rr + high word from memory */
1452 live.state[r].validsize=4;
1453 live.nat[rr].touched=touchcnt++;
1454 return rr;
1455 }
1456 if (live.state[r].validsize==1) {
1457 /* Nothing yet */
1458 }
1459 evict(r);
1460 }
1461
1462 if (!willclobber) {
1463 if (live.state[r].status!=UNDEF) {
1464 if (isconst(r)) {
1465 raw_mov_l_ri(bestreg,live.state[r].val);
1466 live.state[r].val=0;
1467 live.state[r].dirtysize=4;
1468 set_status(r,DIRTY);
1469 log_isused(bestreg);
1470 }
1471 else {
1472 log_isreg(bestreg, r); /* This will also load it! */
1473 live.state[r].dirtysize=0;
1474 set_status(r,CLEAN);
1475 }
1476 }
1477 else {
1478 live.state[r].val=0;
1479 live.state[r].dirtysize=0;
1480 set_status(r,CLEAN);
1481 log_isused(bestreg);
1482 }
1483 live.state[r].validsize=4;
1484 }
1485 else { /* this is the easiest way, but not optimal. FIXME! */
1486 /* Now it's trickier, but hopefully still OK */
1487 if (!isconst(r) || size==4) {
1488 live.state[r].validsize=size;
1489 live.state[r].dirtysize=size;
1490 live.state[r].val=0;
1491 set_status(r,DIRTY);
1492 if (size == 4) {
1493 log_clobberreg(r);
1494 log_isused(bestreg);
1495 }
1496 else {
1497 log_visused(r);
1498 log_isused(bestreg);
1499 }
1500 }
1501 else {
1502 if (live.state[r].status!=UNDEF)
1503 raw_mov_l_ri(bestreg,live.state[r].val);
1504 live.state[r].val=0;
1505 live.state[r].validsize=4;
1506 live.state[r].dirtysize=4;
1507 set_status(r,DIRTY);
1508 log_isused(bestreg);
1509 }
1510 }
1511 live.state[r].realreg=bestreg;
1512 live.state[r].realind=live.nat[bestreg].nholds;
1513 live.nat[bestreg].touched=touchcnt++;
1514 live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1515 live.nat[bestreg].nholds++;
1516
1517 return bestreg;
1518 }
1519
1520 static int alloc_reg(int r, int size, int willclobber)
1521 {
1522 return alloc_reg_hinted(r,size,willclobber,-1);
1523 }
1524
1525 static void unlock2(int r)
1526 {
1527 Dif (!live.nat[r].locked)
1528 abort();
1529 live.nat[r].locked--;
1530 }
1531
1532 static void setlock(int r)
1533 {
1534 live.nat[r].locked++;
1535 }
1536
1537
1538 static void mov_nregs(int d, int s)
1539 {
1540 int ns=live.nat[s].nholds;
1541 int nd=live.nat[d].nholds;
1542 int i;
1543
1544 if (s==d)
1545 return;
1546
1547 if (nd>0)
1548 free_nreg(d);
1549
1550 log_isused(d);
1551 raw_mov_l_rr(d,s);
1552
1553 for (i=0;i<live.nat[s].nholds;i++) {
1554 int vs=live.nat[s].holds[i];
1555
1556 live.state[vs].realreg=d;
1557 live.state[vs].realind=i;
1558 live.nat[d].holds[i]=vs;
1559 }
1560 live.nat[d].nholds=live.nat[s].nholds;
1561
1562 live.nat[s].nholds=0;
1563 }
1564
1565
1566 static __inline__ void make_exclusive(int r, int size, int spec)
1567 {
1568 int clobber;
1569 reg_status oldstate;
1570 int rr=live.state[r].realreg;
1571 int nr;
1572 int nind;
1573 int ndirt=0;
1574 int i;
1575
1576 if (!isinreg(r))
1577 return;
1578 if (live.nat[rr].nholds==1)
1579 return;
1580 for (i=0;i<live.nat[rr].nholds;i++) {
1581 int vr=live.nat[rr].holds[i];
1582 if (vr!=r &&
1583 (live.state[vr].status==DIRTY || live.state[vr].val))
1584 ndirt++;
1585 }
1586 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1587 /* Everything else is clean, so let's keep this register */
1588 for (i=0;i<live.nat[rr].nholds;i++) {
1589 int vr=live.nat[rr].holds[i];
1590 if (vr!=r) {
1591 evict(vr);
1592 i--; /* Try that index again! */
1593 }
1594 }
1595 Dif (live.nat[rr].nholds!=1) {
1596 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1597 rr,live.nat[rr].nholds,r);
1598 abort();
1599 }
1600 return;
1601 }
1602
1603 /* We have to split the register */
1604 oldstate=live.state[r];
1605
1606 setlock(rr); /* Make sure this doesn't go away */
1607 /* Forget about r being in the register rr */
1608 disassociate(r);
1609 /* Get a new register, that we will clobber completely */
1610 if (oldstate.status==DIRTY) {
1611 /* If dirtysize is <4, we need a register that can handle the
1612 eventual smaller memory store! Thanks to Quake68k for exposing
1613 this detail ;-) */
1614 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1615 }
1616 else {
1617 nr=alloc_reg_hinted(r,4,1,spec);
1618 }
1619 nind=live.state[r].realind;
1620 live.state[r]=oldstate; /* Keep all the old state info */
1621 live.state[r].realreg=nr;
1622 live.state[r].realind=nind;
1623
1624 if (size<live.state[r].validsize) {
1625 if (live.state[r].val) {
1626 /* Might as well compensate for the offset now */
1627 raw_lea_l_brr(nr,rr,oldstate.val);
1628 live.state[r].val=0;
1629 live.state[r].dirtysize=4;
1630 set_status(r,DIRTY);
1631 }
1632 else
1633 raw_mov_l_rr(nr,rr); /* Make another copy */
1634 }
1635 unlock2(rr);
1636 }
1637
1638 static __inline__ void add_offset(int r, uae_u32 off)
1639 {
1640 live.state[r].val+=off;
1641 }
1642
1643 static __inline__ void remove_offset(int r, int spec)
1644 {
1645 reg_status oldstate;
1646 int rr;
1647
1648 if (isconst(r))
1649 return;
1650 if (live.state[r].val==0)
1651 return;
1652 if (isinreg(r) && live.state[r].validsize<4)
1653 evict(r);
1654
1655 if (!isinreg(r))
1656 alloc_reg_hinted(r,4,0,spec);
1657
1658 Dif (live.state[r].validsize!=4) {
1659 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1660 abort();
1661 }
1662 make_exclusive(r,0,-1);
1663 /* make_exclusive might have done the job already */
1664 if (live.state[r].val==0)
1665 return;
1666
1667 rr=live.state[r].realreg;
1668
1669 if (live.nat[rr].nholds==1) {
1670 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1671 // live.state[r].val,r,rr,target);
1672 adjust_nreg(rr,live.state[r].val);
1673 live.state[r].dirtysize=4;
1674 live.state[r].val=0;
1675 set_status(r,DIRTY);
1676 return;
1677 }
1678 write_log("Failed in remove_offset\n");
1679 abort();
1680 }
1681
1682 static __inline__ void remove_all_offsets(void)
1683 {
1684 int i;
1685
1686 for (i=0;i<VREGS;i++)
1687 remove_offset(i,-1);
1688 }
1689
1690 static inline void flush_reg_count(void)
1691 {
1692 #if RECORD_REGISTER_USAGE
1693 for (int r = 0; r < 16; r++)
1694 if (reg_count_local[r])
1695 ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1696 #endif
1697 }
1698
1699 static inline void record_register(int r)
1700 {
1701 #if RECORD_REGISTER_USAGE
1702 if (r < 16)
1703 reg_count_local[r]++;
1704 #endif
1705 }
1706
1707 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1708 {
1709 int n;
1710 int answer=-1;
1711
1712 record_register(r);
1713 if (live.state[r].status==UNDEF) {
1714 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1715 }
1716 if (!can_offset)
1717 remove_offset(r,spec);
1718
1719 if (isinreg(r) && live.state[r].validsize>=size) {
1720 n=live.state[r].realreg;
1721 switch(size) {
1722 case 1:
1723 if (live.nat[n].canbyte || spec>=0) {
1724 answer=n;
1725 }
1726 break;
1727 case 2:
1728 if (live.nat[n].canword || spec>=0) {
1729 answer=n;
1730 }
1731 break;
1732 case 4:
1733 answer=n;
1734 break;
1735 default: abort();
1736 }
1737 if (answer<0)
1738 evict(r);
1739 }
1740 /* either the value was in memory to start with, or it was evicted and
1741 is in memory now */
1742 if (answer<0) {
1743 answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1744 }
1745
1746 if (spec>=0 && spec!=answer) {
1747 /* Too bad */
1748 mov_nregs(spec,answer);
1749 answer=spec;
1750 }
1751 live.nat[answer].locked++;
1752 live.nat[answer].touched=touchcnt++;
1753 return answer;
1754 }
1755
1756
1757
1758 static int readreg(int r, int size)
1759 {
1760 return readreg_general(r,size,-1,0);
1761 }
1762
1763 static int readreg_specific(int r, int size, int spec)
1764 {
1765 return readreg_general(r,size,spec,0);
1766 }
1767
1768 static int readreg_offset(int r, int size)
1769 {
1770 return readreg_general(r,size,-1,1);
1771 }
1772
1773 /* writereg_general(r, size, spec)
1774 *
1775 * INPUT
1776 * - r : mid-layer register
1777 * - size : requested size (1/2/4)
* - spec : -1 to let the allocator pick a suitable free register,
* otherwise the physical register to use in any case
1780 *
1781 * OUTPUT
1782 * - hard (physical, x86 here) register allocated to virtual register r
1783 */
1784 static __inline__ int writereg_general(int r, int size, int spec)
1785 {
1786 int n;
1787 int answer=-1;
1788
1789 record_register(r);
1790 if (size<4) {
1791 remove_offset(r,spec);
1792 }
1793
1794 make_exclusive(r,size,spec);
1795 if (isinreg(r)) {
1796 int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1797 int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1798 n=live.state[r].realreg;
1799
1800 Dif (live.nat[n].nholds!=1)
1801 abort();
1802 switch(size) {
1803 case 1:
1804 if (live.nat[n].canbyte || spec>=0) {
1805 live.state[r].dirtysize=ndsize;
1806 live.state[r].validsize=nvsize;
1807 answer=n;
1808 }
1809 break;
1810 case 2:
1811 if (live.nat[n].canword || spec>=0) {
1812 live.state[r].dirtysize=ndsize;
1813 live.state[r].validsize=nvsize;
1814 answer=n;
1815 }
1816 break;
1817 case 4:
1818 live.state[r].dirtysize=ndsize;
1819 live.state[r].validsize=nvsize;
1820 answer=n;
1821 break;
1822 default: abort();
1823 }
1824 if (answer<0)
1825 evict(r);
1826 }
1827 /* either the value was in memory to start with, or it was evicted and
1828 is in memory now */
1829 if (answer<0) {
1830 answer=alloc_reg_hinted(r,size,1,spec);
1831 }
1832 if (spec>=0 && spec!=answer) {
1833 mov_nregs(spec,answer);
1834 answer=spec;
1835 }
1836 if (live.state[r].status==UNDEF)
1837 live.state[r].validsize=4;
1838 live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1839 live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1840
1841 live.nat[answer].locked++;
1842 live.nat[answer].touched=touchcnt++;
1843 if (size==4) {
1844 live.state[r].val=0;
1845 }
1846 else {
1847 Dif (live.state[r].val) {
1848 write_log("Problem with val\n");
1849 abort();
1850 }
1851 }
1852 set_status(r,DIRTY);
1853 return answer;
1854 }
1855
1856 static int writereg(int r, int size)
1857 {
1858 return writereg_general(r,size,-1);
1859 }
1860
1861 static int writereg_specific(int r, int size, int spec)
1862 {
1863 return writereg_general(r,size,spec);
1864 }
1865
1866 static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1867 {
1868 int n;
1869 int answer=-1;
1870
1871 record_register(r);
1872 if (live.state[r].status==UNDEF) {
1873 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1874 }
1875 remove_offset(r,spec);
1876 make_exclusive(r,0,spec);
1877
1878 Dif (wsize<rsize) {
1879 write_log("Cannot handle wsize<rsize in rmw_general()\n");
1880 abort();
1881 }
1882 if (isinreg(r) && live.state[r].validsize>=rsize) {
1883 n=live.state[r].realreg;
1884 Dif (live.nat[n].nholds!=1)
1885 abort();
1886
1887 switch(rsize) {
1888 case 1:
1889 if (live.nat[n].canbyte || spec>=0) {
1890 answer=n;
1891 }
1892 break;
1893 case 2:
1894 if (live.nat[n].canword || spec>=0) {
1895 answer=n;
1896 }
1897 break;
1898 case 4:
1899 answer=n;
1900 break;
1901 default: abort();
1902 }
1903 if (answer<0)
1904 evict(r);
1905 }
1906 /* either the value was in memory to start with, or it was evicted and
1907 is in memory now */
1908 if (answer<0) {
1909 answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1910 }
1911
1912 if (spec>=0 && spec!=answer) {
1913 /* Too bad */
1914 mov_nregs(spec,answer);
1915 answer=spec;
1916 }
1917 if (wsize>live.state[r].dirtysize)
1918 live.state[r].dirtysize=wsize;
1919 if (wsize>live.state[r].validsize)
1920 live.state[r].validsize=wsize;
1921 set_status(r,DIRTY);
1922
1923 live.nat[answer].locked++;
1924 live.nat[answer].touched=touchcnt++;
1925
1926 Dif (live.state[r].val) {
1927 write_log("Problem with val(rmw)\n");
1928 abort();
1929 }
1930 return answer;
1931 }
1932
1933 static int rmw(int r, int wsize, int rsize)
1934 {
1935 return rmw_general(r,wsize,rsize,-1);
1936 }
1937
1938 static int rmw_specific(int r, int wsize, int rsize, int spec)
1939 {
1940 return rmw_general(r,wsize,rsize,spec);
1941 }
1942
1943
1944 /* needed for restoring the carry flag on non-P6 cores */
1945 static void bt_l_ri_noclobber(R4 r, IMM i)
1946 {
1947 int size=4;
1948 if (i<16)
1949 size=2;
1950 r=readreg(r,size);
1951 raw_bt_l_ri(r,i);
1952 unlock2(r);
1953 }
1954
1955 /********************************************************************
1956 * FPU register status handling. EMIT TIME! *
1957 ********************************************************************/
1958
1959 static void f_tomem(int r)
1960 {
1961 if (live.fate[r].status==DIRTY) {
1962 #if USE_LONG_DOUBLE
1963 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1964 #else
1965 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1966 #endif
1967 live.fate[r].status=CLEAN;
1968 }
1969 }
1970
1971 static void f_tomem_drop(int r)
1972 {
1973 if (live.fate[r].status==DIRTY) {
1974 #if USE_LONG_DOUBLE
1975 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1976 #else
1977 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1978 #endif
1979 live.fate[r].status=INMEM;
1980 }
1981 }
1982
1983
1984 static __inline__ int f_isinreg(int r)
1985 {
1986 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1987 }
1988
1989 static void f_evict(int r)
1990 {
1991 int rr;
1992
1993 if (!f_isinreg(r))
1994 return;
1995 rr=live.fate[r].realreg;
1996 if (live.fat[rr].nholds==1)
1997 f_tomem_drop(r);
1998 else
1999 f_tomem(r);
2000
2001 Dif (live.fat[rr].locked &&
2002 live.fat[rr].nholds==1) {
2003 write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
2004 abort();
2005 }
2006
2007 live.fat[rr].nholds--;
2008 if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
2009 int topreg=live.fat[rr].holds[live.fat[rr].nholds];
2010 int thisind=live.fate[r].realind;
2011 live.fat[rr].holds[thisind]=topreg;
2012 live.fate[topreg].realind=thisind;
2013 }
2014 live.fate[r].status=INMEM;
2015 live.fate[r].realreg=-1;
2016 }
2017
2018 static __inline__ void f_free_nreg(int r)
2019 {
2020 int i=live.fat[r].nholds;
2021
2022 while (i) {
2023 int vr;
2024
2025 --i;
2026 vr=live.fat[r].holds[i];
2027 f_evict(vr);
2028 }
2029 Dif (live.fat[r].nholds!=0) {
2030 write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2031 abort();
2032 }
2033 }
2034
2035
2036 /* Use with care! */
2037 static __inline__ void f_isclean(int r)
2038 {
2039 if (!f_isinreg(r))
2040 return;
2041 live.fate[r].status=CLEAN;
2042 }
2043
2044 static __inline__ void f_disassociate(int r)
2045 {
2046 f_isclean(r);
2047 f_evict(r);
2048 }
2049
2050
2051
2052 static int f_alloc_reg(int r, int willclobber)
2053 {
2054 int bestreg;
2055 uae_s32 when;
2056 int i;
2057 uae_s32 badness;
2058 bestreg=-1;
2059 when=2000000000;
2060 for (i=N_FREGS;i--;) {
2061 badness=live.fat[i].touched;
2062 if (live.fat[i].nholds==0)
2063 badness=0;
2064
2065 if (!live.fat[i].locked && badness<when) {
2066 bestreg=i;
2067 when=badness;
2068 if (live.fat[i].nholds==0)
2069 break;
2070 }
2071 }
2072 Dif (bestreg==-1)
2073 abort();
2074
2075 if (live.fat[bestreg].nholds>0) {
2076 f_free_nreg(bestreg);
2077 }
2078 if (f_isinreg(r)) {
2079 f_evict(r);
2080 }
2081
2082 if (!willclobber) {
2083 if (live.fate[r].status!=UNDEF) {
2084 #if USE_LONG_DOUBLE
2085 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2086 #else
2087 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2088 #endif
2089 }
2090 live.fate[r].status=CLEAN;
2091 }
2092 else {
2093 live.fate[r].status=DIRTY;
2094 }
2095 live.fate[r].realreg=bestreg;
2096 live.fate[r].realind=live.fat[bestreg].nholds;
2097 live.fat[bestreg].touched=touchcnt++;
2098 live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2099 live.fat[bestreg].nholds++;
2100
2101 return bestreg;
2102 }
2103
2104 static void f_unlock(int r)
2105 {
2106 Dif (!live.fat[r].locked)
2107 abort();
2108 live.fat[r].locked--;
2109 }
2110
2111 static void f_setlock(int r)
2112 {
2113 live.fat[r].locked++;
2114 }
2115
2116 static __inline__ int f_readreg(int r)
2117 {
2118 int n;
2119 int answer=-1;
2120
2121 if (f_isinreg(r)) {
2122 n=live.fate[r].realreg;
2123 answer=n;
2124 }
2125 /* either the value was in memory to start with, or it was evicted and
2126 is in memory now */
2127 if (answer<0)
2128 answer=f_alloc_reg(r,0);
2129
2130 live.fat[answer].locked++;
2131 live.fat[answer].touched=touchcnt++;
2132 return answer;
2133 }
2134
2135 static __inline__ void f_make_exclusive(int r, int clobber)
2136 {
2137 freg_status oldstate;
2138 int rr=live.fate[r].realreg;
2139 int nr;
2140 int nind;
2141 int ndirt=0;
2142 int i;
2143
2144 if (!f_isinreg(r))
2145 return;
2146 if (live.fat[rr].nholds==1)
2147 return;
2148 for (i=0;i<live.fat[rr].nholds;i++) {
2149 int vr=live.fat[rr].holds[i];
2150 if (vr!=r && live.fate[vr].status==DIRTY)
2151 ndirt++;
2152 }
2153 if (!ndirt && !live.fat[rr].locked) {
2154 /* Everything else is clean, so let's keep this register */
2155 for (i=0;i<live.fat[rr].nholds;i++) {
2156 int vr=live.fat[rr].holds[i];
2157 if (vr!=r) {
2158 f_evict(vr);
2159 i--; /* Try that index again! */
2160 }
2161 }
2162 Dif (live.fat[rr].nholds!=1) {
2163 write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2164 for (i=0;i<live.fat[rr].nholds;i++) {
2165 write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2166 live.fate[live.fat[rr].holds[i]].realreg,
2167 live.fate[live.fat[rr].holds[i]].realind);
2168 }
2169 write_log("\n");
2170 abort();
2171 }
2172 return;
2173 }
2174
2175 /* We have to split the register */
2176 oldstate=live.fate[r];
2177
2178 f_setlock(rr); /* Make sure this doesn't go away */
2179 /* Forget about r being in the register rr */
2180 f_disassociate(r);
2181 /* Get a new register, that we will clobber completely */
2182 nr=f_alloc_reg(r,1);
2183 nind=live.fate[r].realind;
2184 if (!clobber)
2185 raw_fmov_rr(nr,rr); /* Make another copy */
2186 live.fate[r]=oldstate; /* Keep all the old state info */
2187 live.fate[r].realreg=nr;
2188 live.fate[r].realind=nind;
2189 f_unlock(rr);
2190 }
2191
2192
2193 static __inline__ int f_writereg(int r)
2194 {
2195 int n;
2196 int answer=-1;
2197
2198 f_make_exclusive(r,1);
2199 if (f_isinreg(r)) {
2200 n=live.fate[r].realreg;
2201 answer=n;
2202 }
2203 if (answer<0) {
2204 answer=f_alloc_reg(r,1);
2205 }
2206 live.fate[r].status=DIRTY;
2207 live.fat[answer].locked++;
2208 live.fat[answer].touched=touchcnt++;
2209 return answer;
2210 }
2211
2212 static int f_rmw(int r)
2213 {
2214 int n;
2215
2216 f_make_exclusive(r,0);
2217 if (f_isinreg(r)) {
2218 n=live.fate[r].realreg;
2219 }
2220 else
2221 n=f_alloc_reg(r,0);
2222 live.fate[r].status=DIRTY;
2223 live.fat[n].locked++;
2224 live.fat[n].touched=touchcnt++;
2225 return n;
2226 }
2227
2228 static void fflags_into_flags_internal(uae_u32 tmp)
2229 {
2230 int r;
2231
2232 clobber_flags();
2233 r=f_readreg(FP_RESULT);
2234 if (FFLAG_NREG_CLOBBER_CONDITION) {
2235 int tmp2=tmp;
2236 tmp=writereg_specific(tmp,4,FFLAG_NREG);
2237 raw_fflags_into_flags(r);
2238 unlock2(tmp);
2239 forget_about(tmp2);
2240 }
2241 else
2242 raw_fflags_into_flags(r);
2243 f_unlock(r);
2244 live_flags();
2245 }
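/* When loading the FPU result flags would clobber FFLAG_NREG (the
   FFLAG_NREG_CLOBBER_CONDITION case above), tmp is pinned into that
   register with writereg_specific() so the clobber lands on a scratch
   value; tmp2 keeps the original handle so it can be forgotten again. */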
2246
2247
2248
2249
2250 /********************************************************************
2251 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2252 ********************************************************************/
2253
2254 /*
2255 * RULES FOR HANDLING REGISTERS:
2256 *
2257 * * In the function headers, order the parameters
2258 * - 1st registers written to
2259 * - 2nd read/modify/write registers
2260 * - 3rd registers read from
2261 * * Before calling raw_*, you must call readreg, writereg or rmw for
2262 * each register
2263 * * The order for this is
2264 * - 1st call remove_offset for all registers written to with size<4
2265 * - 2nd call readreg for all registers read without offset
2266 * - 3rd call rmw for all rmw registers
2267 * - 4th call readreg_offset for all registers that can handle offsets
2268 * - 5th call get_offset for all the registers from the previous step
2269 * - 6th call writereg for all written-to registers
2270 * - 7th call raw_*
2271 * - 8th unlock2 all registers that were locked
2272 */
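/*
 * For illustration, the typical shape of a simple two-operand handler
 * under the rules above (compare mov_b_rr further below):
 *
 *   CLOBBER_MOV;         // declare what the raw_* op clobbers
 *   s=readreg(s,1);      // 2nd: lock the source for reading
 *   d=writereg(d,1);     // 6th: lock the destination for writing
 *   raw_mov_b_rr(d,s);   // 7th: emit the opcode on the native regs
 *   unlock2(d);          // 8th: drop all the locks taken above
 *   unlock2(s);
 */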
2273
2274 MIDFUNC(0,live_flags,(void))
2275 {
2276 live.flags_on_stack=TRASH;
2277 live.flags_in_flags=VALID;
2278 live.flags_are_important=1;
2279 }
2280 MENDFUNC(0,live_flags,(void))
2281
2282 MIDFUNC(0,dont_care_flags,(void))
2283 {
2284 live.flags_are_important=0;
2285 }
2286 MENDFUNC(0,dont_care_flags,(void))
2287
2288
2289 MIDFUNC(0,duplicate_carry,(void))
2290 {
2291 evict(FLAGX);
2292 make_flags_live_internal();
2293 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2294 log_vwrite(FLAGX);
2295 }
2296 MENDFUNC(0,duplicate_carry,(void))
2297
2298 MIDFUNC(0,restore_carry,(void))
2299 {
2300 if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2301 bt_l_ri_noclobber(FLAGX,0);
2302 }
2303 else { /* Avoid the stall the above creates: a byte rotate by 8
2304 leaves FLAGX unchanged but still copies bit 0 into C.
2305 This is slow on non-P6, though. */
2306 COMPCALL(rol_b_ri)(FLAGX,8);
2307 isclean(FLAGX);
2308 }
2309 }
2310 MENDFUNC(0,restore_carry,(void))
2311
2312 MIDFUNC(0,start_needflags,(void))
2313 {
2314 needflags=1;
2315 }
2316 MENDFUNC(0,start_needflags,(void))
2317
2318 MIDFUNC(0,end_needflags,(void))
2319 {
2320 needflags=0;
2321 }
2322 MENDFUNC(0,end_needflags,(void))
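/* While needflags is set, the constant-folding and no-op fast paths
   below (the "!i && !needflags" early exits in the shift and rotate
   handlers, for instance) are disabled, so an operation is emitted
   even when it is only wanted for its effect on the host flags. */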
2323
2324 MIDFUNC(0,make_flags_live,(void))
2325 {
2326 make_flags_live_internal();
2327 }
2328 MENDFUNC(0,make_flags_live,(void))
2329
2330 MIDFUNC(1,fflags_into_flags,(W2 tmp))
2331 {
2332 clobber_flags();
2333 fflags_into_flags_internal(tmp);
2334 }
2335 MENDFUNC(1,fflags_into_flags,(W2 tmp))
2336
2337
2338 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2339 {
2340 int size=4;
2341 if (i<16)
2342 size=2;
2343 CLOBBER_BT;
2344 r=readreg(r,size);
2345 raw_bt_l_ri(r,i);
2346 unlock2(r);
2347 }
2348 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
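/* The size=2 trick above (also used by btc/btr/bts below) works
   because a bit index under 16 can only touch the low word, so only
   the low two bytes of the register need to be valid when locking. */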
2349
2350 MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2351 {
2352 CLOBBER_BT;
2353 r=readreg(r,4);
2354 b=readreg(b,4);
2355 raw_bt_l_rr(r,b);
2356 unlock2(r);
2357 unlock2(b);
2358 }
2359 MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2360
2361 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2362 {
2363 int size=4;
2364 if (i<16)
2365 size=2;
2366 CLOBBER_BT;
2367 r=rmw(r,size,size);
2368 raw_btc_l_ri(r,i);
2369 unlock2(r);
2370 }
2371 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2372
2373 MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2374 {
2375 CLOBBER_BT;
2376 b=readreg(b,4);
2377 r=rmw(r,4,4);
2378 raw_btc_l_rr(r,b);
2379 unlock2(r);
2380 unlock2(b);
2381 }
2382 MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2383
2384
2385 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2386 {
2387 int size=4;
2388 if (i<16)
2389 size=2;
2390 CLOBBER_BT;
2391 r=rmw(r,size,size);
2392 raw_btr_l_ri(r,i);
2393 unlock2(r);
2394 }
2395 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2396
2397 MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2398 {
2399 CLOBBER_BT;
2400 b=readreg(b,4);
2401 r=rmw(r,4,4);
2402 raw_btr_l_rr(r,b);
2403 unlock2(r);
2404 unlock2(b);
2405 }
2406 MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2407
2408
2409 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2410 {
2411 int size=4;
2412 if (i<16)
2413 size=2;
2414 CLOBBER_BT;
2415 r=rmw(r,size,size);
2416 raw_bts_l_ri(r,i);
2417 unlock2(r);
2418 }
2419 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2420
2421 MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2422 {
2423 CLOBBER_BT;
2424 b=readreg(b,4);
2425 r=rmw(r,4,4);
2426 raw_bts_l_rr(r,b);
2427 unlock2(r);
2428 unlock2(b);
2429 }
2430 MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2431
2432 MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2433 {
2434 CLOBBER_MOV;
2435 d=writereg(d,4);
2436 raw_mov_l_rm(d,s);
2437 unlock2(d);
2438 }
2439 MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2440
2441
2442 MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2443 {
2444 r=readreg(r,4);
2445 raw_call_r(r);
2446 unlock2(r);
2447 }
2448 MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2449
2450 MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2451 {
2452 CLOBBER_SUB;
2453 raw_sub_l_mi(d,s) ;
2454 }
2455 MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2456
2457 MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2458 {
2459 CLOBBER_MOV;
2460 raw_mov_l_mi(d,s) ;
2461 }
2462 MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2463
2464 MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2465 {
2466 CLOBBER_MOV;
2467 raw_mov_w_mi(d,s) ;
2468 }
2469 MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2470
2471 MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2472 {
2473 CLOBBER_MOV;
2474 raw_mov_b_mi(d,s) ;
2475 }
2476 MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2477
2478 MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2479 {
2480 if (!i && !needflags)
2481 return;
2482 CLOBBER_ROL;
2483 r=rmw(r,1,1);
2484 raw_rol_b_ri(r,i);
2485 unlock2(r);
2486 }
2487 MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2488
2489 MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2490 {
2491 if (!i && !needflags)
2492 return;
2493 CLOBBER_ROL;
2494 r=rmw(r,2,2);
2495 raw_rol_w_ri(r,i);
2496 unlock2(r);
2497 }
2498 MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2499
2500 MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2501 {
2502 if (!i && !needflags)
2503 return;
2504 CLOBBER_ROL;
2505 r=rmw(r,4,4);
2506 raw_rol_l_ri(r,i);
2507 unlock2(r);
2508 }
2509 MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2510
2511 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2512 {
2513 if (isconst(r)) {
2514 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2515 return;
2516 }
2517 CLOBBER_ROL;
2518 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2519 d=rmw(d,4,4);
2520 Dif (r!=1) {
2521 write_log("Illegal register %d in raw_rol_l\n",r);
2522 abort();
2523 }
2524 raw_rol_l_rr(d,r) ;
2525 unlock2(r);
2526 unlock2(d);
2527 }
2528 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2529
2530 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2531 { /* Can only do this with r==1, i.e. cl */
2532
2533 if (isconst(r)) {
2534 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2535 return;
2536 }
2537 CLOBBER_ROL;
2538 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2539 d=rmw(d,2,2);
2540 Dif (r!=1) {
2541 write_log("Illegal register %d in raw_rol_w\n",r);
2542 abort();
2543 }
2544 raw_rol_w_rr(d,r) ;
2545 unlock2(r);
2546 unlock2(d);
2547 }
2548 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2549
2550 MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2551 { /* Can only do this with r==1, i.e. cl */
2552
2553 if (isconst(r)) {
2554 COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2555 return;
2556 }
2557
2558 CLOBBER_ROL;
2559 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2560 d=rmw(d,1,1);
2561 Dif (r!=1) {
2562 write_log("Illegal register %d in raw_rol_b\n",r);
2563 abort();
2564 }
2565 raw_rol_b_rr(d,r) ;
2566 unlock2(r);
2567 unlock2(d);
2568 }
2569 MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2570
2571
2572 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2573 {
2574 if (isconst(r)) {
2575 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2576 return;
2577 }
2578 CLOBBER_SHLL;
2579 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2580 d=rmw(d,4,4);
2581 Dif (r!=1) {
2582 write_log("Illegal register %d in raw_shll_l\n",r);
2583 abort();
2584 }
2585 raw_shll_l_rr(d,r) ;
2586 unlock2(r);
2587 unlock2(d);
2588 }
2589 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2590
2591 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2592 { /* Can only do this with r==1, i.e. cl */
2593
2594 if (isconst(r)) {
2595 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2596 return;
2597 }
2598 CLOBBER_SHLL;
2599 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2600 d=rmw(d,2,2);
2601 Dif (r!=1) {
2602 write_log("Illegal register %d in raw_shll_w\n",r);
2603 abort();
2604 }
2605 raw_shll_w_rr(d,r) ;
2606 unlock2(r);
2607 unlock2(d);
2608 }
2609 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2610
2611 MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2612 { /* Can only do this with r==1, i.e. cl */
2613
2614 if (isconst(r)) {
2615 COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2616 return;
2617 }
2618
2619 CLOBBER_SHLL;
2620 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2621 d=rmw(d,1,1);
2622 Dif (r!=1) {
2623 write_log("Illegal register %d in raw_shll_b\n",r);
2624 abort();
2625 }
2626 raw_shll_b_rr(d,r) ;
2627 unlock2(r);
2628 unlock2(d);
2629 }
2630 MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2631
2632
2633 MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2634 {
2635 if (!i && !needflags)
2636 return;
2637 CLOBBER_ROR;
2638 r=rmw(r,1,1);
2639 raw_ror_b_ri(r,i);
2640 unlock2(r);
2641 }
2642 MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2643
2644 MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2645 {
2646 if (!i && !needflags)
2647 return;
2648 CLOBBER_ROR;
2649 r=rmw(r,2,2);
2650 raw_ror_w_ri(r,i);
2651 unlock2(r);
2652 }
2653 MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2654
2655 MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2656 {
2657 if (!i && !needflags)
2658 return;
2659 CLOBBER_ROR;
2660 r=rmw(r,4,4);
2661 raw_ror_l_ri(r,i);
2662 unlock2(r);
2663 }
2664 MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2665
2666 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2667 {
2668 if (isconst(r)) {
2669 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2670 return;
2671 }
2672 CLOBBER_ROR;
2673 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2674 d=rmw(d,4,4);
2675 raw_ror_l_rr(d,r) ;
2676 unlock2(r);
2677 unlock2(d);
2678 }
2679 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2680
2681 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2682 {
2683 if (isconst(r)) {
2684 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2685 return;
2686 }
2687 CLOBBER_ROR;
2688 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2689 d=rmw(d,2,2);
2690 raw_ror_w_rr(d,r) ;
2691 unlock2(r);
2692 unlock2(d);
2693 }
2694 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2695
2696 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2697 {
2698 if (isconst(r)) {
2699 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2700 return;
2701 }
2702
2703 CLOBBER_ROR;
2704 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2705 d=rmw(d,1,1);
2706 raw_ror_b_rr(d,r) ;
2707 unlock2(r);
2708 unlock2(d);
2709 }
2710 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2711
2712 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2713 {
2714 if (isconst(r)) {
2715 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2716 return;
2717 }
2718 CLOBBER_SHRL;
2719 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2720 d=rmw(d,4,4);
2721 Dif (r!=1) {
2722 write_log("Illegal register %d in raw_shrl_l\n",r);
2723 abort();
2724 }
2725 raw_shrl_l_rr(d,r) ;
2726 unlock2(r);
2727 unlock2(d);
2728 }
2729 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2730
2731 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2732 { /* Can only do this with r==1, i.e. cl */
2733
2734 if (isconst(r)) {
2735 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2736 return;
2737 }
2738 CLOBBER_SHRL;
2739 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2740 d=rmw(d,2,2);
2741 Dif (r!=1) {
2742 write_log("Illegal register %d in raw_shrl_w\n",r);
2743 abort();
2744 }
2745 raw_shrl_w_rr(d,r) ;
2746 unlock2(r);
2747 unlock2(d);
2748 }
2749 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2750
2751 MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2752 { /* Can only do this with r==1, i.e. cl */
2753
2754 if (isconst(r)) {
2755 COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2756 return;
2757 }
2758
2759 CLOBBER_SHRL;
2760 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2761 d=rmw(d,1,1);
2762 Dif (r!=1) {
2763 write_log("Illegal register %d in raw_shrl_b\n",r);
2764 abort();
2765 }
2766 raw_shrl_b_rr(d,r) ;
2767 unlock2(r);
2768 unlock2(d);
2769 }
2770 MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2771
2772
2773
2774 MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2775 {
2776 if (!i && !needflags)
2777 return;
2778 if (isconst(r) && !needflags) {
2779 live.state[r].val<<=i;
2780 return;
2781 }
2782 CLOBBER_SHLL;
2783 r=rmw(r,4,4);
2784 raw_shll_l_ri(r,i);
2785 unlock2(r);
2786 }
2787 MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2788
2789 MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2790 {
2791 if (!i && !needflags)
2792 return;
2793 CLOBBER_SHLL;
2794 r=rmw(r,2,2);
2795 raw_shll_w_ri(r,i);
2796 unlock2(r);
2797 }
2798 MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2799
2800 MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2801 {
2802 if (!i && !needflags)
2803 return;
2804 CLOBBER_SHLL;
2805 r=rmw(r,1,1);
2806 raw_shll_b_ri(r,i);
2807 unlock2(r);
2808 }
2809 MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2810
2811 MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2812 {
2813 if (!i && !needflags)
2814 return;
2815 if (isconst(r) && !needflags) {
2816 live.state[r].val>>=i;
2817 return;
2818 }
2819 CLOBBER_SHRL;
2820 r=rmw(r,4,4);
2821 raw_shrl_l_ri(r,i);
2822 unlock2(r);
2823 }
2824 MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2825
2826 MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2827 {
2828 if (!i && !needflags)
2829 return;
2830 CLOBBER_SHRL;
2831 r=rmw(r,2,2);
2832 raw_shrl_w_ri(r,i);
2833 unlock2(r);
2834 }
2835 MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2836
2837 MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2838 {
2839 if (!i && !needflags)
2840 return;
2841 CLOBBER_SHRL;
2842 r=rmw(r,1,1);
2843 raw_shrl_b_ri(r,i);
2844 unlock2(r);
2845 }
2846 MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2847
2848 MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2849 {
2850 if (!i && !needflags)
2851 return;
2852 CLOBBER_SHRA;
2853 r=rmw(r,4,4);
2854 raw_shra_l_ri(r,i);
2855 unlock2(r);
2856 }
2857 MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2858
2859 MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2860 {
2861 if (!i && !needflags)
2862 return;
2863 CLOBBER_SHRA;
2864 r=rmw(r,2,2);
2865 raw_shra_w_ri(r,i);
2866 unlock2(r);
2867 }
2868 MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2869
2870 MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2871 {
2872 if (!i && !needflags)
2873 return;
2874 CLOBBER_SHRA;
2875 r=rmw(r,1,1);
2876 raw_shra_b_ri(r,i);
2877 unlock2(r);
2878 }
2879 MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2880
2881 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2882 {
2883 if (isconst(r)) {
2884 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2885 return;
2886 }
2887 CLOBBER_SHRA;
2888 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2889 d=rmw(d,4,4);
2890 Dif (r!=1) {
2891 write_log("Illegal register %d in raw_shra_l\n",r);
2892 abort();
2893 }
2894 raw_shra_l_rr(d,r) ;
2895 unlock2(r);
2896 unlock2(d);
2897 }
2898 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2899
2900 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2901 { /* Can only do this with r==1, i.e. cl */
2902
2903 if (isconst(r)) {
2904 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2905 return;
2906 }
2907 CLOBBER_SHRA;
2908 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2909 d=rmw(d,2,2);
2910 Dif (r!=1) {
2911 write_log("Illegal register %d in raw_shra_w\n",r);
2912 abort();
2913 }
2914 raw_shra_w_rr(d,r) ;
2915 unlock2(r);
2916 unlock2(d);
2917 }
2918 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2919
2920 MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2921 { /* Can only do this with r==1, i.e. cl */
2922
2923 if (isconst(r)) {
2924 COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2925 return;
2926 }
2927
2928 CLOBBER_SHRA;
2929 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2930 d=rmw(d,1,1);
2931 Dif (r!=1) {
2932 write_log("Illegal register %d in raw_shra_b\n",r);
2933 abort();
2934 }
2935 raw_shra_b_rr(d,r) ;
2936 unlock2(r);
2937 unlock2(d);
2938 }
2939 MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2940
2941
2942 MIDFUNC(2,setcc,(W1 d, IMM cc))
2943 {
2944 CLOBBER_SETCC;
2945 d=writereg(d,1);
2946 raw_setcc(d,cc);
2947 unlock2(d);
2948 }
2949 MENDFUNC(2,setcc,(W1 d, IMM cc))
2950
2951 MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2952 {
2953 CLOBBER_SETCC;
2954 raw_setcc_m(d,cc);
2955 }
2956 MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2957
2958 MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
2959 {
2960 if (d==s)
2961 return;
2962 CLOBBER_CMOV;
2963 s=readreg(s,1);
2964 d=rmw(d,1,1);
2965 raw_cmov_b_rr(d,s,cc);
2966 unlock2(s);
2967 unlock2(d);
2968 }
2969 MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
2970
2971 MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
2972 {
2973 if (d==s)
2974 return;
2975 CLOBBER_CMOV;
2976 s=readreg(s,2);
2977 d=rmw(d,2,2);
2978 raw_cmov_w_rr(d,s,cc);
2979 unlock2(s);
2980 unlock2(d);
2981 }
2982 MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
2983
2984 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2985 {
2986 if (d==s)
2987 return;
2988 CLOBBER_CMOV;
2989 s=readreg(s,4);
2990 d=rmw(d,4,4);
2991 raw_cmov_l_rr(d,s,cc);
2992 unlock2(s);
2993 unlock2(d);
2994 }
2995 MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2996
2997 MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2998 {
2999 CLOBBER_CMOV;
3000 d=rmw(d,4,4);
3001 raw_cmov_l_rm(d,s,cc);
3002 unlock2(d);
3003 }
3004 MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
3005
3006 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
3007 {
3008 CLOBBER_BSF;
3009 s = readreg(s, 4);
3010 d = writereg(d, 4);
3011 raw_bsf_l_rr(d, s);
3012 unlock2(s);
3013 unlock2(d);
3014 }
3015 MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
3016
3017 /* Set the Z flag depending on the value in s. Note that the
3018 value has to be 0 or -1 (or, more precisely, for non-zero
3019 values, bit 14 must be set)! */
3020 MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3021 {
3022 CLOBBER_BSF;
3023 s=rmw_specific(s,4,4,FLAG_NREG3);
3024 tmp=writereg(tmp,4);
3025 raw_flags_set_zero(s, tmp);
3026 unlock2(tmp);
3027 unlock2(s);
3028 }
3029 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3030
3031 MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3032 {
3033 CLOBBER_MUL;
3034 s=readreg(s,4);
3035 d=rmw(d,4,4);
3036 raw_imul_32_32(d,s);
3037 unlock2(s);
3038 unlock2(d);
3039 }
3040 MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3041
3042 MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3043 {
3044 CLOBBER_MUL;
3045 s=rmw_specific(s,4,4,MUL_NREG2);
3046 d=rmw_specific(d,4,4,MUL_NREG1);
3047 raw_imul_64_32(d,s);
3048 unlock2(s);
3049 unlock2(d);
3050 }
3051 MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3052
3053 MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3054 {
3055 CLOBBER_MUL;
3056 s=rmw_specific(s,4,4,MUL_NREG2);
3057 d=rmw_specific(d,4,4,MUL_NREG1);
3058 raw_mul_64_32(d,s);
3059 unlock2(s);
3060 unlock2(d);
3061 }
3062 MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3063
3064 MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3065 {
3066 CLOBBER_MUL;
3067 s=readreg(s,4);
3068 d=rmw(d,4,4);
3069 raw_mul_32_32(d,s);
3070 unlock2(s);
3071 unlock2(d);
3072 }
3073 MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3074
3075 #if SIZEOF_VOID_P == 8
3076 MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3077 {
3078 int isrmw;
3079
3080 if (isconst(s)) {
3081 set_const(d,(uae_s32)live.state[s].val);
3082 return;
3083 }
3084
3085 CLOBBER_SE32;
3086 isrmw=(s==d);
3087 if (!isrmw) {
3088 s=readreg(s,4);
3089 d=writereg(d,4);
3090 }
3091 else { /* If we try to lock this twice, with different sizes, we
3092 are in trouble! */
3093 s=d=rmw(s,4,4);
3094 }
3095 raw_sign_extend_32_rr(d,s);
3096 if (!isrmw) {
3097 unlock2(d);
3098 unlock2(s);
3099 }
3100 else {
3101 unlock2(s);
3102 }
3103 }
3104 MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3105 #endif
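/* The isrmw dance in the extend handlers exists because a virtual
   register must never be locked twice with different sizes: when
   s==d, a single rmw() lock covers both the read and write roles. */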
3106
3107 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3108 {
3109 int isrmw;
3110
3111 if (isconst(s)) {
3112 set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3113 return;
3114 }
3115
3116 CLOBBER_SE16;
3117 isrmw=(s==d);
3118 if (!isrmw) {
3119 s=readreg(s,2);
3120 d=writereg(d,4);
3121 }
3122 else { /* If we try to lock this twice, with different sizes, we
3123 are in trouble! */
3124 s=d=rmw(s,4,2);
3125 }
3126 raw_sign_extend_16_rr(d,s);
3127 if (!isrmw) {
3128 unlock2(d);
3129 unlock2(s);
3130 }
3131 else {
3132 unlock2(s);
3133 }
3134 }
3135 MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3136
3137 MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3138 {
3139 int isrmw;
3140
3141 if (isconst(s)) {
3142 set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3143 return;
3144 }
3145
3146 isrmw=(s==d);
3147 CLOBBER_SE8;
3148 if (!isrmw) {
3149 s=readreg(s,1);
3150 d=writereg(d,4);
3151 }
3152 else { /* If we try to lock this twice, with different sizes, we
3153 are in trouble! */
3154 s=d=rmw(s,4,1);
3155 }
3156
3157 raw_sign_extend_8_rr(d,s);
3158
3159 if (!isrmw) {
3160 unlock2(d);
3161 unlock2(s);
3162 }
3163 else {
3164 unlock2(s);
3165 }
3166 }
3167 MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3168
3169
3170 MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3171 {
3172 int isrmw;
3173
3174 if (isconst(s)) {
3175 set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3176 return;
3177 }
3178
3179 isrmw=(s==d);
3180 CLOBBER_ZE16;
3181 if (!isrmw) {
3182 s=readreg(s,2);
3183 d=writereg(d,4);
3184 }
3185 else { /* If we try to lock this twice, with different sizes, we
3186 are in trouble! */
3187 s=d=rmw(s,4,2);
3188 }
3189 raw_zero_extend_16_rr(d,s);
3190 if (!isrmw) {
3191 unlock2(d);
3192 unlock2(s);
3193 }
3194 else {
3195 unlock2(s);
3196 }
3197 }
3198 MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3199
3200 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3201 {
3202 int isrmw;
3203 if (isconst(s)) {
3204 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3205 return;
3206 }
3207
3208 isrmw=(s==d);
3209 CLOBBER_ZE8;
3210 if (!isrmw) {
3211 s=readreg(s,1);
3212 d=writereg(d,4);
3213 }
3214 else { /* If we try to lock this twice, with different sizes, we
3215 are in trouble! */
3216 s=d=rmw(s,4,1);
3217 }
3218
3219 raw_zero_extend_8_rr(d,s);
3220
3221 if (!isrmw) {
3222 unlock2(d);
3223 unlock2(s);
3224 }
3225 else {
3226 unlock2(s);
3227 }
3228 }
3229 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3230
3231 MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3232 {
3233 if (d==s)
3234 return;
3235 if (isconst(s)) {
3236 COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3237 return;
3238 }
3239
3240 CLOBBER_MOV;
3241 s=readreg(s,1);
3242 d=writereg(d,1);
3243 raw_mov_b_rr(d,s);
3244 unlock2(d);
3245 unlock2(s);
3246 }
3247 MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3248
3249 MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3250 {
3251 if (d==s)
3252 return;
3253 if (isconst(s)) {
3254 COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3255 return;
3256 }
3257
3258 CLOBBER_MOV;
3259 s=readreg(s,2);
3260 d=writereg(d,2);
3261 raw_mov_w_rr(d,s);
3262 unlock2(d);
3263 unlock2(s);
3264 }
3265 MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3266
3267
3268 MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3269 {
3270 CLOBBER_MOV;
3271 baser=readreg(baser,4);
3272 index=readreg(index,4);
3273 d=writereg(d,4);
3274
3275 raw_mov_l_rrm_indexed(d,baser,index,factor);
3276 unlock2(d);
3277 unlock2(baser);
3278 unlock2(index);
3279 }
3280 MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3281
3282 MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3283 {
3284 CLOBBER_MOV;
3285 baser=readreg(baser,4);
3286 index=readreg(index,4);
3287 d=writereg(d,2);
3288
3289 raw_mov_w_rrm_indexed(d,baser,index,factor);
3290 unlock2(d);
3291 unlock2(baser);
3292 unlock2(index);
3293 }
3294 MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3295
3296 MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3297 {
3298 CLOBBER_MOV;
3299 baser=readreg(baser,4);
3300 index=readreg(index,4);
3301 d=writereg(d,1);
3302
3303 raw_mov_b_rrm_indexed(d,baser,index,factor);
3304
3305 unlock2(d);
3306 unlock2(baser);
3307 unlock2(index);
3308 }
3309 MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3310
3311
3312 MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3313 {
3314 CLOBBER_MOV;
3315 baser=readreg(baser,4);
3316 index=readreg(index,4);
3317 s=readreg(s,4);
3318
3319 Dif (baser==s || index==s)
3320 abort();
3321
3322
3323 raw_mov_l_mrr_indexed(baser,index,factor,s);
3324 unlock2(s);
3325 unlock2(baser);
3326 unlock2(index);
3327 }
3328 MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3329
3330 MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3331 {
3332 CLOBBER_MOV;
3333 baser=readreg(baser,4);
3334 index=readreg(index,4);
3335 s=readreg(s,2);
3336
3337 raw_mov_w_mrr_indexed(baser,index,factor,s);
3338 unlock2(s);
3339 unlock2(baser);
3340 unlock2(index);
3341 }
3342 MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3343
3344 MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3345 {
3346 CLOBBER_MOV;
3347 s=readreg(s,1);
3348 baser=readreg(baser,4);
3349 index=readreg(index,4);
3350
3351 raw_mov_b_mrr_indexed(baser,index,factor,s);
3352 unlock2(s);
3353 unlock2(baser);
3354 unlock2(index);
3355 }
3356 MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3357
3358
3359 MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3360 {
3361 int basereg=baser;
3362 int indexreg=index;
3363
3364 CLOBBER_MOV;
3365 s=readreg(s,4);
3366 baser=readreg_offset(baser,4);
3367 index=readreg_offset(index,4);
3368
3369 base+=get_offset(basereg);
3370 base+=factor*get_offset(indexreg);
3371
3372 raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3373 unlock2(s);
3374 unlock2(baser);
3375 unlock2(index);
3376 }
3377 MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
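/* readreg_offset() tolerates a pending offset on the register (see
   add_offset); get_offset() then returns that pending amount so it
   can be folded into the constant displacement, as done with "base"
   above, rather than being applied with an explicit add. */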
3378
3379 MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3380 {
3381 int basereg=baser;
3382 int indexreg=index;
3383
3384 CLOBBER_MOV;
3385 s=readreg(s,2);
3386 baser=readreg_offset(baser,4);
3387 index=readreg_offset(index,4);
3388
3389 base+=get_offset(basereg);
3390 base+=factor*get_offset(indexreg);
3391
3392 raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3393 unlock2(s);
3394 unlock2(baser);
3395 unlock2(index);
3396 }
3397 MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3398
3399 MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3400 {
3401 int basereg=baser;
3402 int indexreg=index;
3403
3404 CLOBBER_MOV;
3405 s=readreg(s,1);
3406 baser=readreg_offset(baser,4);
3407 index=readreg_offset(index,4);
3408
3409 base+=get_offset(basereg);
3410 base+=factor*get_offset(indexreg);
3411
3412 raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3413 unlock2(s);
3414 unlock2(baser);
3415 unlock2(index);
3416 }
3417 MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3418
3419
3420
3421 /* Read a long from base+baser+factor*index */
3422 MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3423 {
3424 int basereg=baser;
3425 int indexreg=index;
3426
3427 CLOBBER_MOV;
3428 baser=readreg_offset(baser,4);
3429 index=readreg_offset(index,4);
3430 base+=get_offset(basereg);
3431 base+=factor*get_offset(indexreg);
3432 d=writereg(d,4);
3433 raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3434 unlock2(d);
3435 unlock2(baser);
3436 unlock2(index);
3437 }
3438 MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3439
3440
3441 MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3442 {
3443 int basereg=baser;
3444 int indexreg=index;
3445
3446 CLOBBER_MOV;
3447 remove_offset(d,-1);
3448 baser=readreg_offset(baser,4);
3449 index=readreg_offset(index,4);
3450 base+=get_offset(basereg);
3451 base+=factor*get_offset(indexreg);
3452 d=writereg(d,2);
3453 raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3454 unlock2(d);
3455 unlock2(baser);
3456 unlock2(index);
3457 }
3458 MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3459
3460
3461 MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3462 {
3463 int basereg=baser;
3464 int indexreg=index;
3465
3466 CLOBBER_MOV;
3467 remove_offset(d,-1);
3468 baser=readreg_offset(baser,4);
3469 index=readreg_offset(index,4);
3470 base+=get_offset(basereg);
3471 base+=factor*get_offset(indexreg);
3472 d=writereg(d,1);
3473 raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3474 unlock2(d);
3475 unlock2(baser);
3476 unlock2(index);
3477 }
3478 MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3479
3480 /* Read a long from base+factor*index */
3481 MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3482 {
3483 int indexreg=index;
3484
3485 if (isconst(index)) {
3486 COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3487 return;
3488 }
3489
3490 CLOBBER_MOV;
3491 index=readreg_offset(index,4);
3492 base+=get_offset(indexreg)*factor;
3493 d=writereg(d,4);
3494
3495 raw_mov_l_rm_indexed(d,base,index,factor);
3496 unlock2(index);
3497 unlock2(d);
3498 }
3499 MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3500
3501
3502 /* read the long at the address contained in s+offset and store in d */
3503 MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3504 {
3505 if (isconst(s)) {
3506 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3507 return;
3508 }
3509 CLOBBER_MOV;
3510 s=readreg(s,4);
3511 d=writereg(d,4);
3512
3513 raw_mov_l_rR(d,s,offset);
3514 unlock2(d);
3515 unlock2(s);
3516 }
3517 MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3518
3519 /* read the word at the address contained in s+offset and store in d */
3520 MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3521 {
3522 if (isconst(s)) {
3523 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3524 return;
3525 }
3526 CLOBBER_MOV;
3527 s=readreg(s,4);
3528 d=writereg(d,2);
3529
3530 raw_mov_w_rR(d,s,offset);
3531 unlock2(d);
3532 unlock2(s);
3533 }
3534 MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3535
3536 /* read the byte at the address contained in s+offset and store in d */
3537 MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3538 {
3539 if (isconst(s)) {
3540 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3541 return;
3542 }
3543 CLOBBER_MOV;
3544 s=readreg(s,4);
3545 d=writereg(d,1);
3546
3547 raw_mov_b_rR(d,s,offset);
3548 unlock2(d);
3549 unlock2(s);
3550 }
3551 MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3552
3553 /* read the long at the address contained in s+offset and store in d */
3554 MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3555 {
3556 int sreg=s;
3557 if (isconst(s)) {
3558 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3559 return;
3560 }
3561 CLOBBER_MOV;
3562 s=readreg_offset(s,4);
3563 offset+=get_offset(sreg);
3564 d=writereg(d,4);
3565
3566 raw_mov_l_brR(d,s,offset);
3567 unlock2(d);
3568 unlock2(s);
3569 }
3570 MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3571
3572 /* read the word at the address contained in s+offset and store in d */
3573 MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3574 {
3575 int sreg=s;
3576 if (isconst(s)) {
3577 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3578 return;
3579 }
3580 CLOBBER_MOV;
3581 remove_offset(d,-1);
3582 s=readreg_offset(s,4);
3583 offset+=get_offset(sreg);
3584 d=writereg(d,2);
3585
3586 raw_mov_w_brR(d,s,offset);
3587 unlock2(d);
3588 unlock2(s);
3589 }
3590 MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3591
3592 /* read the byte at the address contained in s+offset and store in d */
3593 MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3594 {
3595 int sreg=s;
3596 if (isconst(s)) {
3597 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3598 return;
3599 }
3600 CLOBBER_MOV;
3601 remove_offset(d,-1);
3602 s=readreg_offset(s,4);
3603 offset+=get_offset(sreg);
3604 d=writereg(d,1);
3605
3606 raw_mov_b_brR(d,s,offset);
3607 unlock2(d);
3608 unlock2(s);
3609 }
3610 MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3611
3612 MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3613 {
3614 int dreg=d;
3615 if (isconst(d)) {
3616 COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3617 return;
3618 }
3619
3620 CLOBBER_MOV;
3621 d=readreg_offset(d,4);
3622 offset+=get_offset(dreg);
3623 raw_mov_l_Ri(d,i,offset);
3624 unlock2(d);
3625 }
3626 MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3627
3628 MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3629 {
3630 int dreg=d;
3631 if (isconst(d)) {
3632 COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3633 return;
3634 }
3635
3636 CLOBBER_MOV;
3637 d=readreg_offset(d,4);
3638 offset+=get_offset(dreg);
3639 raw_mov_w_Ri(d,i,offset);
3640 unlock2(d);
3641 }
3642 MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3643
3644 MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3645 {
3646 int dreg=d;
3647 if (isconst(d)) {
3648 COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3649 return;
3650 }
3651
3652 CLOBBER_MOV;
3653 d=readreg_offset(d,4);
3654 offset+=get_offset(dreg);
3655 raw_mov_b_Ri(d,i,offset);
3656 unlock2(d);
3657 }
3658 MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3659
3660 /* Warning! OFFSET is byte sized only! */
3661 MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3662 {
3663 if (isconst(d)) {
3664 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3665 return;
3666 }
3667 if (isconst(s)) {
3668 COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3669 return;
3670 }
3671
3672 CLOBBER_MOV;
3673 s=readreg(s,4);
3674 d=readreg(d,4);
3675
3676 raw_mov_l_Rr(d,s,offset);
3677 unlock2(d);
3678 unlock2(s);
3679 }
3680 MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3681
3682 MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3683 {
3684 if (isconst(d)) {
3685 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3686 return;
3687 }
3688 if (isconst(s)) {
3689 COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3690 return;
3691 }
3692
3693 CLOBBER_MOV;
3694 s=readreg(s,2);
3695 d=readreg(d,4);
3696 raw_mov_w_Rr(d,s,offset);
3697 unlock2(d);
3698 unlock2(s);
3699 }
3700 MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3701
3702 MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3703 {
3704 if (isconst(d)) {
3705 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3706 return;
3707 }
3708 if (isconst(s)) {
3709 COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3710 return;
3711 }
3712
3713 CLOBBER_MOV;
3714 s=readreg(s,1);
3715 d=readreg(d,4);
3716 raw_mov_b_Rr(d,s,offset);
3717 unlock2(d);
3718 unlock2(s);
3719 }
3720 MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3721
3722 MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3723 {
3724 if (isconst(s)) {
3725 COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3726 return;
3727 }
3728 #if USE_OFFSET
3729 if (d==s) {
3730 add_offset(d,offset);
3731 return;
3732 }
3733 #endif
3734 CLOBBER_LEA;
3735 s=readreg(s,4);
3736 d=writereg(d,4);
3737 raw_lea_l_brr(d,s,offset);
3738 unlock2(d);
3739 unlock2(s);
3740 }
3741 MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3742
3743 MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3744 {
3745 if (!offset) {
3746 COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3747 return;
3748 }
3749 CLOBBER_LEA;
3750 s=readreg(s,4);
3751 index=readreg(index,4);
3752 d=writereg(d,4);
3753
3754 raw_lea_l_brr_indexed(d,s,index,factor,offset);
3755 unlock2(d);
3756 unlock2(index);
3757 unlock2(s);
3758 }
3759 MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3760
3761 MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3762 {
3763 CLOBBER_LEA;
3764 s=readreg(s,4);
3765 index=readreg(index,4);
3766 d=writereg(d,4);
3767
3768 raw_lea_l_rr_indexed(d,s,index,factor);
3769 unlock2(d);
3770 unlock2(index);
3771 unlock2(s);
3772 }
3773 MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3774
3775 /* write s to the long at the address contained in d+offset */
3776 MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3777 {
3778 int dreg=d;
3779 if (isconst(d)) {
3780 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3781 return;
3782 }
3783
3784 CLOBBER_MOV;
3785 s=readreg(s,4);
3786 d=readreg_offset(d,4);
3787 offset+=get_offset(dreg);
3788
3789 raw_mov_l_bRr(d,s,offset);
3790 unlock2(d);
3791 unlock2(s);
3792 }
3793 MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3794
3795 /* write the word s to the address contained in d+offset */
3796 MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3797 {
3798 int dreg=d;
3799
3800 if (isconst(d)) {
3801 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3802 return;
3803 }
3804
3805 CLOBBER_MOV;
3806 s=readreg(s,2);
3807 d=readreg_offset(d,4);
3808 offset+=get_offset(dreg);
3809 raw_mov_w_bRr(d,s,offset);
3810 unlock2(d);
3811 unlock2(s);
3812 }
3813 MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3814
3815 MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3816 {
3817 int dreg=d;
3818 if (isconst(d)) {
3819 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3820 return;
3821 }
3822
3823 CLOBBER_MOV;
3824 s=readreg(s,1);
3825 d=readreg_offset(d,4);
3826 offset+=get_offset(dreg);
3827 raw_mov_b_bRr(d,s,offset);
3828 unlock2(d);
3829 unlock2(s);
3830 }
3831 MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3832
3833 MIDFUNC(1,bswap_32,(RW4 r))
3834 {
3835 int reg=r;
3836
3837 if (isconst(r)) {
3838 uae_u32 oldv=live.state[r].val;
3839 live.state[r].val=reverse32(oldv);
3840 return;
3841 }
3842
3843 CLOBBER_SW32;
3844 r=rmw(r,4,4);
3845 raw_bswap_32(r);
3846 unlock2(r);
3847 }
3848 MENDFUNC(1,bswap_32,(RW4 r))
3849
3850 MIDFUNC(1,bswap_16,(RW2 r))
3851 {
3852 if (isconst(r)) {
3853 uae_u32 oldv=live.state[r].val;
3854 live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3855 (oldv&0xffff0000);
3856 return;
3857 }
3858
3859 CLOBBER_SW16;
3860 r=rmw(r,2,2);
3861
3862 raw_bswap_16(r);
3863 unlock2(r);
3864 }
3865 MENDFUNC(1,bswap_16,(RW2 r))
3866
3867
3868
3869 MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3870 {
3871 int olds;
3872
3873 if (d==s) { /* How pointless! */
3874 return;
3875 }
3876 if (isconst(s)) {
3877 COMPCALL(mov_l_ri)(d,live.state[s].val);
3878 return;
3879 }
3880 olds=s;
3881 disassociate(d);
3882 s=readreg_offset(s,4);
3883 live.state[d].realreg=s;
3884 live.state[d].realind=live.nat[s].nholds;
3885 live.state[d].val=live.state[olds].val;
3886 live.state[d].validsize=4;
3887 live.state[d].dirtysize=4;
3888 set_status(d,DIRTY);
3889
3890 live.nat[s].holds[live.nat[s].nholds]=d;
3891 live.nat[s].nholds++;
3892 log_clobberreg(d);
3893 /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3894 d,s,live.state[d].realind,live.nat[s].nholds); */
3895 unlock2(s);
3896 }
3897 MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
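/* Note that mov_l_rr emits no code at all: d is merely aliased onto
   the native register that already holds s (the nholds/realind
   bookkeeping above), and an actual copy only happens later, if one
   of the two virtual registers gets evicted or made exclusive. */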
3898
3899 MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3900 {
3901 if (isconst(s)) {
3902 COMPCALL(mov_l_mi)(d,live.state[s].val);
3903 return;
3904 }
3905 CLOBBER_MOV;
3906 s=readreg(s,4);
3907
3908 raw_mov_l_mr(d,s);
3909 unlock2(s);
3910 }
3911 MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3912
3913
3914 MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3915 {
3916 if (isconst(s)) {
3917 COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3918 return;
3919 }
3920 CLOBBER_MOV;
3921 s=readreg(s,2);
3922
3923 raw_mov_w_mr(d,s);
3924 unlock2(s);
3925 }
3926 MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3927
3928 MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3929 {
3930 CLOBBER_MOV;
3931 d=writereg(d,2);
3932
3933 raw_mov_w_rm(d,s);
3934 unlock2(d);
3935 }
3936 MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3937
3938 MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3939 {
3940 if (isconst(s)) {
3941 COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3942 return;
3943 }
3944
3945 CLOBBER_MOV;
3946 s=readreg(s,1);
3947
3948 raw_mov_b_mr(d,s);
3949 unlock2(s);
3950 }
3951 MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3952
3953 MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3954 {
3955 CLOBBER_MOV;
3956 d=writereg(d,1);
3957
3958 raw_mov_b_rm(d,s);
3959 unlock2(d);
3960 }
3961 MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3962
3963 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3964 {
3965 set_const(d,s);
3966 return;
3967 }
3968 MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
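/* mov_l_ri likewise emits nothing: the constant is only recorded by
   set_const(), and the isconst() fast paths throughout this file
   fold it into later operations. */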
3969
3970 MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3971 {
3972 CLOBBER_MOV;
3973 d=writereg(d,2);
3974
3975 raw_mov_w_ri(d,s);
3976 unlock2(d);
3977 }
3978 MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3979
3980 MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3981 {
3982 CLOBBER_MOV;
3983 d=writereg(d,1);
3984
3985 raw_mov_b_ri(d,s);
3986 unlock2(d);
3987 }
3988 MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3989
3990
3991 MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3992 {
3993 CLOBBER_ADD;
3994 raw_add_l_mi(d,s) ;
3995 }
3996 MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3997
3998 MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3999 {
4000 CLOBBER_ADD;
4001 raw_add_w_mi(d,s) ;
4002 }
4003 MENDFUNC(2,add_w_mi,(IMM d, IMM s))
4004
4005 MIDFUNC(2,add_b_mi,(IMM d, IMM s))
4006 {
4007 CLOBBER_ADD;
4008 raw_add_b_mi(d,s) ;
4009 }
4010 MENDFUNC(2,add_b_mi,(IMM d, IMM s))
4011
4012
4013 MIDFUNC(2,test_l_ri,(R4 d, IMM i))
4014 {
4015 CLOBBER_TEST;
4016 d=readreg(d,4);
4017
4018 raw_test_l_ri(d,i);
4019 unlock2(d);
4020 }
4021 MENDFUNC(2,test_l_ri,(R4 d, IMM i))
4022
4023 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
4024 {
4025 CLOBBER_TEST;
4026 d=readreg(d,4);
4027 s=readreg(s,4);
4028
4029 raw_test_l_rr(d,s);
4030 unlock2(d);
4031 unlock2(s);
4032 }
4033 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4034
4035 MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4036 {
4037 CLOBBER_TEST;
4038 d=readreg(d,2);
4039 s=readreg(s,2);
4040
4041 raw_test_w_rr(d,s);
4042 unlock2(d);
4043 unlock2(s);
4044 }
4045 MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4046
4047 MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4048 {
4049 CLOBBER_TEST;
4050 d=readreg(d,1);
4051 s=readreg(s,1);
4052
4053 raw_test_b_rr(d,s);
4054 unlock2(d);
4055 unlock2(s);
4056 }
4057 MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4058
4059
4060 MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
4061 {
4062 if (isconst(d) && !needflags) {
4063 live.state[d].val &= i;
4064 return;
4065 }
4066
4067 CLOBBER_AND;
4068 d=rmw(d,4,4);
4069
4070 raw_and_l_ri(d,i);
4071 unlock2(d);
4072 }
4073 MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4074
4075 MIDFUNC(2,and_l,(RW4 d, R4 s))
4076 {
4077 CLOBBER_AND;
4078 s=readreg(s,4);
4079 d=rmw(d,4,4);
4080
4081 raw_and_l(d,s);
4082 unlock2(d);
4083 unlock2(s);
4084 }
4085 MENDFUNC(2,and_l,(RW4 d, R4 s))
4086
4087 MIDFUNC(2,and_w,(RW2 d, R2 s))
4088 {
4089 CLOBBER_AND;
4090 s=readreg(s,2);
4091 d=rmw(d,2,2);
4092
4093 raw_and_w(d,s);
4094 unlock2(d);
4095 unlock2(s);
4096 }
4097 MENDFUNC(2,and_w,(RW2 d, R2 s))
4098
4099 MIDFUNC(2,and_b,(RW1 d, R1 s))
4100 {
4101 CLOBBER_AND;
4102 s=readreg(s,1);
4103 d=rmw(d,1,1);
4104
4105 raw_and_b(d,s);
4106 unlock2(d);
4107 unlock2(s);
4108 }
4109 MENDFUNC(2,and_b,(RW1 d, R1 s))
4110
4111 // gb-- used for making an fpcr value in compemu_fpp.cpp
4112 MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4113 {
4114 CLOBBER_OR;
4115 d=rmw(d,4,4);
4116
4117 raw_or_l_rm(d,s);
4118 unlock2(d);
4119 }
4120 MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4121
4122 MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4123 {
4124 if (isconst(d) && !needflags) {
4125 live.state[d].val|=i;
4126 return;
4127 }
4128 CLOBBER_OR;
4129 d=rmw(d,4,4);
4130
4131 raw_or_l_ri(d,i);
4132 unlock2(d);
4133 }
4134 MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4135
4136 MIDFUNC(2,or_l,(RW4 d, R4 s))
4137 {
4138 if (isconst(d) && isconst(s) && !needflags) {
4139 live.state[d].val|=live.state[s].val;
4140 return;
4141 }
4142 CLOBBER_OR;
4143 s=readreg(s,4);
4144 d=rmw(d,4,4);
4145
4146 raw_or_l(d,s);
4147 unlock2(d);
4148 unlock2(s);
4149 }
4150 MENDFUNC(2,or_l,(RW4 d, R4 s))
4151
4152 MIDFUNC(2,or_w,(RW2 d, R2 s))
4153 {
4154 CLOBBER_OR;
4155 s=readreg(s,2);
4156 d=rmw(d,2,2);
4157
4158 raw_or_w(d,s);
4159 unlock2(d);
4160 unlock2(s);
4161 }
4162 MENDFUNC(2,or_w,(RW2 d, R2 s))
4163
4164 MIDFUNC(2,or_b,(RW1 d, R1 s))
4165 {
4166 CLOBBER_OR;
4167 s=readreg(s,1);
4168 d=rmw(d,1,1);
4169
4170 raw_or_b(d,s);
4171 unlock2(d);
4172 unlock2(s);
4173 }
4174 MENDFUNC(2,or_b,(RW1 d, R1 s))
4175
4176 MIDFUNC(2,adc_l,(RW4 d, R4 s))
4177 {
4178 CLOBBER_ADC;
4179 s=readreg(s,4);
4180 d=rmw(d,4,4);
4181
4182 raw_adc_l(d,s);
4183
4184 unlock2(d);
4185 unlock2(s);
4186 }
4187 MENDFUNC(2,adc_l,(RW4 d, R4 s))
4188
4189 MIDFUNC(2,adc_w,(RW2 d, R2 s))
4190 {
4191 CLOBBER_ADC;
4192 s=readreg(s,2);
4193 d=rmw(d,2,2);
4194
4195 raw_adc_w(d,s);
4196 unlock2(d);
4197 unlock2(s);
4198 }
4199 MENDFUNC(2,adc_w,(RW2 d, R2 s))
4200
4201 MIDFUNC(2,adc_b,(RW1 d, R1 s))
4202 {
4203 CLOBBER_ADC;
4204 s=readreg(s,1);
4205 d=rmw(d,1,1);
4206
4207 raw_adc_b(d,s);
4208 unlock2(d);
4209 unlock2(s);
4210 }
4211 MENDFUNC(2,adc_b,(RW1 d, R1 s))
4212
4213 MIDFUNC(2,add_l,(RW4 d, R4 s))
4214 {
4215 if (isconst(s)) {
4216 COMPCALL(add_l_ri)(d,live.state[s].val);
4217 return;
4218 }
4219
4220 CLOBBER_ADD;
4221 s=readreg(s,4);
4222 d=rmw(d,4,4);
4223
4224 raw_add_l(d,s);
4225
4226 unlock2(d);
4227 unlock2(s);
4228 }
4229 MENDFUNC(2,add_l,(RW4 d, R4 s))
4230
4231 MIDFUNC(2,add_w,(RW2 d, R2 s))
4232 {
4233 if (isconst(s)) {
4234 COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4235 return;
4236 }
4237
4238 CLOBBER_ADD;
4239 s=readreg(s,2);
4240 d=rmw(d,2,2);
4241
4242 raw_add_w(d,s);
4243 unlock2(d);
4244 unlock2(s);
4245 }
4246 MENDFUNC(2,add_w,(RW2 d, R2 s))
4247
4248 MIDFUNC(2,add_b,(RW1 d, R1 s))
4249 {
4250 if (isconst(s)) {
4251 COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4252 return;
4253 }
4254
4255 CLOBBER_ADD;
4256 s=readreg(s,1);
4257 d=rmw(d,1,1);
4258
4259 raw_add_b(d,s);
4260 unlock2(d);
4261 unlock2(s);
4262 }
4263 MENDFUNC(2,add_b,(RW1 d, R1 s))
4264
4265 MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4266 {
4267 if (!i && !needflags)
4268 return;
4269 if (isconst(d) && !needflags) {
4270 live.state[d].val-=i;
4271 return;
4272 }
4273 #if USE_OFFSET
4274 if (!needflags) {
4275 add_offset(d,-i);
4276 return;
4277 }
4278 #endif
4279
4280 CLOBBER_SUB;
4281 d=rmw(d,4,4);
4282
4283 raw_sub_l_ri(d,i);
4284 unlock2(d);
4285 }
4286 MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4287
4288 MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4289 {
4290 if (!i && !needflags)
4291 return;
4292
4293 CLOBBER_SUB;
4294 d=rmw(d,2,2);
4295
4296 raw_sub_w_ri(d,i);
4297 unlock2(d);
4298 }
4299 MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4300
4301 MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4302 {
4303 if (!i && !needflags)
4304 return;
4305
4306 CLOBBER_SUB;
4307 d=rmw(d,1,1);
4308
4309 raw_sub_b_ri(d,i);
4310
4311 unlock2(d);
4312 }
4313 MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4314
4315 MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4316 {
4317 if (!i && !needflags)
4318 return;
4319 if (isconst(d) && !needflags) {
4320 live.state[d].val+=i;
4321 return;
4322 }
4323 #if USE_OFFSET
4324 if (!needflags) {
4325 add_offset(d,i);
4326 return;
4327 }
4328 #endif
4329 CLOBBER_ADD;
4330 d=rmw(d,4,4);
4331 raw_add_l_ri(d,i);
4332 unlock2(d);
4333 }
4334 MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
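/* With USE_OFFSET, add_l_ri and sub_l_ri normally emit nothing
   either: as long as the flags are not needed, the amount is simply
   accumulated as a pending offset (add_offset) and later folded into
   a displacement by the readreg_offset()/get_offset() users above. */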
4335
4336 MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4337 {
4338 if (!i && !needflags)
4339 return;
4340
4341 CLOBBER_ADD;
4342 d=rmw(d,2,2);
4343
4344 raw_add_w_ri(d,i);
4345 unlock2(d);
4346 }
4347 MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4348
4349 MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4350 {
4351 if (!i && !needflags)
4352 return;
4353
4354 CLOBBER_ADD;
4355 d=rmw(d,1,1);
4356
4357 raw_add_b_ri(d,i);
4358
4359 unlock2(d);
4360 }
4361 MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4362
4363 MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4364 {
4365 CLOBBER_SBB;
4366 s=readreg(s,4);
4367 d=rmw(d,4,4);
4368
4369 raw_sbb_l(d,s);
4370 unlock2(d);
4371 unlock2(s);
4372 }
4373 MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4374
4375 MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4376 {
4377 CLOBBER_SBB;
4378 s=readreg(s,2);
4379 d=rmw(d,2,2);
4380
4381 raw_sbb_w(d,s);
4382 unlock2(d);
4383 unlock2(s);
4384 }
4385 MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4386
4387 MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4388 {
4389 CLOBBER_SBB;
4390 s=readreg(s,1);
4391 d=rmw(d,1,1);
4392
4393 raw_sbb_b(d,s);
4394 unlock2(d);
4395 unlock2(s);
4396 }
4397 MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4398
4399 MIDFUNC(2,sub_l,(RW4 d, R4 s))
4400 {
4401 if (isconst(s)) {
4402 COMPCALL(sub_l_ri)(d,live.state[s].val);
4403 return;
4404 }
4405
4406 CLOBBER_SUB;
4407 s=readreg(s,4);
4408 d=rmw(d,4,4);
4409
4410 raw_sub_l(d,s);
4411 unlock2(d);
4412 unlock2(s);
4413 }
4414 MENDFUNC(2,sub_l,(RW4 d, R4 s))
4415
4416 MIDFUNC(2,sub_w,(RW2 d, R2 s))
4417 {
4418 if (isconst(s)) {
4419 COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4420 return;
4421 }
4422
4423 CLOBBER_SUB;
4424 s=readreg(s,2);
4425 d=rmw(d,2,2);
4426
4427 raw_sub_w(d,s);
4428 unlock2(d);
4429 unlock2(s);
4430 }
4431 MENDFUNC(2,sub_w,(RW2 d, R2 s))
4432
4433 MIDFUNC(2,sub_b,(RW1 d, R1 s))
4434 {
4435 if (isconst(s)) {
4436 COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4437 return;
4438 }
4439
4440 CLOBBER_SUB;
4441 s=readreg(s,1);
4442 d=rmw(d,1,1);
4443
4444 raw_sub_b(d,s);
4445 unlock2(d);
4446 unlock2(s);
4447 }
4448 MENDFUNC(2,sub_b,(RW1 d, R1 s))
4449
4450 MIDFUNC(2,cmp_l,(R4 d, R4 s))
4451 {
4452 CLOBBER_CMP;
4453 s=readreg(s,4);
4454 d=readreg(d,4);
4455
4456 raw_cmp_l(d,s);
4457 unlock2(d);
4458 unlock2(s);
4459 }
4460 MENDFUNC(2,cmp_l,(R4 d, R4 s))
4461
4462 MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4463 {
4464 CLOBBER_CMP;
4465 r=readreg(r,4);
4466
4467 raw_cmp_l_ri(r,i);
4468 unlock2(r);
4469 }
4470 MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4471
4472 MIDFUNC(2,cmp_w,(R2 d, R2 s))
4473 {
4474 CLOBBER_CMP;
4475 s=readreg(s,2);
4476 d=readreg(d,2);
4477
4478 raw_cmp_w(d,s);
4479 unlock2(d);
4480 unlock2(s);
4481 }
4482 MENDFUNC(2,cmp_w,(R2 d, R2 s))
4483
4484 MIDFUNC(2,cmp_b,(R1 d, R1 s))
4485 {
4486 CLOBBER_CMP;
4487 s=readreg(s,1);
4488 d=readreg(d,1);
4489
4490 raw_cmp_b(d,s);
4491 unlock2(d);
4492 unlock2(s);
4493 }
4494 MENDFUNC(2,cmp_b,(R1 d, R1 s))
4495
4496
4497 MIDFUNC(2,xor_l,(RW4 d, R4 s))
4498 {
4499 CLOBBER_XOR;
4500 s=readreg(s,4);
4501 d=rmw(d,4,4);
4502
4503 raw_xor_l(d,s);
4504 unlock2(d);
4505 unlock2(s);
4506 }
4507 MENDFUNC(2,xor_l,(RW4 d, R4 s))
4508
4509 MIDFUNC(2,xor_w,(RW2 d, R2 s))
4510 {
4511 CLOBBER_XOR;
4512 s=readreg(s,2);
4513 d=rmw(d,2,2);
4514
4515 raw_xor_w(d,s);
4516 unlock2(d);
4517 unlock2(s);
4518 }
4519 MENDFUNC(2,xor_w,(RW2 d, R2 s))
4520
4521 MIDFUNC(2,xor_b,(RW1 d, R1 s))
4522 {
4523 CLOBBER_XOR;
4524 s=readreg(s,1);
4525 d=rmw(d,1,1);
4526
4527 raw_xor_b(d,s);
4528 unlock2(d);
4529 unlock2(s);
4530 }
4531 MENDFUNC(2,xor_b,(RW1 d, R1 s))
4532
4533 MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4534 {
4535 clobber_flags();
4536 remove_all_offsets();
4537 if (osize==4) {
4538 if (out1!=in1 && out1!=r) {
4539 COMPCALL(forget_about)(out1);
4540 }
4541 }
4542 else {
4543 tomem_c(out1);
4544 }
4545
4546 in1=readreg_specific(in1,isize,REG_PAR1);
4547 r=readreg(r,4);
4548 prepare_for_call_1(); /* This should ensure that there won't be
4549 any need for swapping nregs in prepare_for_call_2
4550 */
4551 #if USE_NORMAL_CALLING_CONVENTION
4552 raw_push_l_r(in1);
4553 #endif
4554 unlock2(in1);
4555 unlock2(r);
4556
4557 prepare_for_call_2();
4558 raw_call_r(r);
4559
4560 #if USE_NORMAL_CALLING_CONVENTION
4561 raw_inc_sp(4);
4562 #endif
4563
4564
4565 live.nat[REG_RESULT].holds[0]=out1;
4566 live.nat[REG_RESULT].nholds=1;
4567 live.nat[REG_RESULT].touched=touchcnt++;
4568
4569 live.state[out1].realreg=REG_RESULT;
4570 live.state[out1].realind=0;
4571 live.state[out1].val=0;
4572 live.state[out1].validsize=osize;
4573 live.state[out1].dirtysize=osize;
4574 set_status(out1,DIRTY);
4575 }
4576 MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
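/* No move is emitted for the return value: out1 is attached to
   REG_RESULT by editing the live.nat/live.state records directly,
   using the same aliasing scheme as mov_l_rr. */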
4577
4578 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4579 {
4580 clobber_flags();
4581 remove_all_offsets();
4582 in1=readreg_specific(in1,isize1,REG_PAR1);
4583 in2=readreg_specific(in2,isize2,REG_PAR2);
4584 r=readreg(r,4);
4585 prepare_for_call_1(); /* This should ensure that there won't be
4586 any need for swapping nregs in prepare_for_call_2
4587 */
4588 #if USE_NORMAL_CALLING_CONVENTION
4589 raw_push_l_r(in2);
4590 raw_push_l_r(in1);
4591 #endif
4592 unlock2(r);
4593 unlock2(in1);
4594 unlock2(in2);
4595 prepare_for_call_2();
4596 raw_call_r(r);
4597 #if USE_NORMAL_CALLING_CONVENTION
4598 raw_inc_sp(8);
4599 #endif
4600 }
4601 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4602
4603 /* forget_about() takes a mid-layer register */
4604 MIDFUNC(1,forget_about,(W4 r))
4605 {
4606 if (isinreg(r))
4607 disassociate(r);
4608 live.state[r].val=0;
4609 set_status(r,UNDEF);
4610 }
4611 MENDFUNC(1,forget_about,(W4 r))
4612
4613 MIDFUNC(0,nop,(void))
4614 {
4615 raw_nop();
4616 }
4617 MENDFUNC(0,nop,(void))
4618
4619
4620 MIDFUNC(1,f_forget_about,(FW r))
4621 {
4622 if (f_isinreg(r))
4623 f_disassociate(r);
4624 live.fate[r].status=UNDEF;
4625 }
4626 MENDFUNC(1,f_forget_about,(FW r))
4627
4628 MIDFUNC(1,fmov_pi,(FW r))
4629 {
4630 r=f_writereg(r);
4631 raw_fmov_pi(r);
4632 f_unlock(r);
4633 }
4634 MENDFUNC(1,fmov_pi,(FW r))
4635
4636 MIDFUNC(1,fmov_log10_2,(FW r))
4637 {
4638 r=f_writereg(r);
4639 raw_fmov_log10_2(r);
4640 f_unlock(r);
4641 }
4642 MENDFUNC(1,fmov_log10_2,(FW r))
4643
4644 MIDFUNC(1,fmov_log2_e,(FW r))
4645 {
4646 r=f_writereg(r);
4647 raw_fmov_log2_e(r);
4648 f_unlock(r);
4649 }
4650 MENDFUNC(1,fmov_log2_e,(FW r))
4651
4652 MIDFUNC(1,fmov_loge_2,(FW r))
4653 {
4654 r=f_writereg(r);
4655 raw_fmov_loge_2(r);
4656 f_unlock(r);
4657 }
4658 MENDFUNC(1,fmov_loge_2,(FW r))
4659
4660 MIDFUNC(1,fmov_1,(FW r))
4661 {
4662 r=f_writereg(r);
4663 raw_fmov_1(r);
4664 f_unlock(r);
4665 }
4666 MENDFUNC(1,fmov_1,(FW r))
4667
4668 MIDFUNC(1,fmov_0,(FW r))
4669 {
4670 r=f_writereg(r);
4671 raw_fmov_0(r);
4672 f_unlock(r);
4673 }
4674 MENDFUNC(1,fmov_0,(FW r))
4675
4676 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4677 {
4678 r=f_writereg(r);
4679 raw_fmov_rm(r,m);
4680 f_unlock(r);
4681 }
4682 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4683
4684 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4685 {
4686 r=f_writereg(r);
4687 raw_fmovi_rm(r,m);
4688 f_unlock(r);
4689 }
4690 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4691
4692 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4693 {
4694 r=f_readreg(r);
4695 raw_fmovi_mr(m,r);
4696 f_unlock(r);
4697 }
4698 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4699
4700 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4701 {
4702 r=f_writereg(r);
4703 raw_fmovs_rm(r,m);
4704 f_unlock(r);
4705 }
4706 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4707
4708 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4709 {
4710 r=f_readreg(r);
4711 raw_fmovs_mr(m,r);
4712 f_unlock(r);
4713 }
4714 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4715
4716 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4717 {
4718 r=f_readreg(r);
4719 raw_fmov_ext_mr(m,r);
4720 f_unlock(r);
4721 }
4722 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4723
4724 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4725 {
4726 r=f_readreg(r);
4727 raw_fmov_mr(m,r);
4728 f_unlock(r);
4729 }
4730 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4731
4732 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4733 {
4734 r=f_writereg(r);
4735 raw_fmov_ext_rm(r,m);
4736 f_unlock(r);
4737 }
4738 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4739
4740 MIDFUNC(2,fmov_rr,(FW d, FR s))
4741 {
4742 if (d==s) { /* How pointless! */
4743 return;
4744 }
4745 #if USE_F_ALIAS
4746 f_disassociate(d);
4747 s=f_readreg(s);
4748 live.fate[d].realreg=s;
4749 live.fate[d].realind=live.fat[s].nholds;
4750 live.fate[d].status=DIRTY;
4751 live.fat[s].holds[live.fat[s].nholds]=d;
4752 live.fat[s].nholds++;
4753 f_unlock(s);
4754 #else
4755 s=f_readreg(s);
4756 d=f_writereg(d);
4757 raw_fmov_rr(d,s);
4758 f_unlock(s);
4759 f_unlock(d);
4760 #endif
4761 }
4762 MENDFUNC(2,fmov_rr,(FW d, FR s))
4763
4764 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4765 {
4766 index=readreg(index,4);
4767
4768 raw_fldcw_m_indexed(index,base);
4769 unlock2(index);
4770 }
4771 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4772
4773 MIDFUNC(1,ftst_r,(FR r))
4774 {
4775 r=f_readreg(r);
4776 raw_ftst_r(r);
4777 f_unlock(r);
4778 }
4779 MENDFUNC(1,ftst_r,(FR r))
4780
4781 MIDFUNC(0,dont_care_fflags,(void))
4782 {
4783 f_disassociate(FP_RESULT);
4784 }
4785 MENDFUNC(0,dont_care_fflags,(void))
4786
4787 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4788 {
4789 s=f_readreg(s);
4790 d=f_writereg(d);
4791 raw_fsqrt_rr(d,s);
4792 f_unlock(s);
4793 f_unlock(d);
4794 }
4795 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4796
4797 MIDFUNC(2,fabs_rr,(FW d, FR s))
4798 {
4799 s=f_readreg(s);
4800 d=f_writereg(d);
4801 raw_fabs_rr(d,s);
4802 f_unlock(s);
4803 f_unlock(d);
4804 }
4805 MENDFUNC(2,fabs_rr,(FW d, FR s))
4806
4807 MIDFUNC(2,fsin_rr,(FW d, FR s))
4808 {
4809 s=f_readreg(s);
4810 d=f_writereg(d);
4811 raw_fsin_rr(d,s);
4812 f_unlock(s);
4813 f_unlock(d);
4814 }
4815 MENDFUNC(2,fsin_rr,(FW d, FR s))
4816
4817 MIDFUNC(2,fcos_rr,(FW d, FR s))
4818 {
4819 s=f_readreg(s);
4820 d=f_writereg(d);
4821 raw_fcos_rr(d,s);
4822 f_unlock(s);
4823 f_unlock(d);
4824 }
4825 MENDFUNC(2,fcos_rr,(FW d, FR s))
4826
4827 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4828 {
4829 s=f_readreg(s);
4830 d=f_writereg(d);
4831 raw_ftwotox_rr(d,s);
4832 f_unlock(s);
4833 f_unlock(d);
4834 }
4835 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4836
4837 MIDFUNC(2,fetox_rr,(FW d, FR s))
4838 {
4839 s=f_readreg(s);
4840 d=f_writereg(d);
4841 raw_fetox_rr(d,s);
4842 f_unlock(s);
4843 f_unlock(d);
4844 }
4845 MENDFUNC(2,fetox_rr,(FW d, FR s))
4846
4847 MIDFUNC(2,frndint_rr,(FW d, FR s))
4848 {
4849 s=f_readreg(s);
4850 d=f_writereg(d);
4851 raw_frndint_rr(d,s);
4852 f_unlock(s);
4853 f_unlock(d);
4854 }
4855 MENDFUNC(2,frndint_rr,(FW d, FR s))
4856
4857 MIDFUNC(2,flog2_rr,(FW d, FR s))
4858 {
4859 s=f_readreg(s);
4860 d=f_writereg(d);
4861 raw_flog2_rr(d,s);
4862 f_unlock(s);
4863 f_unlock(d);
4864 }
4865 MENDFUNC(2,flog2_rr,(FW d, FR s))
4866
4867 MIDFUNC(2,fneg_rr,(FW d, FR s))
4868 {
4869 s=f_readreg(s);
4870 d=f_writereg(d);
4871 raw_fneg_rr(d,s);
4872 f_unlock(s);
4873 f_unlock(d);
4874 }
4875 MENDFUNC(2,fneg_rr,(FW d, FR s))
4876
4877 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4878 {
4879 s=f_readreg(s);
4880 d=f_rmw(d);
4881 raw_fadd_rr(d,s);
4882 f_unlock(s);
4883 f_unlock(d);
4884 }
4885 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4886
4887 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4888 {
4889 s=f_readreg(s);
4890 d=f_rmw(d);
4891 raw_fsub_rr(d,s);
4892 f_unlock(s);
4893 f_unlock(d);
4894 }
4895 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4896
4897 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4898 {
4899 d=f_readreg(d);
4900 s=f_readreg(s);
4901 raw_fcmp_rr(d,s);
4902 f_unlock(s);
4903 f_unlock(d);
4904 }
4905 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4906
4907 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4908 {
4909 s=f_readreg(s);
4910 d=f_rmw(d);
4911 raw_fdiv_rr(d,s);
4912 f_unlock(s);
4913 f_unlock(d);
4914 }
4915 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4916
4917 MIDFUNC(2,frem_rr,(FRW d, FR s))
4918 {
4919 s=f_readreg(s);
4920 d=f_rmw(d);
4921 raw_frem_rr(d,s);
4922 f_unlock(s);
4923 f_unlock(d);
4924 }
4925 MENDFUNC(2,frem_rr,(FRW d, FR s))
4926
4927 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4928 {
4929 s=f_readreg(s);
4930 d=f_rmw(d);
4931 raw_frem1_rr(d,s);
4932 f_unlock(s);
4933 f_unlock(d);
4934 }
4935 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4936
4937 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4938 {
4939 s=f_readreg(s);
4940 d=f_rmw(d);
4941 raw_fmul_rr(d,s);
4942 f_unlock(s);
4943 f_unlock(d);
4944 }
4945 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4946
4947 /********************************************************************
4948 * Support functions exposed to gencomp. CREATE time *
4949 ********************************************************************/
4950
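/* Load the host Z flag from r: x86 "bsf r,r" sets ZF exactly when its
   source is zero. target_check_bsf() determines at init time whether
   this CPU's bsf leaves the other flags intact; if not, the effect is
   simulated using tmp instead. */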
4951 void set_zero(int r, int tmp)
4952 {
4953 if (setzflg_uses_bsf)
4954 bsf_l_rr(r,r);
4955 else
4956 simulate_bsf(tmp,r);
4957 }
4958
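/* "Kill the RAT": on processors that suffer partial-register stalls it
   can be cheaper to renew a hardware register wholesale than to merge
   a narrow write into it. A register qualifies when its memory copy is
   already usable (INMEM/CLEAN/ISCONST) or all four bytes are dirty
   anyway. */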
4959 int kill_rodent(int r)
4960 {
4961 return KILLTHERAT &&
4962 have_rat_stall &&
4963 (live.state[r].status==INMEM ||
4964 live.state[r].status==CLEAN ||
4965 live.state[r].status==ISCONST ||
4966 live.state[r].dirtysize==4);
4967 }
4968
4969 uae_u32 get_const(int r)
4970 {
4971 Dif (!isconst(r)) {
4972 write_log("Register %d should be constant, but isn't\n",r);
4973 abort();
4974 }
4975 return live.state[r].val;
4976 }
4977
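/* The emulated PC is tracked lazily: m68k_pc_offset counts the bytes
   decoded since PC_P was last brought up to date. Fold the offset back
   into PC_P (and comp_pc_p) before anything that needs an exact PC. */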
4978 void sync_m68k_pc(void)
4979 {
4980 if (m68k_pc_offset) {
4981 add_l_ri(PC_P,m68k_pc_offset);
4982 comp_pc_p+=m68k_pc_offset;
4983 m68k_pc_offset=0;
4984 }
4985 }
4986
4987 /********************************************************************
4988 * Scratch registers management *
4989 ********************************************************************/
4990
4991 struct scratch_t {
4992 uae_u32 regs[VREGS];
4993 fpu_register fregs[VFREGS];
4994 };
4995
4996 static scratch_t scratch;
4997
4998 /********************************************************************
4999 * Support functions exposed to newcpu *
5000 ********************************************************************/
5001
5002 static inline const char *str_on_off(bool b)
5003 {
5004 return b ? "on" : "off";
5005 }
5006
5007 void compiler_init(void)
5008 {
5009 static bool initialized = false;
5010 if (initialized)
5011 return;
5012
5013 #if JIT_DEBUG
5014 // JIT debug mode ?
5015 JITDebug = PrefsFindBool("jitdebug");
5016 #endif
5017 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
5018
5019 #ifdef USE_JIT_FPU
5020 // Use JIT compiler for FPU instructions ?
5021 avoid_fpu = !PrefsFindBool("jitfpu");
5022 #else
5023 // JIT FPU is always disabled
5024 avoid_fpu = true;
5025 #endif
5026 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
5027
5028 // Get size of the translation cache (in KB)
5029 cache_size = PrefsFindInt32("jitcachesize");
5030 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
5031
5032 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
5033 raw_init_cpu();
5034 setzflg_uses_bsf = target_check_bsf();
5035 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
5036 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
5037 write_log("<JIT compiler> : alignments for loops and jumps are %d and %d\n", align_loops, align_jumps);
5038
5039 // Translation cache flush mechanism
5040 lazy_flush = PrefsFindBool("jitlazyflush");
5041 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
5042 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
5043
5044 // Compiler features
5045 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
5046 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
5047 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
5048 #if USE_INLINING
5049 follow_const_jumps = PrefsFindBool("jitinline");
5050 #endif
5051 write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
5052 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
5053
5054 // Build compiler tables
5055 build_comp();
5056
5057 initialized = true;
5058
5059 #if PROFILE_UNTRANSLATED_INSNS
5060 write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
5061 #endif
5062
5063 #if PROFILE_COMPILE_TIME
5064 write_log("<JIT compiler> : gather statistics on translation time\n");
5065 emul_start_time = clock();
5066 #endif
5067 }
5068
5069 void compiler_exit(void)
5070 {
5071 #if PROFILE_COMPILE_TIME
5072 emul_end_time = clock();
5073 #endif
5074
5075 // Deallocate translation cache
5076 if (compiled_code) {
5077 vm_release(compiled_code, cache_size * 1024);
5078 compiled_code = 0;
5079 }
5080
5081 // Deallocate popallspace
5082 if (popallspace) {
5083 vm_release(popallspace, POPALLSPACE_SIZE);
5084 popallspace = 0;
5085 }
5086
5087 #if PROFILE_COMPILE_TIME
5088 write_log("### Compile Block statistics\n");
5089 write_log("Number of calls to compile_block : %d\n", compile_count);
5090 uae_u32 emul_time = emul_end_time - emul_start_time;
5091 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5092 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5093 100.0*double(compile_time)/double(emul_time));
5094 write_log("\n");
5095 #endif
5096
5097 #if PROFILE_UNTRANSLATED_INSNS
5098 uae_u64 untranslated_count = 0;
5099 for (int i = 0; i < 65536; i++) {
5100 opcode_nums[i] = i;
5101 untranslated_count += raw_cputbl_count[i];
5102 }
5103 write_log("Sorting untranslated instruction counts...\n");
5104 qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5105 write_log("\nRank Opc Count Name\n");
5106 for (int i = 0; i < untranslated_top_ten; i++) {
5107 uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5108 struct instr *dp;
5109 struct mnemolookup *lookup;
5110 if (!count)
5111 break;
5112 dp = table68k + opcode_nums[i];
5113 for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5114 ;
5115 write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], (unsigned long)count, lookup->name);
5116 }
5117 #endif
5118
5119 #if RECORD_REGISTER_USAGE
5120 int reg_count_ids[16];
5121 uint64 tot_reg_count = 0;
5122 for (int i = 0; i < 16; i++) {
5123 reg_count_ids[i] = i;
5124 tot_reg_count += reg_count[i];
5125 }
5126 qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
5127 uint64 cum_reg_count = 0;
5128 for (int i = 0; i < 16; i++) {
5129 int r = reg_count_ids[i];
5130 cum_reg_count += reg_count[r];
5131 printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
5132 reg_count[r],
5133 100.0*double(reg_count[r])/double(tot_reg_count),
5134 100.0*double(cum_reg_count)/double(tot_reg_count));
5135 }
5136 #endif
5137 }
5138
5139 bool compiler_use_jit(void)
5140 {
5141 // Check for the "jit" prefs item
5142 if (!PrefsFindBool("jit"))
5143 return false;
5144
5145 // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5146 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5147 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5148 return false;
5149 }
5150
5151 // Enable JIT for 68020+ emulation only
5152 if (CPUType < 2) {
5153 write_log("<JIT compiler> : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType);
5154 return false;
5155 }
5156
5157 return true;
5158 }
5159
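/* Virtual register layout set up below: vregs 0-15 shadow the 68k
   D0-D7/A0-A7 in regs[]; PC_P, FLAGX and FLAGTMP (backed by regflags)
   and NEXT_HANDLER are specials; everything else is scratch. FP vregs
   0-7 likewise shadow FP0-FP7, plus the FP_RESULT special. */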
5160 void init_comp(void)
5161 {
5162 int i;
5163 uae_s8* cb=can_byte;
5164 uae_s8* cw=can_word;
5165 uae_s8* au=always_used;
5166
5167 #if RECORD_REGISTER_USAGE
5168 for (i=0;i<16;i++)
5169 reg_count_local[i] = 0;
5170 #endif
5171
5172 for (i=0;i<VREGS;i++) {
5173 live.state[i].realreg=-1;
5174 live.state[i].needflush=NF_SCRATCH;
5175 live.state[i].val=0;
5176 set_status(i,UNDEF);
5177 }
5178
5179 for (i=0;i<VFREGS;i++) {
5180 live.fate[i].status=UNDEF;
5181 live.fate[i].realreg=-1;
5182 live.fate[i].needflush=NF_SCRATCH;
5183 }
5184
5185 for (i=0;i<VREGS;i++) {
5186 if (i<16) { /* First 16 registers map to 68k registers */
5187 live.state[i].mem=((uae_u32*)&regs)+i;
5188 live.state[i].needflush=NF_TOMEM;
5189 set_status(i,INMEM);
5190 }
5191 else
5192 live.state[i].mem=scratch.regs+i;
5193 }
5194 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5195 live.state[PC_P].needflush=NF_TOMEM;
5196 set_const(PC_P,(uintptr)comp_pc_p);
5197
5198 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5199 live.state[FLAGX].needflush=NF_TOMEM;
5200 set_status(FLAGX,INMEM);
5201
5202 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5203 live.state[FLAGTMP].needflush=NF_TOMEM;
5204 set_status(FLAGTMP,INMEM);
5205
5206 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5207 set_status(NEXT_HANDLER,UNDEF);
5208
5209 for (i=0;i<VFREGS;i++) {
5210 if (i<8) { /* First 8 registers map to 68k FPU registers */
5211 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5212 live.fate[i].needflush=NF_TOMEM;
5213 live.fate[i].status=INMEM;
5214 }
5215 else if (i==FP_RESULT) {
5216 live.fate[i].mem=(uae_u32*)(&fpu.result);
5217 live.fate[i].needflush=NF_TOMEM;
5218 live.fate[i].status=INMEM;
5219 }
5220 else
5221 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5222 }
5223
5224
5225 for (i=0;i<N_REGS;i++) {
5226 live.nat[i].touched=0;
5227 live.nat[i].nholds=0;
5228 live.nat[i].locked=0;
5229 if (*cb==i) {
5230 live.nat[i].canbyte=1; cb++;
5231 } else live.nat[i].canbyte=0;
5232 if (*cw==i) {
5233 live.nat[i].canword=1; cw++;
5234 } else live.nat[i].canword=0;
5235 if (*au==i) {
5236 live.nat[i].locked=1; au++;
5237 }
5238 }
5239
5240 for (i=0;i<N_FREGS;i++) {
5241 live.fat[i].touched=0;
5242 live.fat[i].nholds=0;
5243 live.fat[i].locked=0;
5244 }
5245
5246 touchcnt=1;
5247 m68k_pc_offset=0;
5248 live.flags_in_flags=TRASH;
5249 live.flags_on_stack=VALID;
5250 live.flags_are_important=1;
5251
5252 raw_fp_init();
5253 }
5254
5255 /* Only do this if you really mean it! The next call should be to init!*/
5256 void flush(int save_regs)
5257 {
5258 int i;
5259
5260 log_flush();
5261 flush_flags(); /* low level */
5262 sync_m68k_pc(); /* mid level */
5263
5264 if (save_regs) {
5265 for (i=0;i<VFREGS;i++) {
5266 if (live.fate[i].needflush==NF_SCRATCH ||
5267 live.fate[i].status==CLEAN) {
5268 f_disassociate(i);
5269 }
5270 }
5271 for (i=0;i<VREGS;i++) {
5272 if (live.state[i].needflush==NF_TOMEM) {
5273 switch(live.state[i].status) {
5274 case INMEM:
5275 if (live.state[i].val) {
5276 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5277 log_vwrite(i);
5278 live.state[i].val=0;
5279 }
5280 break;
5281 case CLEAN:
5282 case DIRTY:
5283 remove_offset(i,-1); tomem(i); break;
5284 case ISCONST:
5285 if (i!=PC_P)
5286 writeback_const(i);
5287 break;
5288 default: break;
5289 }
5290 Dif (live.state[i].val && i!=PC_P) {
5291 write_log("Register %d still has val %x\n",
5292 i,live.state[i].val);
5293 }
5294 }
5295 }
5296 for (i=0;i<VFREGS;i++) {
5297 if (live.fate[i].needflush==NF_TOMEM &&
5298 live.fate[i].status==DIRTY) {
5299 f_evict(i);
5300 }
5301 }
5302 raw_fp_cleanup_drop();
5303 }
5304 if (needflags) {
5305 write_log("Warning! flush with needflags=1!\n");
5306 }
5307 }
5308
5309 static void flush_keepflags(void)
5310 {
5311 int i;
5312
5313 for (i=0;i<VFREGS;i++) {
5314 if (live.fate[i].needflush==NF_SCRATCH ||
5315 live.fate[i].status==CLEAN) {
5316 f_disassociate(i);
5317 }
5318 }
5319 for (i=0;i<VREGS;i++) {
5320 if (live.state[i].needflush==NF_TOMEM) {
5321 switch(live.state[i].status) {
5322 case INMEM:
5323 /* Can't adjust the offset here --- that needs "add" */
5324 break;
5325 case CLEAN:
5326 case DIRTY:
5327 remove_offset(i,-1); tomem(i); break;
5328 case ISCONST:
5329 if (i!=PC_P)
5330 writeback_const(i);
5331 break;
5332 default: break;
5333 }
5334 }
5335 }
5336 for (i=0;i<VFREGS;i++) {
5337 if (live.fate[i].needflush==NF_TOMEM &&
5338 live.fate[i].status==DIRTY) {
5339 f_evict(i);
5340 }
5341 }
5342 raw_fp_cleanup_drop();
5343 }
5344
5345 void freescratch(void)
5346 {
5347 int i;
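/* Hardware register 4 is the stack pointer (ESP/RSP) on the x86
   targets and is expected to stay locked, hence the exemption. */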
5348 for (i=0;i<N_REGS;i++)
5349 if (live.nat[i].locked && i!=4)
5350 write_log("Warning! %d is locked\n",i);
5351
5352 for (i=0;i<VREGS;i++)
5353 if (live.state[i].needflush==NF_SCRATCH) {
5354 forget_about(i);
5355 }
5356
5357 for (i=0;i<VFREGS;i++)
5358 if (live.fate[i].needflush==NF_SCRATCH) {
5359 f_forget_about(i);
5360 }
5361 }
5362
5363 /********************************************************************
5364 * Support functions, internal *
5365 ********************************************************************/
5366
5367
5368 static void align_target(uae_u32 a)
5369 {
5370 if (!a)
5371 return;
5372
5373 if (tune_nop_fillers)
5374 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5375 else {
5376 /* Fill with NOPs --- makes debugging with gdb easier */
5377 while ((uintptr)target&(a-1))
5378 *target++=0x90;
5379 }
5380 }
5381
5382 static __inline__ int isinrom(uintptr addr)
5383 {
5384 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5385 }
5386
5387 static void flush_all(void)
5388 {
5389 int i;
5390
5391 log_flush();
5392 for (i=0;i<VREGS;i++)
5393 if (live.state[i].status==DIRTY) {
5394 if (!call_saved[live.state[i].realreg]) {
5395 tomem(i);
5396 }
5397 }
5398 for (i=0;i<VFREGS;i++)
5399 if (f_isinreg(i))
5400 f_evict(i);
5401 raw_fp_cleanup_drop();
5402 }
5403
5404 /* Make sure all registers that will get clobbered by a call are
5405 safe and sound in memory */
5406 static void prepare_for_call_1(void)
5407 {
5408 flush_all(); /* If there are registers that don't get clobbered,
5409 * we should be a bit more selective here */
5410 }
5411
5412 /* We will call a C routine in a moment. That will clobber all registers,
5413 so we need to disassociate everything */
5414 static void prepare_for_call_2(void)
5415 {
5416 int i;
5417 for (i=0;i<N_REGS;i++)
5418 if (!call_saved[i] && live.nat[i].nholds>0)
5419 free_nreg(i);
5420
5421 for (i=0;i<N_FREGS;i++)
5422 if (live.fat[i].nholds>0)
5423 f_free_nreg(i);
5424
5425 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5426 flags at the very start of the call_r
5427 functions! */
5428 }
5429
5430 /********************************************************************
5431 * Memory access and related functions, CREATE time *
5432 ********************************************************************/
5433
5434 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5435 {
5436 next_pc_p=not_taken;
5437 taken_pc_p=taken;
5438 branch_cc=cond;
5439 }
5440
5441
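/* Inter-block jumps resolve through each block's direct_handler_to_use
   slot, either loaded indirectly or recorded as a patchable dependency,
   so invalidating a block only requires repointing that slot instead of
   hunting down every jump already emitted. */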
5442 static uae_u32 get_handler_address(uae_u32 addr)
5443 {
5445 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5446 return (uintptr)&(bi->direct_handler_to_use);
5447 }
5448
5449 static uae_u32 get_handler(uae_u32 addr)
5450 {
5452 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5453 return (uintptr)bi->direct_handler_to_use;
5454 }
5455
5456 static void load_handler(int reg, uae_u32 addr)
5457 {
5458 mov_l_rm(reg,get_handler_address(addr));
5459 }
5460
5461 /* This version assumes that it is writing *real* memory, and *will* fail
5462 * if that assumption is wrong! No branches, no second chances, just
5463 * straight go-for-it attitude */
5464
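/* The 68k is big-endian and the x86/x86-64 host is little-endian, so
   word and long stores are byte-swapped in a scratch register before
   being written to host address (address + MEMBaseDiff). */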
5465 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5466 {
5467 int f=tmp;
5468
5469 if (clobber)
5470 f=source;
5471
5472 switch(size) {
5473 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5474 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5475 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5476 }
5477 forget_about(tmp);
5478 forget_about(f);
5479 }
5480
5481 void writebyte(int address, int source, int tmp)
5482 {
5483 writemem_real(address,source,1,tmp,0);
5484 }
5485
5486 static __inline__ void writeword_general(int address, int source, int tmp,
5487 int clobber)
5488 {
5489 writemem_real(address,source,2,tmp,clobber);
5490 }
5491
5492 void writeword_clobber(int address, int source, int tmp)
5493 {
5494 writeword_general(address,source,tmp,1);
5495 }
5496
5497 void writeword(int address, int source, int tmp)
5498 {
5499 writeword_general(address,source,tmp,0);
5500 }
5501
5502 static __inline__ void writelong_general(int address, int source, int tmp,
5503 int clobber)
5504 {
5505 writemem_real(address,source,4,tmp,clobber);
5506 }
5507
5508 void writelong_clobber(int address, int source, int tmp)
5509 {
5510 writelong_general(address,source,tmp,1);
5511 }
5512
5513 void writelong(int address, int source, int tmp)
5514 {
5515 writelong_general(address,source,tmp,0);
5516 }
5517
5518
5519
5520 /* This version assumes that it is reading *real* memory, and *will* fail
5521 * if that assumption is wrong! No branches, no second chances, just
5522 * straight go-for-it attitude */
5523
5524 static void readmem_real(int address, int dest, int size, int tmp)
5525 {
5526 int f=tmp;
5527
5528 if (size==4 && address!=dest)
5529 f=dest;
5530
5531 switch(size) {
5532 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5533 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5534 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5535 }
5536 forget_about(tmp);
5537 }
5538
5539 void readbyte(int address, int dest, int tmp)
5540 {
5541 readmem_real(address,dest,1,tmp);
5542 }
5543
5544 void readword(int address, int dest, int tmp)
5545 {
5546 readmem_real(address,dest,2,tmp);
5547 }
5548
5549 void readlong(int address, int dest, int tmp)
5550 {
5551 readmem_real(address,dest,4,tmp);
5552 }
5553
5554 void get_n_addr(int address, int dest, int tmp)
5555 {
5556 // Convert a 68k virtual address into the corresponding host
5557 // address: the two are identical under real addressing, and offset
5558 // by MEMBaseDiff under direct addressing.
5559
5569 #if REAL_ADDRESSING
5570 mov_l_rr(dest, address);
5571 #elif DIRECT_ADDRESSING
5572 lea_l_brr(dest,address,MEMBaseDiff);
5573 #endif
5574 forget_about(tmp);
5575 }
5576
5577 void get_n_addr_jmp(int address, int dest, int tmp)
5578 {
5579 /* For this, we need to get the same address as the rest of UAE
5580 would --- otherwise we end up translating everything twice */
5581 get_n_addr(address,dest,tmp);
5582 }
5583
5584
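/* 68020+ extension word fields, as decoded below: bits 15-12 = index
   register (D0-D7/A0-A7 -> 0-15), bit 11 = index size (word/long),
   bits 10-9 = scale, bit 8 = full-format flag, bit 7 = base suppress,
   bit 6 = index suppress, bits 5-4 = base displacement size, bit 2 =
   post-indexing, bits 1-0 = outer displacement size. The brief format
   instead carries an 8-bit displacement in bits 7-0. */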
5585 /* base is a register, but dp is an actual value.
5586 target is a register, as is tmp */
5587 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5588 {
5589 int reg = (dp >> 12) & 15;
5590 int regd_shift=(dp >> 9) & 3;
5591
5592 if (dp & 0x100) {
5593 int ignorebase=(dp&0x80);
5594 int ignorereg=(dp&0x40);
5595 int addbase=0;
5596 int outer=0;
5597
5598 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5599 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5600
5601 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5602 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5603
5604 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5605 if (!ignorereg) {
5606 if ((dp & 0x800) == 0)
5607 sign_extend_16_rr(target,reg);
5608 else
5609 mov_l_rr(target,reg);
5610 shll_l_ri(target,regd_shift);
5611 }
5612 else
5613 mov_l_ri(target,0);
5614
5615 /* target is now regd */
5616 if (!ignorebase)
5617 add_l(target,base);
5618 add_l_ri(target,addbase);
5619 if (dp&0x03) readlong(target,target,tmp);
5620 } else { /* do the getlong first, then add regd */
5621 if (!ignorebase) {
5622 mov_l_rr(target,base);
5623 add_l_ri(target,addbase);
5624 }
5625 else
5626 mov_l_ri(target,addbase);
5627 if (dp&0x03) readlong(target,target,tmp);
5628
5629 if (!ignorereg) {
5630 if ((dp & 0x800) == 0)
5631 sign_extend_16_rr(tmp,reg);
5632 else
5633 mov_l_rr(tmp,reg);
5634 shll_l_ri(tmp,regd_shift);
5635 /* tmp is now regd */
5636 add_l(target,tmp);
5637 }
5638 }
5639 add_l_ri(target,outer);
5640 }
5641 else { /* 68000 version */
5642 if ((dp & 0x800) == 0) { /* Sign extend */
5643 sign_extend_16_rr(target,reg);
5644 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5645 }
5646 else {
5647 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5648 }
5649 }
5650 forget_about(tmp);
5651 }
5652
5653
5654
5655
5656
5657 void set_cache_state(int enabled)
5658 {
5659 if (enabled!=letit)
5660 flush_icache_hard(77);
5661 letit=enabled;
5662 }
5663
5664 int get_cache_state(void)
5665 {
5666 return letit;
5667 }
5668
5669 uae_u32 get_jitted_size(void)
5670 {
5671 if (compiled_code)
5672 return current_compile_p-compiled_code;
5673 return 0;
5674 }
5675
5676 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5677 const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5678
5679 static uint8 *do_alloc_code(uint32 size, int depth)
5680 {
5681 #if defined(__linux__) && 0
5682 /*
5683 This is a really awful hack that is known to work on Linux at
5684 least.
5685
5686 The trick here is to make sure the allocated cache is nearby
5687 code segment, and more precisely in the positive half of a
5688 32-bit address space. i.e. addr < 0x80000000. Actually, it
5689 turned out that a 32-bit binary run on AMD64 yields a cache
5690 allocated around 0xa0000000, thus causing some troubles when
5691 translating addresses from m68k to x86.
5692 */
5693 static uint8 * code_base = NULL;
5694 if (code_base == NULL) {
5695 uintptr page_size = getpagesize();
5696 uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5697 if (boundaries < page_size)
5698 boundaries = page_size;
5699 code_base = (uint8 *)sbrk(0);
5700 for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5701 if (vm_acquire_fixed(code_base, size) == 0) {
5702 uint8 *code = code_base;
5703 code_base += size;
5704 return code;
5705 }
5706 code_base += boundaries;
5707 }
5708 return NULL;
5709 }
5710
5711 if (vm_acquire_fixed(code_base, size) == 0) {
5712 uint8 *code = code_base;
5713 code_base += size;
5714 return code;
5715 }
5716
5717 if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5718 return NULL;
5719
5720 return do_alloc_code(size, depth + 1);
5721 #else
5722 uint8 *code = (uint8 *)vm_acquire(size);
5723 return code == VM_MAP_FAILED ? NULL : code;
5724 #endif
5725 }
5726
5727 static inline uint8 *alloc_code(uint32 size)
5728 {
5729 uint8 *ptr = do_alloc_code(size, 0);
5730 /* allocated code must reside within the 32-bit address space */
5731 assert((uintptr)ptr <= 0xffffffff);
5732 return ptr;
5733 }
5734
5735 void alloc_cache(void)
5736 {
5737 if (compiled_code) {
5738 flush_icache_hard(6);
5739 vm_release(compiled_code, cache_size * 1024);
5740 compiled_code = 0;
5741 }
5742
5743 if (cache_size == 0)
5744 return;
5745
5746 while (!compiled_code && cache_size) {
5747 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5748 compiled_code = 0;
5749 cache_size /= 2;
5750 }
5751 }
5752
5753 if (compiled_code) {
5754 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5755 write_log("<JIT compiler> : actual translation cache size : %d KB at %p\n", cache_size, compiled_code);
5756 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5757 current_compile_p = compiled_code;
5758 current_cache_size = 0;
5759 }
5760 }
5761
5762
5763
5764 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5765
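/* Block checksums are a plain sum (c1) and xor (c2) over the 32-bit
   words spanning the translated 68k code. They are cheap to recompute,
   and checking both catches nearly all in-place code modifications
   during lazy cache invalidation. */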
5766 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5767 {
5768 uae_u32 k1 = 0;
5769 uae_u32 k2 = 0;
5770
5771 #if USE_CHECKSUM_INFO
5772 checksum_info *csi = bi->csi;
5773 Dif(!csi) abort();
5774 while (csi) {
5775 uae_s32 len = csi->length;
5776 uintptr tmp = (uintptr)csi->start_p;
5777 #else
5778 uae_s32 len = bi->len;
5779 uintptr tmp = (uintptr)bi->min_pcp;
5780 #endif
5781 uae_u32*pos;
5782
5783 len += (tmp & 3);
5784 tmp &= ~((uintptr)3);
5785 pos = (uae_u32 *)tmp;
5786
5787 if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5788 while (len > 0) {
5789 k1 += *pos;
5790 k2 ^= *pos;
5791 pos++;
5792 len -= 4;
5793 }
5794 }
5795
5796 #if USE_CHECKSUM_INFO
5797 csi = csi->next;
5798 }
5799 #endif
5800
5801 *c1 = k1;
5802 *c2 = k2;
5803 }
5804
5805 #if 0
5806 static void show_checksum(CSI_TYPE* csi)
5807 {
5808 uae_u32 k1=0;
5809 uae_u32 k2=0;
5810 uae_s32 len=CSI_LENGTH(csi);
5811 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5812 uae_u32* pos;
5813
5814 len+=(tmp&3);
5815 tmp&=(~3);
5816 pos=(uae_u32*)tmp;
5817
5818 if (len<0 || len>MAX_CHECKSUM_LEN) {
5819 return;
5820 }
5821 else {
5822 while (len>0) {
5823 write_log("%08x ",*pos);
5824 pos++;
5825 len-=4;
5826 }
5827 write_log(" bla\n");
5828 }
5829 }
5830 #endif
5831
5832
5833 int check_for_cache_miss(void)
5834 {
5835 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5836
5837 if (bi) {
5838 int cl=cacheline(regs.pc_p);
5839 if (bi!=cache_tags[cl+1].bi) {
5840 raise_in_cl_list(bi);
5841 return 1;
5842 }
5843 }
5844 return 0;
5845 }
5846
5847
5848 static void recompile_block(void)
5849 {
5850 /* An existing block's countdown code has expired. We need to make
5851 sure that execute_normal doesn't refuse to recompile due to a
5852 perceived cache miss... */
5853 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5854
5855 Dif (!bi)
5856 abort();
5857 raise_in_cl_list(bi);
5858 execute_normal();
5859 return;
5860 }
5861 static void cache_miss(void)
5862 {
5863 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5864 uae_u32 cl=cacheline(regs.pc_p);
5865 blockinfo* bi2=get_blockinfo(cl);
5866
5867 if (!bi) {
5868 execute_normal(); /* Compile this block now */
5869 return;
5870 }
5871 Dif (!bi2 || bi==bi2) {
5872 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5873 abort();
5874 }
5875 raise_in_cl_list(bi);
5876 return;
5877 }
5878
5879 static int called_check_checksum(blockinfo* bi);
5880
5881 static inline int block_check_checksum(blockinfo* bi)
5882 {
5883 uae_u32 c1,c2;
5884 bool isgood;
5885
5886 if (bi->status!=BI_NEED_CHECK)
5887 return 1; /* This block is in a checked state */
5888
5889 checksum_count++;
5890
5891 if (bi->c1 || bi->c2)
5892 calc_checksum(bi,&c1,&c2);
5893 else {
5894 c1=c2=1; /* Make sure it doesn't match */
5895 }
5896
5897 isgood=(c1==bi->c1 && c2==bi->c2);
5898
5899 if (isgood) {
5900 /* This block is still OK. So we reactivate. Of course, that
5901 means we have to move it into the needs-to-be-flushed list */
5902 bi->handler_to_use=bi->handler;
5903 set_dhtu(bi,bi->direct_handler);
5904 bi->status=BI_CHECKING;
5905 isgood=called_check_checksum(bi);
5906 }
5907 if (isgood) {
5908 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5909 c1,c2,bi->c1,bi->c2);*/
5910 remove_from_list(bi);
5911 add_to_active(bi);
5912 raise_in_cl_list(bi);
5913 bi->status=BI_ACTIVE;
5914 }
5915 else {
5916 /* This block actually changed. We need to invalidate it,
5917 and set it up to be recompiled */
5918 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5919 c1,c2,bi->c1,bi->c2); */
5920 invalidate_block(bi);
5921 raise_in_cl_list(bi);
5922 }
5923 return isgood;
5924 }
5925
5926 static int called_check_checksum(blockinfo* bi)
5927 {
5929 int isgood=1;
5930 int i;
5931
5932 for (i=0;i<2 && isgood;i++) {
5933 if (bi->dep[i].jmp_off) {
5934 isgood=block_check_checksum(bi->dep[i].target);
5935 }
5936 }
5937 return isgood;
5938 }
5939
5940 static void check_checksum(void)
5941 {
5942 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5943 uae_u32 cl=cacheline(regs.pc_p);
5944 blockinfo* bi2=get_blockinfo(cl);
5945
5946 /* These are not the droids you are looking for... */
5947 if (!bi) {
5948 /* Whoever is the primary target is in a dormant state, but
5949 calling it was accidental, and we should just compile this
5950 new block */
5951 execute_normal();
5952 return;
5953 }
5954 if (bi!=bi2) {
5955 /* The block was hit accidentally, but it does exist. Cache miss */
5956 cache_miss();
5957 return;
5958 }
5959
5960 if (!block_check_checksum(bi))
5961 execute_normal();
5962 }
5963
5964 static __inline__ void match_states(blockinfo* bi)
5965 {
5966 int i;
5967 smallstate* s=&(bi->env);
5968
5969 if (bi->status==BI_NEED_CHECK) {
5970 block_check_checksum(bi);
5971 }
5972 if (bi->status==BI_ACTIVE ||
5973 bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5974 block makes (about not using
5975 certain vregs) */
5976 for (i=0;i<16;i++) {
5977 if (s->virt[i]==L_UNNEEDED) {
5978 // write_log("unneeded reg %d at %p\n",i,target);
5979 COMPCALL(forget_about)(i); // FIXME
5980 }
5981 }
5982 }
5983 flush(1);
5984
5985 /* And now deal with the *demands* the block makes */
5986 for (i=0;i<N_REGS;i++) {
5987 int v=s->nat[i];
5988 if (v>=0) {
5989 // printf("Loading reg %d into %d at %p\n",v,i,target);
5990 readreg_specific(v,4,i);
5991 // do_load_reg(i,v);
5992 // setlock(i);
5993 }
5994 }
5995 for (i=0;i<N_REGS;i++) {
5996 int v=s->nat[i];
5997 if (v>=0) {
5998 unlock2(i);
5999 }
6000 }
6001 }
6002
6003 static __inline__ void create_popalls(void)
6004 {
6005 int i,r;
6006
6007 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
6008 write_log("FATAL: Could not allocate popallspace!\n");
6009 abort();
6010 }
6011 vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
6012
6013 int stack_space = STACK_OFFSET;
6014 for (i=0;i<N_REGS;i++) {
6015 if (need_to_preserve[i])
6016 stack_space += sizeof(void *);
6017 }
6018 stack_space %= STACK_ALIGN;
6019 if (stack_space)
6020 stack_space = STACK_ALIGN - stack_space;
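/* Illustrative figures only (the real constants are target-specific):
   if STACK_OFFSET plus the pushes comes to 20 bytes with STACK_ALIGN
   at 16, then 20 % 16 = 4 and 12 bytes of padding keep the stack
   16-byte aligned. */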
6021
6022 current_compile_p=popallspace;
6023 set_target(current_compile_p);
6024
6025 /* We need to guarantee 16-byte stack alignment on x86 at any point
6026 within the JIT generated code. We have multiple exit points
6027 possible but a single entry. A "jmp" is used so that we don't
6028 have to generate stack alignment in generated code that has to
6029 call external functions (e.g. a generic instruction handler).
6030
6031 In summary, JIT generated code is not a leaf function, so we
6032 have to deal with stack alignment here. */
6033 align_target(align_jumps);
6034 current_compile_p=get_target();
6035 pushall_call_handler=get_target();
6036 for (i=N_REGS;i--;) {
6037 if (need_to_preserve[i])
6038 raw_push_l_r(i);
6039 }
6040 raw_dec_sp(stack_space);
6041 r=REG_PC_TMP;
6042 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6043 raw_and_l_ri(r,TAGMASK);
6044 raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6045
6046 /* now the exit points */
6047 align_target(align_jumps);
6048 popall_do_nothing=get_target();
6049 raw_inc_sp(stack_space);
6050 for (i=0;i<N_REGS;i++) {
6051 if (need_to_preserve[i])
6052 raw_pop_l_r(i);
6053 }
6054 raw_jmp((uintptr)do_nothing);
6055
6056 align_target(align_jumps);
6057 popall_execute_normal=get_target();
6058 raw_inc_sp(stack_space);
6059 for (i=0;i<N_REGS;i++) {
6060 if (need_to_preserve[i])
6061 raw_pop_l_r(i);
6062 }
6063 raw_jmp((uintptr)execute_normal);
6064
6065 align_target(align_jumps);
6066 popall_cache_miss=get_target();
6067 raw_inc_sp(stack_space);
6068 for (i=0;i<N_REGS;i++) {
6069 if (need_to_preserve[i])
6070 raw_pop_l_r(i);
6071 }
6072 raw_jmp((uintptr)cache_miss);
6073
6074 align_target(align_jumps);
6075 popall_recompile_block=get_target();
6076 raw_inc_sp(stack_space);
6077 for (i=0;i<N_REGS;i++) {
6078 if (need_to_preserve[i])
6079 raw_pop_l_r(i);
6080 }
6081 raw_jmp((uintptr)recompile_block);
6082
6083 align_target(align_jumps);
6084 popall_exec_nostats=get_target();
6085 raw_inc_sp(stack_space);
6086 for (i=0;i<N_REGS;i++) {
6087 if (need_to_preserve[i])
6088 raw_pop_l_r(i);
6089 }
6090 raw_jmp((uintptr)exec_nostats);
6091
6092 align_target(align_jumps);
6093 popall_check_checksum=get_target();
6094 raw_inc_sp(stack_space);
6095 for (i=0;i<N_REGS;i++) {
6096 if (need_to_preserve[i])
6097 raw_pop_l_r(i);
6098 }
6099 raw_jmp((uintptr)check_checksum);
6100
6101 // no further writes into popallspace are needed
6102 vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6103 }
6104
6105 static __inline__ void reset_lists(void)
6106 {
6107 int i;
6108
6109 for (i=0;i<MAX_HOLD_BI;i++)
6110 hold_bi[i]=NULL;
6111 active=NULL;
6112 dormant=NULL;
6113 }
6114
6115 static void prepare_block(blockinfo* bi)
6116 {
6117 int i;
6118
6119 set_target(current_compile_p);
6120 align_target(align_jumps);
6121 bi->direct_pen=(cpuop_func *)get_target();
6122 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6123 raw_mov_l_mr((uintptr)&regs.pc_p,0);
6124 raw_jmp((uintptr)popall_execute_normal);
6125
6126 align_target(align_jumps);
6127 bi->direct_pcc=(cpuop_func *)get_target();
6128 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6129 raw_mov_l_mr((uintptr)&regs.pc_p,0);
6130 raw_jmp((uintptr)popall_check_checksum);
6131 current_compile_p=get_target();
6132
6133 bi->deplist=NULL;
6134 for (i=0;i<2;i++) {
6135 bi->dep[i].prev_p=NULL;
6136 bi->dep[i].next=NULL;
6137 }
6138 bi->env=default_ss;
6139 bi->status=BI_INVALID;
6140 bi->havestate=0;
6141 //bi->env=empty_ss;
6142 }
6143
6144 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6145 static inline void reset_compop(int opcode)
6146 {
6147 compfunctbl[opcode] = NULL;
6148 nfcompfunctbl[opcode] = NULL;
6149 }
6150
6151 static int read_opcode(const char *p)
6152 {
6153 int opcode = 0;
6154 for (int i = 0; i < 4; i++) {
6155 int op = p[i];
6156 switch (op) {
6157 case '0': case '1': case '2': case '3': case '4':
6158 case '5': case '6': case '7': case '8': case '9':
6159 opcode = (opcode << 4) | (op - '0');
6160 break;
6161 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6162 opcode = (opcode << 4) | ((op - 'a') + 10);
6163 break;
6164 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6165 opcode = (opcode << 4) | ((op - 'A') + 10);
6166 break;
6167 default:
6168 return -1;
6169 }
6170 }
6171 return opcode;
6172 }
6173
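/* The "jitblacklist" pref is a list of 4-digit hex opcodes or opcode
   ranges separated by ',' or ';', e.g. "a000-a0ff;f200-f3ff,4afc"
   (example string only). Matching opcodes get their compile handlers
   removed and fall back to the interpretive handlers. */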
6174 static bool merge_blacklist()
6175 {
6176 const char *blacklist = PrefsFindString("jitblacklist");
6177 if (blacklist) {
6178 const char *p = blacklist;
6179 for (;;) {
6180 if (*p == 0)
6181 return true;
6182
6183 int opcode1 = read_opcode(p);
6184 if (opcode1 < 0)
6185 return false;
6186 p += 4;
6187
6188 int opcode2 = opcode1;
6189 if (*p == '-') {
6190 p++;
6191 opcode2 = read_opcode(p);
6192 if (opcode2 < 0)
6193 return false;
6194 p += 4;
6195 }
6196
6197 if (*p == 0 || *p == ',' || *p == ';') {
6198 write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6199 for (int opcode = opcode1; opcode <= opcode2; opcode++)
6200 reset_compop(cft_map(opcode));
6201
6202 if (*p == ',' || *p == ';') { p++; continue; } /* skip the separator */
6204
6205 return true;
6206 }
6207
6208 return false;
6209 }
6210 }
6211 return true;
6212 }
6213
6214 void build_comp(void)
6215 {
6216 int i;
6217 int jumpcount=0;
6218 unsigned long opcode;
6219 struct comptbl* tbl=op_smalltbl_0_comp_ff;
6220 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6221 int count;
6222 int cpu_level = 0; // 68000 (default)
6223 if (CPUType == 4)
6224 cpu_level = 4; // 68040 with FPU
6225 else {
6226 if (FPUType)
6227 cpu_level = 3; // 68020 with FPU
6228 else if (CPUType >= 2)
6229 cpu_level = 2; // 68020
6230 else if (CPUType == 1)
6231 cpu_level = 1;
6232 }
6233 struct cputbl *nfctbl = (
6234 cpu_level == 4 ? op_smalltbl_0_nf
6235 : cpu_level == 3 ? op_smalltbl_1_nf
6236 : cpu_level == 2 ? op_smalltbl_2_nf
6237 : cpu_level == 1 ? op_smalltbl_3_nf
6238 : op_smalltbl_4_nf);
6239
6240 write_log ("<JIT compiler> : building compiler function tables\n");
6241
6242 for (opcode = 0; opcode < 65536; opcode++) {
6243 reset_compop(opcode);
6244 nfcpufunctbl[opcode] = op_illg_1;
6245 prop[opcode].use_flags = 0x1f;
6246 prop[opcode].set_flags = 0x1f;
6247 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6248 }
6249
6250 for (i = 0; tbl[i].opcode < 65536; i++) {
6251 int cflow = table68k[tbl[i].opcode].cflow;
6252 if (follow_const_jumps && (tbl[i].specific & 16))
6253 cflow = fl_const_jump;
6254 else
6255 cflow &= ~fl_const_jump;
6256 prop[cft_map(tbl[i].opcode)].cflow = cflow;
6257
6258 int uses_fpu = tbl[i].specific & 32;
6259 if (uses_fpu && avoid_fpu)
6260 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6261 else
6262 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6263 }
6264
6265 for (i = 0; nftbl[i].opcode < 65536; i++) {
6266 int uses_fpu = nftbl[i].specific & 32;
6267 if (uses_fpu && avoid_fpu)
6268 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6269 else
6270 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6271
6272 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6273 }
6274
6275 for (i = 0; nfctbl[i].handler; i++) {
6276 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6277 }
6278
6279 for (opcode = 0; opcode < 65536; opcode++) {
6280 compop_func *f;
6281 compop_func *nff;
6282 cpuop_func *nfcf;
6283 int isaddx,cflow;
6284
6285 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6286 continue;
6287
6288 if (table68k[opcode].handler != -1) {
6289 f = compfunctbl[cft_map(table68k[opcode].handler)];
6290 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6291 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6292 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6293 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6294 prop[cft_map(opcode)].cflow = cflow;
6295 prop[cft_map(opcode)].is_addx = isaddx;
6296 compfunctbl[cft_map(opcode)] = f;
6297 nfcompfunctbl[cft_map(opcode)] = nff;
6298 Dif (nfcf == op_illg_1)
6299 abort();
6300 nfcpufunctbl[cft_map(opcode)] = nfcf;
6301 }
6302 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6303 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6304 /* Unconditional jumps don't evaluate condition codes, so they
6305 * don't actually use any flags themselves */
6306 if (prop[cft_map(opcode)].cflow & fl_const_jump)
6307 prop[cft_map(opcode)].use_flags = 0;
6308 }
6309 for (i = 0; nfctbl[i].handler != NULL; i++) {
6310 if (nfctbl[i].specific)
6311 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6312 }
6313
6314 /* Merge in blacklist */
6315 if (!merge_blacklist())
6316 write_log("<JIT compiler> : blacklist merge failure!\n");
6317
6318 count=0;
6319 for (opcode = 0; opcode < 65536; opcode++) {
6320 if (compfunctbl[cft_map(opcode)])
6321 count++;
6322 }
6323 write_log("<JIT compiler> : supposedly %d compilable opcodes!\n",count);
6324
6325 /* Initialise state */
6326 create_popalls();
6327 alloc_cache();
6328 reset_lists();
6329
6330 for (i=0;i<TAGSIZE;i+=2) {
6331 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6332 cache_tags[i+1].bi=NULL;
6333 }
6334
6335 #if 0
6336 for (i=0;i<N_REGS;i++) {
6337 empty_ss.nat[i].holds=-1;
6338 empty_ss.nat[i].validsize=0;
6339 empty_ss.nat[i].dirtysize=0;
6340 }
6341 #endif
6342 for (i=0;i<VREGS;i++) {
6343 empty_ss.virt[i]=L_NEEDED;
6344 }
6345 for (i=0;i<N_REGS;i++) {
6346 empty_ss.nat[i]=L_UNKNOWN;
6347 }
6348 default_ss=empty_ss;
6349 }
6350
6351
6352 static void flush_icache_none(int n)
6353 {
6354 /* Nothing to do. */
6355 }
6356
6357 static void flush_icache_hard(int n)
6358 {
6360 blockinfo* bi, *dbi;
6361
6362 hard_flush_count++;
6363 #if 0
6364 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6365 n,regs.pc,regs.pc_p,current_cache_size/1024);
6366 current_cache_size = 0;
6367 #endif
6368 bi=active;
6369 while(bi) {
6370 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6371 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6372 dbi=bi; bi=bi->next;
6373 free_blockinfo(dbi);
6374 }
6375 bi=dormant;
6376 while(bi) {
6377 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6378 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6379 dbi=bi; bi=bi->next;
6380 free_blockinfo(dbi);
6381 }
6382
6383 reset_lists();
6384 if (!compiled_code)
6385 return;
6386 current_compile_p=compiled_code;
6387 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6388 }
6389
6390
6391 /* "Soft flushing" --- instead of actually throwing everything away,
6392 we simply mark everything as "needs to be checked".
6393 */
6394
6395 static inline void flush_icache_lazy(int n)
6396 {
6398 blockinfo* bi;
6399 blockinfo* bi2;
6400
6401 soft_flush_count++;
6402 if (!active)
6403 return;
6404
6405 bi=active;
6406 while (bi) {
6407 uae_u32 cl=cacheline(bi->pc_p);
6408 if (bi->status==BI_INVALID ||
6409 bi->status==BI_NEED_RECOMP) {
6410 if (bi==cache_tags[cl+1].bi)
6411 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6412 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6413 set_dhtu(bi,bi->direct_pen);
6414 bi->status=BI_INVALID;
6415 }
6416 else {
6417 if (bi==cache_tags[cl+1].bi)
6418 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6419 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6420 set_dhtu(bi,bi->direct_pcc);
6421 bi->status=BI_NEED_CHECK;
6422 }
6423 bi2=bi;
6424 bi=bi->next;
6425 }
6426 /* bi2 is now the last entry in the active list */
6427 bi2->next=dormant;
6428 if (dormant)
6429 dormant->prev_p=&(bi2->next);
6430
6431 dormant=active;
6432 active->prev_p=&dormant;
6433 active=NULL;
6434 }
6435
6436 void flush_icache_range(uae_u8 *start_p, uae_u32 length)
6437 {
6438 if (!active)
6439 return;
6440
6441 #if LAZY_FLUSH_ICACHE_RANGE
6442 blockinfo *bi = active;
6443 while (bi) {
6444 #if USE_CHECKSUM_INFO
6445 bool candidate = false;
6446 for (checksum_info *csi = bi->csi; csi; csi = csi->next) {
6447 if (((start_p - csi->start_p) < csi->length) ||
6448 ((csi->start_p - start_p) < length)) {
6449 candidate = true;
6450 break;
6451 }
6452 }
6453 #else
6454 // Assume system is consistent and would invalidate the right range
6455 const bool candidate = (bi->pc_p - start_p) < length;
6456 #endif
6457 blockinfo *dbi = bi;
6458 bi = bi->next;
6459 if (candidate) {
6460 uae_u32 cl = cacheline(dbi->pc_p);
6461 if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) {
6462 if (dbi == cache_tags[cl+1].bi)
6463 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6464 dbi->handler_to_use = (cpuop_func *)popall_execute_normal;
6465 set_dhtu(dbi, dbi->direct_pen);
6466 dbi->status = BI_INVALID;
6467 }
6468 else {
6469 if (dbi == cache_tags[cl+1].bi)
6470 cache_tags[cl].handler = (cpuop_func *)popall_check_checksum;
6471 dbi->handler_to_use = (cpuop_func *)popall_check_checksum;
6472 set_dhtu(dbi, dbi->direct_pcc);
6473 dbi->status = BI_NEED_CHECK;
6474 }
6475 remove_from_list(dbi);
6476 add_to_dormant(dbi);
6477 }
6478 }
6479 return;
6480 #endif
6481 flush_icache(-1);
6482 }
6483
6484 static void catastrophe(void)
6485 {
6486 abort();
6487 }
6488
6489 int failure;
6490
6491 #define TARGET_M68K 0
6492 #define TARGET_POWERPC 1
6493 #define TARGET_X86 2
6494 #define TARGET_X86_64 3
6495 #if defined(i386) || defined(__i386__)
6496 #define TARGET_NATIVE TARGET_X86
6497 #endif
6498 #if defined(powerpc) || defined(__powerpc__)
6499 #define TARGET_NATIVE TARGET_POWERPC
6500 #endif
6501 #if defined(x86_64) || defined(__x86_64__)
6502 #define TARGET_NATIVE TARGET_X86_64
6503 #endif
6504
6505 #ifdef ENABLE_MON
6506 static uae_u32 mon_read_byte_jit(uintptr addr)
6507 {
6508 uae_u8 *m = (uae_u8 *)addr;
6509 return (uintptr)(*m);
6510 }
6511
6512 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6513 {
6514 uae_u8 *m = (uae_u8 *)addr;
6515 *m = b;
6516 }
6517 #endif
6518
6519 void disasm_block(int target, uint8 * start, size_t length)
6520 {
6521 if (!JITDebug)
6522 return;
6523
6524 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6525 char disasm_str[200];
6526 sprintf(disasm_str, "%s $%x $%x",
6527 target == TARGET_M68K ? "d68" :
6528 target == TARGET_X86 ? "d86" :
6529 target == TARGET_X86_64 ? "d8664" :
6530 target == TARGET_POWERPC ? "d" : "x",
6531 start, start + length - 1);
6532
6533 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6534 void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6535
6536 mon_read_byte = mon_read_byte_jit;
6537 mon_write_byte = mon_write_byte_jit;
6538
6539 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6540 mon(4, arg);
6541
6542 mon_read_byte = old_mon_read_byte;
6543 mon_write_byte = old_mon_write_byte;
6544 #endif
6545 }
6546
6547 static void disasm_native_block(uint8 *start, size_t length)
6548 {
6549 disasm_block(TARGET_NATIVE, start, length);
6550 }
6551
6552 static void disasm_m68k_block(uint8 *start, size_t length)
6553 {
6554 disasm_block(TARGET_M68K, start, length);
6555 }
6556
6557 #ifdef HAVE_GET_WORD_UNSWAPPED
6558 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6559 #else
6560 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6561 #endif
6562
6563 #if JIT_DEBUG
6564 static uae_u8 *last_regs_pc_p = 0;
6565 static uae_u8 *last_compiled_block_addr = 0;
6566
6567 void compiler_dumpstate(void)
6568 {
6569 if (!JITDebug)
6570 return;
6571
6572 write_log("### Host addresses\n");
6573 write_log("MEM_BASE : %x\n", MEMBaseDiff);
6574 write_log("PC_P : %p\n", &regs.pc_p);
6575 write_log("SPCFLAGS : %p\n", &regs.spcflags);
6576 write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6577 write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6578 write_log("\n");
6579
6580 write_log("### M68k processor state\n");
6581 m68k_dumpstate(0);
6582 write_log("\n");
6583
6584 write_log("### Block in Mac address space\n");
6585 write_log("M68K block : %p\n",
6586 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6587 write_log("Native block : %p (%d bytes)\n",
6588 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6589 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6590 write_log("\n");
6591 }
6592 #endif
6593
6594 static void compile_block(cpu_history* pc_hist, int blocklen)
6595 {
6596 if (letit && compiled_code) {
6597 #if PROFILE_COMPILE_TIME
6598 compile_count++;
6599 clock_t start_time = clock();
6600 #endif
6601 #if JIT_DEBUG
6602 bool disasm_block = false;
6603 #endif
6604
6605 /* OK, here we need to 'compile' a block */
6606 int i;
6607 int r;
6608 int was_comp=0;
6609 uae_u8 liveflags[MAXRUN+1];
6610 #if USE_CHECKSUM_INFO
6611 bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6612 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6613 uintptr min_pcp=max_pcp;
6614 #else
6615 uintptr max_pcp=(uintptr)pc_hist[0].location;
6616 uintptr min_pcp=max_pcp;
6617 #endif
6618 uae_u32 cl=cacheline(pc_hist[0].location);
6619 void* specflags=(void*)&regs.spcflags;
6620 blockinfo* bi=NULL;
6621 blockinfo* bi2;
6622 int extra_len=0;
6623
6624 redo_current_block=0;
6625 if (current_compile_p>=max_compile_start)
6626 flush_icache_hard(7);
6627
6628 alloc_blockinfos();
6629
6630 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6631 bi2=get_blockinfo(cl);
6632
6633 optlev=bi->optlevel;
6634 if (bi->status!=BI_INVALID) {
6635 Dif (bi!=bi2) {
6636 /* I don't think it can happen anymore. Shouldn't, in
6637 any case. So let's make sure... */
6638 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6639 bi->count,bi->optlevel,bi->handler_to_use,
6640 cache_tags[cl].handler);
6641 abort();
6642 }
6643
6644 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6645 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6646 /* What the heck? We are not supposed to be here! */
6647 abort();
6648 }
6649 }
6650 if (bi->count==-1) {
6651 optlev++;
6652 while (!optcount[optlev])
6653 optlev++;
6654 bi->count=optcount[optlev]-1;
6655 }
6656 current_block_pc_p=(uintptr)pc_hist[0].location;
6657
6658 remove_deps(bi); /* We are about to create new code */
6659 bi->optlevel=optlev;
6660 bi->pc_p=(uae_u8*)pc_hist[0].location;
6661 #if USE_CHECKSUM_INFO
6662 free_checksum_info_chain(bi->csi);
6663 bi->csi = NULL;
6664 #endif
6665
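/* Backward liveness pass over the trace: a flag is live before an
   instruction if that instruction uses it, or if it is live afterwards
   and the instruction does not set it:
   live_in = (live_out & ~set_flags) | use_flags. */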
6666 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6667 i=blocklen;
6668 while (i--) {
6669 uae_u16* currpcp=pc_hist[i].location;
6670 uae_u32 op=DO_GET_OPCODE(currpcp);
6671
6672 #if USE_CHECKSUM_INFO
6673 trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6674 if (follow_const_jumps && is_const_jump(op)) {
6675 checksum_info *csi = alloc_checksum_info();
6676 csi->start_p = (uae_u8 *)min_pcp;
6677 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6678 csi->next = bi->csi;
6679 bi->csi = csi;
6680 max_pcp = (uintptr)currpcp;
6681 }
6682 min_pcp = (uintptr)currpcp;
6683 #else
6684 if ((uintptr)currpcp<min_pcp)
6685 min_pcp=(uintptr)currpcp;
6686 if ((uintptr)currpcp>max_pcp)
6687 max_pcp=(uintptr)currpcp;
6688 #endif
6689
6690 liveflags[i]=((liveflags[i+1]&
6691 (~prop[op].set_flags))|
6692 prop[op].use_flags);
6693 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6694 liveflags[i]&= ~FLAG_Z;
6695 }
6696
6697 #if USE_CHECKSUM_INFO
6698 checksum_info *csi = alloc_checksum_info();
6699 csi->start_p = (uae_u8 *)min_pcp;
6700 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6701 csi->next = bi->csi;
6702 bi->csi = csi;
6703 #endif
6704
6705 bi->needed_flags=liveflags[0];
6706
6707 align_target(align_loops);
6708 was_comp=0;
6709
6710 bi->direct_handler=(cpuop_func *)get_target();
6711 set_dhtu(bi,bi->direct_handler);
6712 bi->status=BI_COMPILING;
6713 current_block_start_target=(uintptr)get_target();
6714
6715 log_startblock();
6716
6717 if (bi->count>=0) { /* Need to generate countdown code */
6718 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6719 raw_sub_l_mi((uintptr)&(bi->count),1);
6720 raw_jl((uintptr)popall_recompile_block);
6721 }
6722 if (optlev==0) { /* No need to actually translate */
6723 /* Execute normally without keeping stats */
6724 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6725 raw_jmp((uintptr)popall_exec_nostats);
6726 }
6727 else {
6728 reg_alloc_run=0;
6729 next_pc_p=0;
6730 taken_pc_p=0;
6731 branch_cc=0;
6732
6733 comp_pc_p=(uae_u8*)pc_hist[0].location;
6734 init_comp();
6735 was_comp=1;
6736
6737 #ifdef USE_CPU_EMUL_SERVICES
6738 raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
6739 raw_jcc_b_oponly(NATIVE_CC_GT);
6740 uae_s8 *branchadd=(uae_s8*)get_target();
6741 emit_byte(0);
6742 raw_call((uintptr)cpu_do_check_ticks);
6743 *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
6744 #endif
6745
6746 #if JIT_DEBUG
6747 if (JITDebug) {
6748 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6749 raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6750 }
6751 #endif
6752
6753 for (i=0;i<blocklen &&
6754 get_target_noopt()<max_compile_start;i++) {
6755 cpuop_func **cputbl;
6756 compop_func **comptbl;
6757 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6758 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6759 if (!needed_flags) {
6760 cputbl=nfcpufunctbl;
6761 comptbl=nfcompfunctbl;
6762 }
6763 else {
6764 cputbl=cpufunctbl;
6765 comptbl=compfunctbl;
6766 }
6767
6768 #if FLIGHT_RECORDER
6769 {
6770 mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
6771 clobber_flags();
6772 remove_all_offsets();
6773 int arg = readreg_specific(S1,4,REG_PAR1);
6774 prepare_for_call_1();
6775 unlock2(arg);
6776 prepare_for_call_2();
6777 raw_call((uintptr)m68k_record_step);
6778 }
6779 #endif
6780
6781 failure = 1; // gb-- defaults to failure state
6782 if (comptbl[opcode] && optlev>1) {
6783 failure=0;
6784 if (!was_comp) {
6785 comp_pc_p=(uae_u8*)pc_hist[i].location;
6786 init_comp();
6787 }
6788 was_comp=1;
6789
6790 comptbl[opcode](opcode);
6791 freescratch();
6792 if (!(liveflags[i+1] & FLAG_CZNV)) {
6793 /* We can forget about flags */
6794 dont_care_flags();
6795 }
6796 #if INDIVIDUAL_INST
6797 flush(1);
6798 nop();
6799 flush(1);
6800 was_comp=0;
6801 #endif
6802 }
6803
6804 if (failure) {
6805 if (was_comp) {
6806 flush(1);
6807 was_comp=0;
6808 }
6809 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6810 #if USE_NORMAL_CALLING_CONVENTION
6811 raw_push_l_r(REG_PAR1);
6812 #endif
6813 raw_mov_l_mi((uintptr)&regs.pc_p,
6814 (uintptr)pc_hist[i].location);
6815 raw_call((uintptr)cputbl[opcode]);
6816 #if PROFILE_UNTRANSLATED_INSNS
6817 // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6818 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6819 #endif
6820 #if USE_NORMAL_CALLING_CONVENTION
6821 raw_inc_sp(4);
6822 #endif
6823
6824 if (i < blocklen - 1) {
6825 uae_s8* branchadd;
6826
6827 raw_mov_l_rm(0,(uintptr)specflags);
6828 raw_test_l_rr(0,0);
6829 raw_jz_b_oponly();
6830 branchadd=(uae_s8 *)get_target();
6831 emit_byte(0);
6832 raw_jmp((uintptr)popall_do_nothing);
6833 *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6834 }
6835 }
6836 }
6837 #if 1 /* This isn't completely kosher yet; it really needs to be
6838 integrated into a general inter-block-dependency scheme */
6839 if (next_pc_p && taken_pc_p &&
6840 was_comp && taken_pc_p==current_block_pc_p) {
6841 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6842 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6843 uae_u8 x=bi1->needed_flags;
6844
6845 if (x==0xff || 1) { /* To be on the safe side */
6846 uae_u16* next=(uae_u16*)next_pc_p;
6847 uae_u32 op=DO_GET_OPCODE(next);
6848
6849 x=0x1f;
6850 x&=(~prop[op].set_flags);
6851 x|=prop[op].use_flags;
6852 }
6853
6854 x|=bi2->needed_flags;
6855 if (!(x & FLAG_CZNV)) {
6856 /* We can forget about flags */
6857 dont_care_flags();
6858 extra_len+=2; /* The next instruction now is part of this
6859 block */
6860 }
6861
6862 }
6863 #endif
6864 log_flush();
6865
6866 if (next_pc_p) { /* A branch was registered */
6867 uintptr t1=next_pc_p;
6868 uintptr t2=taken_pc_p;
6869 int cc=branch_cc;
6870
6871 uae_u32* branchadd;
6872 uae_u32* tba;
6873 bigstate tmp;
6874 blockinfo* tbi;
6875
6876 if (taken_pc_p<next_pc_p) {
6877 /* backward branch. Optimize for the "taken" case ---
6878 which means the raw_jcc should fall through when
6879 the 68k branch is taken. */
6880 t1=taken_pc_p;
6881 t2=next_pc_p;
6882 cc=branch_cc^1;
6883 }
6884
6885 tmp=live; /* ouch! This is big... */
6886 raw_jcc_l_oponly(cc);
6887 branchadd=(uae_u32*)get_target();
6888 emit_long(0);
6889
6890 /* predicted outcome */
6891 tbi=get_blockinfo_addr_new((void*)t1,1);
6892 match_states(tbi);
6893 raw_cmp_l_mi((uintptr)specflags,0);
6894 raw_jcc_l_oponly(4);
6895 tba=(uae_u32*)get_target();
6896 emit_long(get_handler(t1)-((uintptr)tba+4));
6897 raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6898 flush_reg_count();
6899 raw_jmp((uintptr)popall_do_nothing);
6900 create_jmpdep(bi,0,tba,t1);
6901
6902 align_target(align_jumps);
6903 /* not-predicted outcome */
6904 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6905 live=tmp; /* Ouch again */
6906 tbi=get_blockinfo_addr_new((void*)t2,1);
6907 match_states(tbi);
6908
6909 //flush(1); /* Can only get here if was_comp==1 */
6910 raw_cmp_l_mi((uintptr)specflags,0);
6911 raw_jcc_l_oponly(4);
6912 tba=(uae_u32*)get_target();
6913 emit_long(get_handler(t2)-((uintptr)tba+4));
6914 raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6915 flush_reg_count();
6916 raw_jmp((uintptr)popall_do_nothing);
6917 create_jmpdep(bi,1,tba,t2);
6918 }
6919 else
6920 {
6921 if (was_comp) {
6922 flush(1);
6923 }
6924 flush_reg_count();
6925
6926 /* Let's find out where next_handler is... */
6927 if (was_comp && isinreg(PC_P)) {
6928 r=live.state[PC_P].realreg;
6929 raw_and_l_ri(r,TAGMASK);
6930 int r2 = (r==0) ? 1 : 0;
6931 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6932 raw_cmp_l_mi((uintptr)specflags,0);
6933 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6934 raw_jmp_r(r2);
6935 }
6936 else if (was_comp && isconst(PC_P)) {
6937 uae_u32 v=live.state[PC_P].val;
6938 uae_u32* tba;
6939 blockinfo* tbi;
6940
6941 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6942 match_states(tbi);
6943
6944 raw_cmp_l_mi((uintptr)specflags,0);
6945 raw_jcc_l_oponly(4);
6946 tba=(uae_u32*)get_target();
6947 emit_long(get_handler(v)-((uintptr)tba+4));
6948 raw_mov_l_mi((uintptr)&regs.pc_p,v);
6949 raw_jmp((uintptr)popall_do_nothing);
6950 create_jmpdep(bi,0,tba,v);
6951 }
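/* With PC_P constant the successor address is known at compile time, so
   the same patchable direct-jump sequence as in the branch case above is
   emitted and the cache_tags lookup is skipped entirely. */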
6952 else {
6953 r=REG_PC_TMP;
6954 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6955 raw_and_l_ri(r,TAGMASK);
6956 int r2 = (r==0) ? 1 : 0;
6957 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6958 raw_cmp_l_mi((uintptr)specflags,0);
6959 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6960 raw_jmp_r(r2);
6961 }
6962 }
6963 }
6964
6965 #if USE_MATCH
6966 if (callers_need_recompile(&live,&(bi->env))) {
6967 mark_callers_recompile(bi);
6968 }
6969
6970 big_to_small_state(&live,&(bi->env));
6971 #endif
6972
6973 #if USE_CHECKSUM_INFO
6974 remove_from_list(bi);
6975 if (trace_in_rom) {
6976 // No need to checksum that block trace on cache invalidation
6977 free_checksum_info_chain(bi->csi);
6978 bi->csi = NULL;
6979 add_to_dormant(bi);
6980 }
6981 else {
6982 calc_checksum(bi,&(bi->c1),&(bi->c2));
6983 add_to_active(bi);
6984 }
6985 #else
6986 if (next_pc_p+extra_len>=max_pcp &&
6987 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6988 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6989 else
6990 max_pcp+=LONGEST_68K_INST;
6991
6992 bi->len=max_pcp-min_pcp;
6993 bi->min_pcp=min_pcp;
6994
6995 remove_from_list(bi);
6996 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6997 add_to_dormant(bi); /* No need to checksum it on cache flush.
6998 Please don't start changing ROMs in
6999 flight! */
7000 }
7001 else {
7002 calc_checksum(bi,&(bi->c1),&(bi->c2));
7003 add_to_active(bi);
7004 }
7005 #endif
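/* Either way the block ends up on a list: dormant blocks (entirely in
   ROM) are assumed immutable and are never re-checksummed, while active
   blocks carry a checksum over the source range [min_pcp, max_pcp) that
   is re-verified on cache invalidation to decide whether the translation
   is still valid. */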
7006
7007 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
7008
7009 #if JIT_DEBUG
7010 if (JITDebug)
7011 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
7012
7013 if (JITDebug && disasm_block) {
7014 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
7015 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
7016 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
7017 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
7018 D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
7019 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
7020 getchar();
7021 }
7022 #endif
7023
7024 log_dump();
7025 align_target(align_jumps);
7026
7027 /* This is the non-direct handler */
7028 bi->handler=
7029 bi->handler_to_use=(cpuop_func *)get_target();
7030 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
7031 raw_jnz((uintptr)popall_cache_miss);
7032 comp_pc_p=(uae_u8*)pc_hist[0].location;
7033
7034 bi->status=BI_FINALIZING;
7035 init_comp();
7036 match_states(bi);
7037 flush(1);
7038
7039 raw_jmp((uintptr)bi->direct_handler);
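/* The non-direct handler emitted above is the entry point reached via
   cache_tags: it verifies that regs.pc_p really belongs to this block
   (bailing out to popall_cache_miss on a tag collision), re-establishes
   the compile-time register state and then falls through into the direct
   handler. */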
7040
7041 current_compile_p=get_target();
7042 raise_in_cl_list(bi);
7043
7044 /* We will flush soon, anyway, so let's do it now */
7045 if (current_compile_p>=max_compile_start)
7046 flush_icache_hard(7);
7047
7048 bi->status=BI_ACTIVE;
7049 if (redo_current_block)
7050 block_need_recompile(bi);
7051
7052 #if PROFILE_COMPILE_TIME
7053 compile_time += (clock() - start_time);
7054 #endif
7055 }
7056
7057 /* Account for compilation time */
7058 cpu_do_check_ticks();
7059 }
7060
7061 void do_nothing(void)
7062 {
7063 /* What did you expect this to do? */
7064 }
7065
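/* Interpret one extended basic block without recording a trace --
   presumably for code that is not worth, or not safe, to compile; ticks
   are still accounted for per instruction. */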
7066 void exec_nostats(void)
7067 {
7068 for (;;) {
7069 uae_u32 opcode = GET_OPCODE;
7070 #if FLIGHT_RECORDER
7071 m68k_record_step(m68k_getpc());
7072 #endif
7073 (*cpufunctbl[opcode])(opcode);
7074 cpu_check_ticks();
7075 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7076 return; /* We will deal with the spcflags in the caller */
7077 }
7078 }
7079 }
7080
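/* Interpret instructions while recording their locations in pc_hist[];
   once a block-ending opcode, a pending special flag or the MAXRUN limit
   is hit, hand the recorded trace to compile_block(). */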
7081 void execute_normal(void)
7082 {
7083 if (!check_for_cache_miss()) {
7084 cpu_history pc_hist[MAXRUN];
7085 int blocklen = 0;
7086 #if REAL_ADDRESSING || DIRECT_ADDRESSING
7087 start_pc_p = regs.pc_p;
7088 start_pc = get_virtual_address(regs.pc_p);
7089 #else
7090 start_pc_p = regs.pc_oldp;
7091 start_pc = regs.pc;
7092 #endif
7093 for (;;) { /* Take note: This is the do-it-normal loop */
7094 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7095 uae_u32 opcode = GET_OPCODE;
7096 #if FLIGHT_RECORDER
7097 m68k_record_step(m68k_getpc());
7098 #endif
7099 (*cpufunctbl[opcode])(opcode);
7100 cpu_check_ticks();
7101 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7102 compile_block(pc_hist, blocklen);
7103 return; /* We will deal with the spcflags in the caller */
7104 }
7105 /* No need to check regs.spcflags, because if they were set,
7106 we'd have ended up inside that "if" */
7107 }
7108 }
7109 }
7110
7111 typedef void (*compiled_handler)(void);
7112
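/* Top-level JIT loop: pushall_call_handler is a stub (generated earlier
   in this file) that saves the host registers and dispatches through
   cache_tags[]; control returns here only when a handler bails out to
   the dispatcher, at which point pending special flags are serviced. */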
7113 static void m68k_do_compile_execute(void)
7114 {
7115 for (;;) {
7116 ((compiled_handler)(pushall_call_handler))();
7117 /* Whenever we return from that, we should check spcflags */
7118 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7119 if (m68k_do_specialties ())
7120 return;
7121 }
7122 }
7123 }
7124
7125 void m68k_compile_execute (void)
7126 {
7127 for (;;) {
7128 if (quit_program)
7129 break;
7130 m68k_do_compile_execute();
7131 }
7132 }