"Fossies" - the Fresh Open Source Software Archive

Member "radare2-4.0.0/libr/asm/arch/8051/8051_ass.c" (28 Oct 2019, 32097 Bytes) of package /linux/privat/radare2-4.0.0.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "8051_ass.c" see the Fossies "Dox" file reference documentation.

    1 /* radare - LGPL - Copyright 2009-2019 - hmht */
    2 #include"8051_ass.h"
    3 
    4 /*****************************************************************************\
    5  *              Architecture
    6  *
    7  * File Contents:
    8  * ## Section 1. Token parsers
    9  * ## Section 2: some weird datastructure
   10  * ## Section 3: token classifiers
   11  * ## Section 4: Generic instruction emitters
   12  * ## Section 5: Specific instruction parsing
   13  * ## Section 6: mnemonic token dispatcher
   14  * ## Section 7: radare2 glue and mnemonic tokenization
   15  *
   16  * documentation date: 2019-10-04
   17  * documentation date: 2019-10-14
   18  *
   19  * 1. Token parsers
   20  *
   21  * I'm sure most of this is re-inventing the wheel, (poorly, too), this is
   22  * because I didn't take enough time to find a proper implementation.
   23  * If you know a r2lib function that does the job it should be used instead.
   24  *
   25  *
   26  * 2. Some weird datastructure
   27  *
   28  * Started out for matching strings whitespace-independently, and uses c99s
   29  * (struct literal){} notation and is zero-terminated.
   30  * I wrote this thing in the late hours of r2con2019 while jetlagged.
   31  *
   32  * Currently the last place it's used in is mnemonic matching, since I hacked
   33  * in a nr-of-arguments field into the table. Whitespace-independence is
   34  * currently a bug since it'll accept "n o p" as "nop"... also functions need
   35  * to be renamed.
   36  *
   37  * One pitfall is that the match is lazy (non-greedy?) in other words, "reti"
   38  * is matched by "ret", but not the other way around, so the most-specific match
   39  * must come first in the list.
   40  *
   41  *
   42  * 3. token classifiers
   43  *
   44  * right now mostly has functions to distinguish between argument types, and
   45  * parses their data. (Some argument types, such as relative addresses and bits,
   46  * require parsing to assess their validity.)
   47  *
   48  *
   49  * 4. Generic instruction parsing
   50  *
   51  * I started out just writing specific parser for each
   52  * mnemonic(-variant), and halfway through I started noticing a lot of
   53  * code duplication, so extracted some of it.
   54  *  Their basic operation is simple: dump whatever you're given into the out
   55  * parameter, and move the write pointer forward.
   56  *
   57  *
   58  * 5. Specific instruction parsing
   59  *
   60  * Of course, in the very beginning I started out with the idea to
   61  * completely generalize everything, but there were more edge cases than
   62  * my small brain could handle, so I scrapped that and started punching
   63  * out special parsers for each instruction variant mindlessly.
   64  *  The result of this approach is really glaring. Lots of duplication.
   65  * There's lots of easy deduplication opportunity, and now that it's finished I
   66  * have some ideas on how to do it better, but eh.
   67  *
   68  *
   69  * 6. mnemonic token dispatcher
   70  *
   71  * The weird datastructure returns! with macros! it's basically just a jump
   72  * table with one bit of validation.
   73  *
   74  *
   75  * 7. Radare2 glue and mnemonic tokenization
   76  *
   77  * Had one look at the gb glue code and copied the lot of it without really
   78  * understanding what I'm doing.
   79  *
   80  * also splits out the first word (asserted mnemonic) for the token dispatcher,
   81  * and splits up the arguments
   82  *
   83 \*****************************************************************************/
   84 #include<r_util.h>
   85 #include<string.h>
   86 
   87 /******************************************************************************
   88  * ## Section 1. Generic Token parsers
   89  *               ------- -------------*/
   90 
   91 static bool parse_hexadecimal(char const* hexstr, ut16* out) {
   92     if ( !hexstr || hexstr[0] != '0'
   93         || !(hexstr[1] == 'x' || hexstr[1] == 'X')) {
   94         return false;
   95     }
   96     *out = 0;
   97     char const*p;
   98     for (p = hexstr + 2; p < hexstr + 6 && *p ; p += 1) {
   99         *out <<= 4;
  100         if ( '0' <= *p && *p <= '9' ) {
  101             *out |= *p - '0';
  102         } else if ( 'a' <= *p && *p <= 'f' ) {
  103             *out |= *p - 'a' + 10;
  104         } else if ( 'A' <= *p && *p <= 'F' ) {
  105             *out |= *p - 'A' + 10;
  106         } else {
  107             return false;
  108         }
  109     }
  110     return ! *p;
  111 }
  112 
  /**
   * Splits the given string into chunks separated by unquoted commas and
   * copies chunk number n (1-based) into dest, with leading and trailing
   * blanks (space, tab) stripped.
   *
   * Only text before a newline, carriage return, NUL or unquoted semicolon is
   * considered. Text between two single quotes is quoted: commas, semicolons
   * and blanks inside it belong to the chunk. There is no escaping.
   *
   * @param multi  the argument text; NULL yields false.
   * @param n      1-based index of the wanted chunk.
   * @param dest   output buffer. No size is passed in, so the caller must
   *               provide at least strlen(multi) + 1 bytes; the function
   *               never writes more than that.
   * @return true when chunk n exists and is not empty; false otherwise, and
   *         also when a quote is left unterminated. On false the contents of
   *         dest are unspecified.
   */
  static bool get_arg(char const*multi, int n, char * dest)
  {
      bool in_string = false;
      if (!multi) {
          return false;
      }

      // Step 1: walk past the n-1 chunks in front of the wanted one.
      int to_skip = n - 1;
      while (to_skip && *multi && *multi != '\n' && *multi != '\r') {
          char const c = *multi;
          multi += 1;
          if (c == '\'') {
              in_string = !in_string;
          } else if (!in_string) {
              if (c == ';') {
                  return false;
              }
              if (c == ',') {
                  to_skip -= 1;
              }
          }
      }
      if (!*multi || *multi == '\n' || *multi == '\r' || in_string) {
          return false;
      }

      // Step 2: drop leading blanks.
      while (*multi == ' ' || *multi == '\t') {
          multi += 1;
      }

      // Step 3: copy up to the next unquoted separator, remembering where the
      // last non-blank character landed so trailing blanks can be cut off.
      char *last_nonblank = NULL;
      for (; *multi && *multi != '\n' && *multi != '\r'; multi += 1) {
          char const c = *multi;
          if (c == '\'') {
              in_string = !in_string;
          } else if (!in_string && (c == ';' || c == ',')) {
              break;
          }
          // Quoted blanks are content, so they count as non-blank here.
          if (in_string || (c != ' ' && c != '\t')) {
              last_nonblank = dest;
          }
          *dest = c;
          dest += 1;
      }

      if (in_string || !last_nonblank) {
          *dest = '\0';
          return false;
      }
      last_nonblank[1] = '\0';
      return true;
  }
  180 
  /**
   * Tokenizes the argument list into at most three separately allocated
   * strings.
   *
   * @param arg        array of at least 3 char pointers. Slot i is written
   *                   only when argument i+1 was found; the caller owns (and
   *                   must free) every slot that was filled.
   * @param arguments  the text following the mnemonic.
   * @return the number of arguments stored (0..3), or 4 when a fourth
   *         argument exists; in that case every slot has been freed and set
   *         to NULL. If memory runs out, every slot filled so far is freed
   *         and set to NULL and 0 is returned.
   *
   * TODO: merge with get_arg, as this is now the only user
   */
  static int get_arguments (char**arg, char const*arguments) {
      enum { MAX_ARGS = 3 };
      if (!arg || !arguments) {
          return 0;
      }
      // One scratch buffer is enough: get_arg never writes more than
      // strlen(arguments) + 1 bytes.
      char *scratch = malloc (strlen (arguments) + 1);
      if (!scratch) {
          return 0;
      }

      int result;
      int count = 0;
      while (count < MAX_ARGS && get_arg (arguments, count + 1, scratch)) {
          size_t const size = strlen (scratch) + 1;
          char *copy = malloc (size);
          if (!copy) {
              result = 0;
              goto discard;
          }
          memcpy (copy, scratch, size);
          arg[count] = copy;
          count += 1;
      }

      if (count == MAX_ARGS && get_arg (arguments, MAX_ARGS + 1, scratch)) {
          // more arguments than any instruction takes
          result = MAX_ARGS + 1;
          goto discard;
      }
      free (scratch);
      return count;

  discard:
      while (count > 0) {
          count -= 1;
          free (arg[count]);
          arg[count] = NULL;
      }
      free (scratch);
      return result;
  }
  220 
  /**
   * Tells whether the given character ends the assembly text of a line:
   * NUL, line feed, carriage return, or a semicolon.
   */
  static bool terminates_asm_line(char c) {
      switch (c) {
      case '\0':
      case '\n':
      case '\r':
      case ';':
          return true;
      default:
          return false;
      }
  }
  227 
  /**
   * Like r_str_casecmp, but ignores all isspace characters, wherever they
   * appear (leading, embedded or trailing).
   *
   * @param a  first string, may be NULL.
   * @param b  second string, may be NULL.
   * @return 0 when both strings are equal after dropping whitespace and
   *         folding case; otherwise the difference of the first pair of
   *         lower-cased characters that differ. Two NULL pointers compare
   *         equal; a NULL pointer sorts before any string.
   */
  static int str_iwhitecasecmp(char const*a, char const*b) {
      if (!a || !b) {
          return (a ? 1 : 0) - (b ? 1 : 0);
      }
      for (;;) {
          // ctype functions need the value of an unsigned char
          while (isspace ((unsigned char)*a)) {
              a += 1;
          }
          while (isspace ((unsigned char)*b)) {
              b += 1;
          }
          int const ca = tolower ((unsigned char)*a);
          int const cb = tolower ((unsigned char)*b);
          if (ca != cb || !ca) {
              return ca - cb;
          }
          a += 1;
          b += 1;
      }
  }
  259 
  260 /******************************************************************************
  261  * ## Section 2: some weird datastructure
  262                  ------------------------*/
  263 
  264 typedef bool (*parse_mnem_args)(char const*const*, ut16, ut8**);
  265 
  266 typedef struct {
  267     char const*const pattern;
  268     parse_mnem_args res;
  269     int args;
  270 } ftable[];
  271 
  /**
   * Checks whether str starts with pattern, ignoring case and ignoring
   * whitespace on both sides.
   *
   * The match is a prefix match: anything in str after the pattern has been
   * consumed is not looked at, so "reti" matches the pattern "ret". Because
   * whitespace in str is skipped, "n o p" also matches "nop".
   *
   * @param str      text to test; NULL only matches a NULL pattern.
   * @param pattern  pattern to look for; NULL matches everything.
   */
  static bool pattern_match(char const*str, char const*pattern) {
      if (!pattern) {
          return true;
      }
      if (!str) {
          return false;
      }

      for (; *pattern != '\0'; pattern += 1) {
          // ctype functions need the value of an unsigned char
          unsigned char const want = (unsigned char)*pattern;
          if (isspace (want)) {
              continue;
          }
          while (isspace ((unsigned char)*str)) {
              str += 1;
          }
          // A NUL in str never equals a pattern character, so this also
          // stops at the end of str.
          if (tolower (want) != tolower ((unsigned char)*str)) {
              return false;
          }
          str += 1;
      }
      return true;
  }
  297 
  298 static parse_mnem_args match_prefix_f(int*args, char const*str, ftable const tbl) {
  299     int row = 0;
  300     while (tbl[row].pattern) {
  301         if (pattern_match (str, tbl[row].pattern)) {
  302             *args = tbl[row].args;
  303             return tbl[row].res;
  304         }
  305         else {
  306             row += 1;
  307         }
  308     }
  309     *args = tbl[row].args;
  310     return tbl[row].res;
  311 }
  312 
  313 /******************************************************************************
  314  * ## Section 3: token classifiers
  315                  -----------------*/
  316 
  /**
   * Recognizes the indirectly addressed registers r0 and r1, written either
   * as "@r0"/"@r1" or as "[r0]"/"[r1]". The 'r' may be upper or lower case.
   * NULL is not a register.
   */
  static bool is_indirect_reg(char const*str)
  {
      if (!str) {
          return false;
      }

      // Check the overall shape first so that the indexing below stays
      // inside the string.
      switch (str[0]) {
      case '@':
          if (r_str_ansi_nlen (str, 4) != 3) {
              return false;
          }
          break;
      case '[':
          if (r_str_ansi_nlen (str, 5) != 4 || str[3] != ']') {
              return false;
          }
          break;
      default:
          return false;
      }

      return tolower (str[1]) == 'r' && (str[2] == '0' || str[2] == '1');
  }
  342 
  /**
   * Recognizes a plain register name: 'r' or 'R' followed by a single digit
   * from 0 to 7 and nothing else. NULL is not a register.
   */
  static bool is_reg(char const*str)
  {
      if (!str || tolower (str[0]) != 'r') {
          return false;
      }
      if (r_str_ansi_nlen (str, 3) != 2) {
          return false;
      }
      return str[1] >= '0' && str[1] <= '7';
  }
  351 
  352 /**
  353  * returns true if the given number is a valid relative address from the given
  354  *  pc, the relative address is stored in the *out parameter.
  355  */
  356 static bool relative_address(ut16 pc, ut16 address, ut8 *out)
  357 {
  358     st16 diff = address - (pc + 2);
  359     if (diff < INT8_MIN || INT8_MAX < diff) {
  360         return false;
  361     }
  362     else {
  363         *out = diff;
  364         return true;
  365     }
  366 }
  367 
  368 static bool resolve_immediate(char const* imm_str, ut16* imm_out) {
  369     // rasm2 resolves symbols, so does this really only need to parse hex?
  370     // maybe TODO: skip leading '#' if exists?
  371     return parse_hexadecimal (imm_str, imm_out);
  372 }
  373 
  374 static bool to_address(char const* addr_str, ut16* addr_out) {
  375     // rasm2 resolves symbols, so does this really only need to parse hex?
  376     // maybe TODO: check address bounds?
  377     return parse_hexadecimal (addr_str, addr_out);
  378 }
  379 
  380 /**
  381  * attempts to parse the given string as an 8bit-wide address
  382  */
  383 static bool address_direct(char const* addr_str, ut8* addr_out) {
  384     ut16 addr_big;
  385     // rasm2 resolves symbols, so does this really only need to parse hex?
  386     // maybe TODO: check address bounds?
  387     if ( !parse_hexadecimal (addr_str, &addr_big)
  388         || (0xFF < addr_big)) {
  389         return false;
  390     }
  391     *addr_out = addr_big;
  392     return true;
  393 }
  394 
  395 /**
  396  * attempts to parse the given string as a bit-address
  397  */
  398 static bool address_bit(char const* addr_str, ut8* addr_out) {
  399     char *bitpart = malloc (strlen (addr_str) + 1);
  400     char *bytepart = malloc (strlen (addr_str) + 1);
  401     char const *separator = r_str_lchr (addr_str, '.');
  402     ut8 byte;
  403     int bit;
  404     bool ret = false;
  405     // TODO: check if symbols are resolved properly in all cases:
  406     // - symbol.2
  407     // - 0x25.symbol
  408     // - symbol.symbol
  409     // - symbol
  410     if (!separator) {
  411         goto end;
  412     }
  413     r_str_ncpy (bytepart, addr_str, separator - addr_str + 1);
  414     bytepart[separator - addr_str + 1] = '\0';
  415     r_str_ncpy (bitpart, separator + 1, strlen (separator));
  416     if (!address_direct (bytepart, &byte)) {
  417         goto end;
  418     }
  419     if (1 < strlen (bitpart)
  420         || bitpart[0] < '0' || '7' < bitpart[0]) {
  421         ret = false;
  422         goto end;
  423     }
  424     bit = bitpart[0] - '0';
  425     if (0x20 <= byte && byte < 0x30) {
  426         *addr_out = (byte - 0x20) * 8 + bit;
  427         ret = true;
  428     } else if (0x80 <= byte && !(byte%8)) {
  429         *addr_out = byte + bit;
  430         ret = true;
  431     }
  432 end:
  433     free (bitpart); bitpart = 0;
  434     free (bytepart); bytepart = 0;
  435     return ret;
  436 }
  437 
  /**
   * Extracts the register index from a register name, accepting both plain
   * ("r3") and indirect ("@r1", "[r1]") spellings.
   *
   * @return the index digit as a number, or 8 when the string is neither
   *         kind of register.
   */
  static int register_number(char const*reg) {
      int digit_at;
      if (is_reg (reg)) {
          digit_at = 1;
      } else if (is_indirect_reg (reg)) {
          digit_at = 2;
      } else {
          return 8; // not register 0-7, so...
      }
      return reg[digit_at] - '0';
  }
  451 
  452 /******************************************************************************
  453  * ## Section 4: Generic instruction emitters
  454                  ----------------------------*/
  455 
  456 static bool single_byte_instr(ut8 const instr, ut8 **out) {
  457     (*out)[0] = instr;
  458     *out += 1;
  459     return true;
  460 }
  461 
  462 static bool singlearg_bit(ut8 const firstbyte, char const* arg, ut8 **out) {
  463     ut8 address;
  464     if (!address_bit (arg, &address)) {
  465         return false;
  466     }
  467     (*out)[0] = firstbyte;
  468     (*out)[1] = address;
  469     *out += 2;
  470     return true;
  471 }
  472 
  473 static bool singlearg_reladdr(ut8 const firstbyte, char const* arg
  474     , ut16 const pc, ut8 **out)
  475 {
  476     ut16 address;
  477     if (!to_address (arg, &address)
  478         || !relative_address (pc, address, (*out)+1)) {
  479         return false;
  480     }
  481     (*out)[0] = firstbyte;
  482     *out += 2;
  483     return true;
  484 }
  485 
  486 static bool singlearg_direct(ut8 const firstbyte, char const* arg
  487     , ut8 **out)
  488 {
  489     ut8 address;
  490     if (!address_direct (arg, &address)) {
  491         return false;
  492     }
  493     (*out)[0] = firstbyte;
  494     (*out)[1] = address;
  495     *out += 2;
  496     return true;
  497 }
  498 
  499 static bool singlearg_immediate(ut8 firstbyte, char const* imm_str, ut8**out) {
  500     ut16 imm;
  501     if (imm_str[0] != '#'
  502         || !resolve_immediate (imm_str + 1, &imm)) {
  503         return false;
  504     }
  505     (*out)[0] = firstbyte;
  506     (*out)[1] = imm & 0x00FF;
  507     *out += 2;
  508     return true;
  509 }
  510 
  511 static bool singlearg_register(ut8 firstbyte, char const*reg, ut8**out) {
  512     return single_byte_instr (firstbyte | register_number (reg), out);
  513 }
  514 
  515 static bool single_a_arg_instr(ut8 const firstbyte, char const*arg
  516     , ut8 **out)
  517 {
  518     if (r_str_casecmp ("a", arg)) {
  519         return false;
  520     }
  521     return single_byte_instr (firstbyte, out);
  522 }
  523 
  524 /******************************************************************************
  525  * ## Section 5: Specific instruction parsing
  526                  ----------------------------*/
  527 
  528 static bool mnem_acall(char const*const*arg, ut16 pc, ut8**out) {
  529     ut16 address;
  530     if (!to_address (arg[0], &address)) {
  531         return false;
  532     }
  533     (*out)[0] = ((address & 0x0700) >> 3) | 0x11;
  534     (*out)[1] = address & 0x00FF;
  535     *out += 2;
  536     return true;
  537 }
  538 
  539 static bool mnem_add(char const*const*arg, ut16 pc, ut8**out) {
  540     if (r_str_casecmp (arg[0], "a")) {
  541         return false;
  542     }
  543     switch (arg[1][0]) {
  544     break; case '@': case '[':
  545         return singlearg_register (0x26, arg[1], out);
  546     break; case '#':
  547         return singlearg_immediate (0x24, arg[1], out);
  548     }
  549     if (is_reg (arg[1])) {
  550         return singlearg_register (0x28, arg[1], out);
  551     } else {
  552         return singlearg_direct (0x25, arg[1], out);
  553     }
  554 }
  555 
  556 static bool mnem_addc(char const*const*arg, ut16 pc, ut8**out) {
  557     if (r_str_casecmp (arg[0], "a")) {
  558         return false;
  559     }
  560     if (is_indirect_reg (arg[1])) {
  561         return singlearg_register (0x36, arg[1], out);
  562     }
  563     if (arg[1][0] == '#') {
  564         return singlearg_immediate (0x34, arg[1], out);
  565     }
  566     if (is_reg (arg[1])) {
  567         return singlearg_register (0x38, arg[1], out);
  568     }
  569     return singlearg_direct (0x35, arg[1], out);
  570 }
  571 
  572 static bool mnem_ajmp(char const*const*arg, ut16 pc, ut8**out) {
  573     ut16 address;
  574     if (!to_address (arg[0], &address)) {
  575         return false;
  576     }
  577     (*out)[0] = ((address & 0x0700) >> 3 ) | 0x01;
  578     (*out)[1] = address & 0x00FF;
  579     *out += 2;
  580     return true;
  581 }
  582 
  583 static bool mnem_anl(char const*const*arg, ut16 pc, ut8**out) {
  584     if (!strcmp (arg[0], "c")) {
  585         if (arg[1][0] == '/') {
  586             return singlearg_bit (0xb0, arg[1] + 1, out);
  587         }
  588         return singlearg_bit (0x82, arg[1], out);
  589     }
  590     if (!strcmp (arg[0], "a")) {
  591         if (is_indirect_reg (arg[1])) {
  592             return singlearg_register (0x56, arg[1], out);
  593         }
  594         if (arg[1][0] == '#') {
  595             return singlearg_immediate (0x54, arg[1], out);
  596         }
  597         if (is_reg (arg[1])) {
  598             return singlearg_register (0x58, arg[1], out);
  599         }
  600         return singlearg_direct (0x55, arg[1], out);
  601     }
  602 
  603     ut8 address;
  604     if (!address_direct (arg[0], &address)) {
  605         return false;
  606     }
  607     if (!r_str_casecmp (arg[1], "a")) {
  608         return singlearg_direct (0x52, arg[0], out);
  609     }
  610     ut16 imm;
  611     if (arg[1][0] != '#' || !resolve_immediate (arg[1] + 1, &imm)) {
  612         return false;
  613     }
  614     (*out)[0] = 0x53;
  615     (*out)[1] = address;
  616     (*out)[2] = imm & 0x00FF;
  617     *out += 3;
  618     return true;
  619 }
  620 
  621 static bool mnem_cjne(char const*const*arg, ut16 pc, ut8**out) {
  622     ut16 address;
  623     if (!to_address (arg[2], &address)
  624         || !relative_address (pc+1, address, (*out)+2)) {
  625         return false;
  626     }
  627     if (!r_str_casecmp (arg[0], "a")) {
  628         if (arg[1][0] == '#') {
  629             ut16 imm;
  630             if (!resolve_immediate (arg[1] + 1, &imm)) {
  631                 return false;
  632             }
  633             (*out)[0] = 0xb4;
  634             (*out)[1] = imm & 0x00FF;
  635             // out[2] set earlier
  636             *out += 3;
  637             return true;
  638         }
  639         ut8 address;
  640         if (!address_direct (arg[1], &address)) {
  641             return false;
  642         }
  643         (*out)[0] = 0xb5;
  644         (*out)[1] = address;
  645         // out[2] set earlier
  646         *out += 3;
  647         return true;
  648     }
  649     if (is_reg (arg[0])) {
  650         ut16 imm;
  651         if (!resolve_immediate (arg[1] + 1, &imm)) {
  652             return false;
  653         }
  654         (*out)[0] = 0xbf | register_number (arg[0]) ;
  655         (*out)[1] = imm & 0x00FF;
  656         // out[2] set earlier
  657         *out += 3;
  658         return true;
  659     }
  660     if (is_indirect_reg (arg[0])) {
  661         ut16 imm;
  662         if (!resolve_immediate (arg[1] + 1, &imm)) {
  663             return false;
  664         }
  665         (*out)[0] = 0xb6 | register_number (arg[0]) ;
  666         (*out)[1] = imm & 0x00FF;
  667         // out[2] set earlier
  668         *out += 3;
  669         return true;
  670     }
  671     return false;
  672 }
  673 
  674 static bool mnem_clr(char const*const*arg, ut16 pc, ut8**out) {
  675     if (!r_str_casecmp  ("a", arg[0])) {
  676         return single_byte_instr (0xe4, out);
  677     }
  678     if (!r_str_casecmp  ("c", arg[0])) {
  679         return single_byte_instr (0xc3, out);
  680     }
  681     return singlearg_bit (0xc2, arg[0], out);
  682 }
  683 
  684 static bool mnem_cpl(char const*const*arg, ut16 pc, ut8**out) {
  685     if (!r_str_casecmp  ("a", arg[0])) {
  686         return single_byte_instr (0xf4, out);
  687     }
  688     if (!r_str_casecmp  ("c", arg[0])) {
  689         return single_byte_instr (0xb3, out);
  690     }
  691     return singlearg_bit (0xb2, arg[0], out);
  692 }
  693 
  694 static bool mnem_da(char const*const*arg, ut16 pc, ut8**out) {
  695     return single_a_arg_instr (0xd4, arg[0], out);
  696 }
  697 
  698 static bool mnem_dec(char const*const*arg, ut16 pc, ut8**out) {
  699     if (is_indirect_reg (arg[0])) {
  700         return singlearg_register (0x16, arg[0], out);
  701     }
  702     if (is_reg (arg[0])) {
  703         return singlearg_register (0x18, arg[0], out);
  704     }
  705     if (!r_str_casecmp ("a", arg[0])) {
  706         return single_byte_instr (0x14, out);
  707     }
  708     return singlearg_direct (0x15, arg[0], out);
  709 }
  710 
  711 static bool mnem_div(char const*const*arg, ut16 pc, ut8**out) {
  712     if (r_str_casecmp  ("ab", arg[0])) {
  713         return false;
  714     }
  715     return single_byte_instr (0x84, out);
  716 }
  717 
  718 static bool mnem_djnz(char const*const*arg, ut16 pc, ut8**out) {
  719     ut16 jmp_address;
  720     if (!to_address (arg[1], &jmp_address)) {
  721         return false;
  722     }
  723     if (!relative_address (pc, jmp_address, (*out) + 2)) {
  724         return false;
  725     }
  726 
  727     if (is_reg (arg[0])) {
  728         (*out)[0] = 0xd8 | register_number (arg[0]);
  729         (*out)[1] = (*out)[2];
  730         *out += 2;
  731         return true;
  732     }
  733     ut8 dec_address;
  734     if (!address_direct (arg[0], &dec_address))  {
  735         return false;
  736     }
  737     (*out)[0] = 0xd5;
  738     (*out)[1] = dec_address;
  739     (*out)[2] -= 1;
  740     *out += 3;
  741     return true;
  742 }
  743 
  744 static bool mnem_inc(char const*const*arg, ut16 pc, ut8**out) {
  745     if (is_reg (arg[0])) {
  746         return singlearg_register (0x08, arg[0], out);
  747     }
  748     if (is_indirect_reg (arg[0])) {
  749         return singlearg_register (0x06, arg[0], out);
  750     }
  751     if (!r_str_casecmp  ("a", arg[0])) {
  752         return single_byte_instr (0x04, out);
  753     }
  754     if (!r_str_casecmp ("dptr", arg[0])) {
  755         return single_byte_instr (0xa3, out);
  756     }
  757     return singlearg_direct (0x05, arg[0], out);
  758 }
  759 
  760 static bool mnem_jb(char const*const*arg, ut16 pc, ut8**out) {
  761     ut8 cmp_addr;
  762     if (!address_bit (arg[0], &cmp_addr)) {
  763         return false;
  764     }
  765     ut16 jmp_addr;
  766     if (!to_address (arg[1], &jmp_addr)
  767         || !relative_address (pc + 1, jmp_addr, (*out) + 2)) {
  768         return false;
  769     }
  770     (*out)[0] = 0x20;
  771     (*out)[1] = cmp_addr;
  772     // out[2] set earlier
  773     *out += 3;
  774     return true;
  775 }
  776 
  777 static bool mnem_jbc(char const*const*arg, ut16 pc, ut8**out) {
  778     ut8 cmp_addr;
  779     if (!address_bit (arg[0], &cmp_addr)) {
  780         return false;
  781     }
  782     ut16 jmp_addr;
  783     if (!to_address (arg[1], &jmp_addr)
  784         || !relative_address (pc + 1, jmp_addr, (*out) + 2)) {
  785         return false;
  786     }
  787     (*out)[0] = 0x10;
  788     (*out)[1] = cmp_addr;
  789     // out[2] set earlier
  790     *out += 3;
  791     return true;
  792 }
  793 
  794 static bool mnem_jc(char const*const*arg, ut16 pc, ut8**out) {
  795     return singlearg_reladdr (0x40, arg[0], pc, out);
  796 }
  797 
  798 static bool mnem_jnb(char const*const*arg, ut16 pc, ut8**out) {
  799     ut8 cmp_addr;
  800     if (!address_bit (arg[0], &cmp_addr)) {
  801         return false;
  802     }
  803     ut16 jmp_addr;
  804     if (!to_address (arg[1], &jmp_addr)
  805         || !relative_address (pc + 1, jmp_addr, (*out) + 2)) {
  806         return false;
  807     }
  808     (*out)[0] = 0x30;
  809     (*out)[1] = cmp_addr;
  810     // out[2] set earlier
  811     *out += 3;
  812     return true;
  813 }
  814 
  815 static bool mnem_jnc(char const*const*arg, ut16 pc, ut8**out) {
  816     return singlearg_reladdr (0x50, arg[0], pc, out);
  817 }
  818 
  819 static bool mnem_jnz(char const*const*arg, ut16 pc, ut8**out) {
  820     return singlearg_reladdr (0x70, arg[0], pc, out);
  821 }
  822 
  823 static bool mnem_jz(char const*const*arg, ut16 pc, ut8**out) {
  824     return singlearg_reladdr (0x60, arg[0], pc, out);
  825 }
  826 
  827 static bool mnem_lcall(char const*const*arg, ut16 pc, ut8**out) {
  828     ut16 address;
  829     if (!to_address (arg[0], &address)) {
  830         return false;
  831     }
  832     (*out)[0] = 0x12;
  833     (*out)[1] = ((address & 0xFF00) >> 8) & 0x00FF;
  834     (*out)[2] = address & 0x00FF;
  835     *out += 3;
  836     return true;
  837 }
  838 
  839 static bool mnem_ljmp(char const*const*arg, ut16 pc, ut8**out) {
  840     ut16 address;
  841     if (!to_address (arg[0], &address)) {
  842         return false;
  843     }
  844     (*out)[0] = 0x02;
  845     (*out)[1] = ((address & 0xFF00) >> 8) & 0x00FF;
  846     (*out)[2] = address & 0x00FF;
  847     *out += 3;
  848     return true;
  849 }
  850 
  851 static bool mnem_mov_c(char const*const*arg, ut16 pc, ut8**out) {
  852     return singlearg_bit (0xa2, arg[1], out);
  853 }
  854 
  855 static bool mnem_mov(char const*const*arg, ut16 pc, ut8**out) {
  856     if (!r_str_casecmp (arg[0], "dptr")) {
  857         ut16 imm;
  858         if (!resolve_immediate (arg[1] + 1, &imm)) {
  859             return false;
  860         }
  861         (*out)[0] = 0x90;
  862         (*out)[1] = imm >> 8;
  863         (*out)[2] = imm;
  864         *out += 3;
  865         return true;
  866     }
  867     if (is_indirect_reg (arg[0])) {
  868         if (!r_str_casecmp (arg[1], "a")) {
  869             return singlearg_register (0xf6, arg[0], out);
  870         }
  871         if (arg[1][0] != '#' ) {
  872             return singlearg_direct (
  873                 0xa6 | register_number (arg[0])
  874                 , arg[1]
  875                 , out);
  876         }
  877         return singlearg_immediate (0x76 | register_number (arg[0])
  878             , arg[1]
  879             , out);
  880     }
  881     if (!r_str_casecmp (arg[0], "a")) {
  882         if (is_indirect_reg (arg[1])) {
  883             return singlearg_register (0xe6, arg[1], out);
  884         }
  885         if (is_reg (arg[1])) {
  886             return singlearg_register (0xe8, arg[1], out);
  887         }
  888         if (arg[1][0] == '#') {
  889             return singlearg_immediate (0x74, arg[1], out);
  890         }
  891         return singlearg_direct (0xe5, arg[1], out);
  892     }
  893     if (is_reg (arg[0])) {
  894         if (!r_str_casecmp (arg[1], "a")) {
  895             return singlearg_register (0xf8, arg[0], out);
  896         }
  897         if (arg[1][0] == '#') {
  898             return singlearg_immediate (
  899                 0x78 | register_number (arg[0])
  900                 , arg[1]
  901                 , out);
  902         }
  903         return singlearg_direct (0xa8 | register_number (arg[0])
  904             , arg[1]
  905             , out);
  906     }
  907     if (!r_str_casecmp (arg[1], "c")) {
  908         return singlearg_bit (0x92, arg[0], out);
  909     }
  910     if (!r_str_casecmp (arg[1], "a")) {
  911         return singlearg_direct (0xf5,  arg[0], out);
  912     }
  913     if (is_reg (arg[1])) {
  914         return singlearg_direct (0x88 | register_number (arg[1])
  915             , arg[0]
  916             , out);
  917     }
  918     if (is_indirect_reg (arg[1])) {
  919         return singlearg_direct (0x86 | register_number (arg[1])
  920             , arg[0]
  921             , out);
  922     }
  923     ut8 dest_addr;
  924     if (!address_direct (arg[0], &dest_addr)) {
  925         return false;
  926     }
  927     if (arg[1][0] == '#') {
  928         ut16 imm;
  929         if (!resolve_immediate (arg[1] + 1, &imm)) {
  930             return false;
  931         }
  932         (*out)[0] = 0x75;
  933         (*out)[1] = dest_addr;
  934         (*out)[2] = imm & 0x00FF;
  935         *out += 3;
  936         return true;
  937     }
  938     ut8 src_addr;
  939     if (!address_direct (arg[1], &src_addr)) {
  940         return false;
  941     }
  942     (*out)[0] = 0x85;
  943     (*out)[1] = src_addr;
  944     (*out)[2] = dest_addr;
  945     *out += 3;
  946     return true;
  947 }
  948 
  949 static bool mnem_movc(char const*const*arg, ut16 pc, ut8**out) {
  950     if (r_str_casecmp (arg[0], "a")) {
  951         return false;
  952     }
  953     if (!str_iwhitecasecmp (arg[1], "@a+dptr")
  954         || !str_iwhitecasecmp (arg[1], "[a+dptr]")) {
  955         return single_byte_instr (0x93, out);
  956     }
  957     if (!str_iwhitecasecmp (arg[1], "@a+pc")
  958         || !str_iwhitecasecmp (arg[1], "[a+pc]")) {
  959         return single_byte_instr (0x83, out);
  960     }
  961     return false;
  962 }
  963 
  964 static bool mnem_movx(char const*const*arg, ut16 pc, ut8**out) {
  965     if (!r_str_casecmp (arg[0], "a")) {
  966         if (is_indirect_reg (arg[1])) {
  967             return singlearg_register (0xe2, arg[1], out);
  968         }
  969         if (!str_iwhitecasecmp (arg[1], "@dptr")
  970             || !str_iwhitecasecmp (arg[1], "[dptr]")) {
  971             return single_byte_instr (0xe0, out);
  972         }
  973     }
  974     if (r_str_casecmp (arg[1], "a")) {
  975         return false;
  976     }
  977     if (is_indirect_reg (arg[0])) {
  978         return singlearg_register (0xf2, arg[0], out);
  979     }
  980     if (!str_iwhitecasecmp (arg[0], "@dptr")
  981         || !str_iwhitecasecmp (arg[0], "[dptr]")) {
  982         return single_byte_instr (0xf0, out);
  983     }
  984     return false;
  985 }
  986 
  987 static bool mnem_mul(char const*const*arg, ut16 pc, ut8**out) {
  988     if (r_str_ncasecmp ("ab", arg[0], 3)) {
  989         return false;
  990     }
  991     return single_byte_instr (0xa4, out);
  992 }
  993 
  994 static bool mnem_nop(char const*const*arg, ut16 pc, ut8**out) {
  995     return single_byte_instr (0x00, out);
  996 }
  997 
  998 static bool mnem_orl(char const*const*arg, ut16 pc, ut8**out) {
  999     if (!r_str_casecmp (arg[0], "c")) {
 1000         if (arg[1][0] == '/') {
 1001             return singlearg_bit (0xa0, arg[1] + 1, out);
 1002         }
 1003         return singlearg_bit (0x72, arg[1], out);
 1004     }
 1005     if (!r_str_casecmp (arg[0], "a")) {
 1006         if (is_indirect_reg (arg[1])) {
 1007             return singlearg_register (0x46, arg[1], out);
 1008         }
 1009         if (arg[1][0] == '#') {
 1010             return singlearg_immediate (0x44, arg[1], out);
 1011         }
 1012         if (is_reg (arg[1])) {
 1013             return singlearg_register (0x48, arg[1], out);
 1014         }
 1015         return singlearg_direct (0x45, arg[1], out);
 1016     }
 1017 
 1018     if (arg[1][0] != '#') {
 1019         return singlearg_direct (0x42, arg[0], out);
 1020     }
 1021 
 1022     ut8 dest_addr;
 1023     if (!address_direct (arg[0], &dest_addr)) {
 1024         return false;
 1025     }
 1026     ut16 imm;
 1027     if (!resolve_immediate (arg[1] + 1, &imm)) {
 1028         return false;
 1029     }
 1030     (*out)[0] = 0x43;
 1031     (*out)[1] = dest_addr;
 1032     (*out)[2] = imm & 0x00FF;
 1033     *out += 3;
 1034     return true;
 1035 }
 1036 
 1037 static bool mnem_pop(char const*const*arg, ut16 pc, ut8**out) {
 1038     return singlearg_direct (0xd0, arg[0], out);
 1039 }
 1040 
 1041 static bool mnem_push(char const*const*arg, ut16 pc, ut8**out) {
 1042     return singlearg_direct (0xc0, arg[0], out);
 1043 }
 1044 
 1045 static bool mnem_ret(char const*const*arg, ut16 pc, ut8**out) {
 1046     return single_byte_instr (0x22, out);
 1047 }
 1048 
 1049 static bool mnem_reti(char const*const*arg, ut16 pc, ut8**out) {
 1050     return single_byte_instr (0x32, out);
 1051 }
 1052 
 1053 static bool mnem_rl(char const*const*arg, ut16 pc, ut8**out) {
 1054     return single_a_arg_instr (0x23, arg[0], out);
 1055 }
 1056 
 1057 static bool mnem_rlc(char const*const*arg, ut16 pc, ut8**out) {
 1058     return single_a_arg_instr (0x33, arg[0], out);
 1059 }
 1060 
 1061 static bool mnem_rr(char const*const*arg, ut16 pc, ut8**out) {
 1062     return single_a_arg_instr (0x03, arg[0], out);
 1063 }
 1064 
 1065 static bool mnem_rrc(char const*const*arg, ut16 pc, ut8**out) {
 1066     return single_a_arg_instr (0x13, arg[0], out);
 1067 }
 1068 
 1069 static bool mnem_setb(char const*const*arg, ut16 pc, ut8**out) {
 1070     if (!r_str_casecmp  ("c", arg[0])) {
 1071         return single_byte_instr (0xd3, out);
 1072     }
 1073     return singlearg_bit (0xd2, arg[0], out);
 1074 }
 1075 
 1076 static bool mnem_sjmp(char const*const*arg, ut16 pc, ut8**out) {
 1077     return singlearg_reladdr (0x80, arg[0], pc, out);
 1078 }
 1079 
 1080 static bool mnem_jmp(char const*const*arg, ut16 pc, ut8**out) {
 1081     if (!str_iwhitecasecmp (arg[0], "@a+dptr")
 1082         || !str_iwhitecasecmp (arg[0], "[a+dptr]")) {
 1083         return single_byte_instr (0x73, out);
 1084     }
 1085 
 1086     ut16 address;
 1087     if (!to_address (arg[0], &address)) {
 1088         return false;
 1089     }
 1090     ut16 reladdr;
 1091     if ( pc < address ) {
 1092         reladdr = address - pc;
 1093     }
 1094     else {
 1095         reladdr = pc - address;
 1096     }
 1097 
 1098     if ( reladdr < 0x100 ) {
 1099         return mnem_sjmp (arg, pc, out);
 1100     }
 1101     else if ( reladdr < 0x08FF ) {
 1102         return mnem_ajmp (arg, pc, out);
 1103     }
 1104     else {
 1105         return mnem_ljmp (arg, pc, out);
 1106     }
 1107 }
 1108 
 1109 static bool mnem_subb(char const*const*arg, ut16 pc, ut8**out) {
 1110     if (r_str_casecmp (arg[0], "a")) {
 1111         return false;
 1112     }
 1113     if (is_indirect_reg (arg[1])) {
 1114         return singlearg_register (0x96, arg[1], out);
 1115     }
 1116     if (arg[1][0] == '#') {
 1117         return singlearg_immediate (0x94, arg[1], out);
 1118     }
 1119     if (is_reg (arg[1])) {
 1120         return singlearg_register (0x98, arg[1], out);
 1121     }
 1122     return singlearg_direct (0x95, arg[1], out);
 1123 }
 1124 
 1125 static bool mnem_swap(char const*const*arg, ut16 pc, ut8**out) {
 1126     return single_a_arg_instr (0xc4, arg[0], out);
 1127 }
 1128 
 1129 static bool mnem_xrl(char const*const*arg, ut16 pc, ut8**out) {
 1130     if (!r_str_casecmp (arg[0], "a")) {
 1131         if (is_indirect_reg (arg[1])) {
 1132             return singlearg_register (0x66, arg[1], out);
 1133         }
 1134         if (arg[1][0] == '#') {
 1135             return singlearg_immediate (0x64, arg[1], out);
 1136         }
 1137         if (is_reg (arg[1])) {
 1138             return singlearg_register (0x68, arg[1], out);
 1139         }
 1140         return singlearg_direct (0x65, arg[1], out);
 1141     }
 1142     if (arg[1][0] != '#') {
 1143         if (r_str_casecmp (arg[1], "a")) {
 1144             return false;
 1145         }
 1146         return singlearg_direct (0x62, arg[0], out);
 1147     }
 1148     ut8 dest_addr;
 1149     if (!address_direct (arg[0], &dest_addr)) {
 1150         return false;
 1151     }
 1152     ut16 imm;
 1153     if (!resolve_immediate (arg[1] + 1, &imm)) {
 1154         return false;
 1155     }
 1156     (*out)[0] = 0x63;
 1157     (*out)[1] = dest_addr;
 1158     (*out)[2] = imm & 0x00FF;
 1159     *out += 3;
 1160     return true;
 1161 }
 1162 
 1163 static bool mnem_xch(char const*const*arg, ut16 pc, ut8**out) {
 1164     if (r_str_casecmp (arg[0], "a")) {
 1165         return false;
 1166     }
 1167     if (is_indirect_reg (arg[1])) {
 1168         return singlearg_register (0xc6, arg[1], out);
 1169     }
 1170     if (is_reg (arg[1])) {
 1171         return singlearg_register (0xc8, arg[1], out);
 1172     }
 1173     return singlearg_direct (0xc5, arg[1], out);
 1174 }
 1175 
 1176 static bool mnem_xchd(char const*const*arg, ut16 pc, ut8**out) {
 1177     if (r_str_casecmp (arg[0], "a")) {
 1178         return false;
 1179     }
 1180     if (!is_indirect_reg (arg[1])) {
 1181         return false;
 1182     }
 1183     return singlearg_register (0xd6, arg[1], out);
 1184 }
 1185 
 1186 /******************************************************************************
 1187  * ## Section 6: mnemonic token dispatcher
 1188                  -------------------------*/
 1189 
 1190 static parse_mnem_args mnemonic(char const *user_asm, int*nargs) {
 1191     return match_prefix_f (nargs, user_asm, (ftable){
 1192     #define mnem(n, mn) { #mn " ", &mnem_ ## mn, n },
 1193     #define zeroarg_mnem(mn) { #mn , &mnem_ ## mn, 0 },
 1194         mnem (1, acall)
 1195         mnem (2, addc)
 1196         mnem (2, add)
 1197         mnem (1, ajmp)
 1198         mnem (2, anl)
 1199         mnem (3, cjne)
 1200         mnem (1, clr)
 1201         mnem (1, cpl)
 1202         mnem (1, da)
 1203         mnem (1, dec)
 1204         mnem (1, div)
 1205         mnem (2, djnz)
 1206         mnem (1, inc)
 1207         mnem (2, jbc)
 1208         mnem (2, jb)
 1209         mnem (1, jc)
 1210         mnem (1, jmp)
 1211         mnem (2, jnb)
 1212         mnem (1, jnc)
 1213         mnem (1, jz)
 1214         mnem (1, jnz)
 1215         mnem (1, lcall)
 1216         mnem (1, ljmp)
 1217 /* so uh, the whitespace-independent matching sees movc and mov c as the same
 1218  * thing...
 1219  * My first thought was to add an exception for mov c, but later I saw that it'd
 1220  * be better to match the space after each instruction, but the exception is
 1221  * still here
 1222  */
 1223         { "mov c,", &mnem_mov_c, 2 },
 1224         mnem (2, movc)
 1225         mnem (2, movx)
 1226         mnem (2, mov)
 1227         mnem (1, mul)
 1228         mnem (2, orl)
 1229         mnem (1, pop)
 1230         mnem (1, push)
 1231         mnem (2, xchd)
 1232         mnem (2, xch)
 1233         mnem (2, xrl)
 1234         mnem (1, rlc)
 1235         mnem (1, rl)
 1236         mnem (1, rrc)
 1237         mnem (1, rr)
 1238         mnem (1, setb)
 1239         mnem (1, sjmp)
 1240         mnem (2, subb)
 1241         mnem (1, swap)
 1242         zeroarg_mnem (nop)
 1243         zeroarg_mnem (reti)
 1244         zeroarg_mnem (ret)
 1245     #undef mnem
 1246         {0}});
 1247 }
 1248 
 1249 /******************************************************************************
 1250  * ## Section 7: radare2 glue and mnemonic tokenization
 1251                  --------------------------------------*/
 1252 
 1253 int assemble_8051(RAsm *a, RAsmOp *op, char const *user_asm) {
 1254     if (!a || !op || !user_asm) {
 1255         return 0;
 1256     }
 1257     r_strbuf_set (&op->buf_asm, user_asm);
 1258     while (!terminates_asm_line (*user_asm)
 1259         && (*user_asm == ' ' || *user_asm == '\t')) {
 1260         user_asm += 1;
 1261     }
 1262     char const *arguments = user_asm;
 1263     while (!terminates_asm_line (*arguments)
 1264         && (('a' <= *arguments && *arguments <= 'z')
 1265         || ('A' <= *arguments && *arguments <= 'Z'))) {
 1266         arguments += 1;
 1267     }
 1268     while (!terminates_asm_line (*arguments)
 1269         && (*arguments == ' ' || *arguments == '\t')) {
 1270         arguments += 1;
 1271     }
 1272     char*arg[3] = {0};
 1273     int nr_of_arguments = get_arguments (arg, arguments);
 1274     char const*carg[3] = { arg[0], arg[1], arg[2] }; /* aliasing pointers...
 1275         I need to pass char const *s, but I can't free char const *s
 1276         not without compiler warnings, at least */
 1277     int wants_arguments;
 1278     parse_mnem_args mnem = mnemonic (user_asm, &wants_arguments);
 1279     if (!mnem || nr_of_arguments != wants_arguments) {
 1280         free (arg[2]); arg[2] = 0; carg[2] = 0;
 1281         free (arg[1]); arg[1] = 0; carg[1] = 0;
 1282         free (arg[0]); arg[0] = 0; carg[0] = 0;
 1283         return 0;
 1284     }
 1285     ut8 instr[4] = {0};
 1286     ut8 *binp = instr;
 1287     if (!mnem (carg, a->pc, &binp)) {
 1288         free (arg[0]); arg[0] = 0; carg[2] = 0;
 1289         free (arg[1]); arg[1] = 0; carg[1] = 0;
 1290         free (arg[2]); arg[2] = 0; carg[0] = 0;
 1291         return 0;
 1292     } else {
 1293         free (arg[0]); arg[0] = 0; carg[2] = 0;
 1294         free (arg[1]); arg[1] = 0; carg[1] = 0;
 1295         free (arg[2]); arg[2] = 0; carg[0] = 0;
 1296         size_t len = binp - instr;
 1297         r_strbuf_setbin (&op->buf, instr, len);
 1298         return binp - instr;
 1299     }
 1300 }