From 57923b33819a1ed4156f4b34211cbbf26d687e05 Mon Sep 17 00:00:00 2001
From: Henny Sipma <hennysipma@mac.com>
Date: Fri, 26 Dec 2025 15:28:38 -0800
Subject: [PATCH 1/7] ARM: update instruction data

---
 chb/arm/opcodes/ARMPreloadData.py | 34 ++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/chb/arm/opcodes/ARMPreloadData.py b/chb/arm/opcodes/ARMPreloadData.py
index 740564db..df7ba0ff 100644
--- a/chb/arm/opcodes/ARMPreloadData.py
+++ b/chb/arm/opcodes/ARMPreloadData.py
@@ -4,7 +4,7 @@
 # ------------------------------------------------------------------------------
 # The MIT License (MIT)
 #
-# Copyright (c) 2021 Aarno Labs LLC
+# Copyright (c) 2021-2025  Aarno Labs LLC
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -30,9 +30,11 @@
 from chb.app.InstrXData import InstrXData
 
 from chb.arm.ARMDictionaryRecord import armregistry
-from chb.arm.ARMOpcode import ARMOpcode, simplify_result
+from chb.arm.ARMOpcode import ARMOpcode, ARMOpcodeXData, simplify_result
 from chb.arm.ARMOperand import ARMOperand
 
+from chb.invariants.XXpr import XXpr
+
 import chb.util.fileutil as UF
 
 from chb.util.IndexedTable import IndexedTableValue
@@ -41,6 +43,29 @@
     import chb.arm.ARMDictionary
 
 
+class ARMPreloadDataXData(ARMOpcodeXData):
+    """Typed access to the xdata of a preload (PLD/PLDW); data format:
+    - expressions:
+    0: xbase (value of base register)
+    1: xmem (value of memory location)
+    """
+
+    def __init__(self, xdata: InstrXData) -> None:
+        ARMOpcodeXData.__init__(self, xdata)
+
+    @property
+    def xbase(self) -> "XXpr":
+        return self.xpr(0, "xbase")
+
+    @property
+    def xmem(self) -> "XXpr":
+        return self.xpr(1, "xmem")
+
+    @property
+    def annotation(self) -> str:
+        return "Preload-data(" + str(self.xmem) + ")"
+
+
 @armregistry.register_tag("PLDW", ARMOpcode)
 @armregistry.register_tag("PLD", ARMOpcode)
 class ARMPreloadData(ARMOpcode):
@@ -72,6 +97,5 @@ def annotation(self, xdata: InstrXData) -> str:
         xprs[0]: value of base register
         xprs[1]: value of memory location
         """
-
-        rhs = str(xdata.xprs[1])
-        return "Preload-data(" + rhs + ")"
+        xd = ARMPreloadDataXData(xdata)
+        return xd.annotation

From 4ebd38d6bd6678985111cb0371466000742b4ad6 Mon Sep 17 00:00:00 2001
From: Henny Sipma <hennysipma@mac.com>
Date: Fri, 26 Dec 2025 15:29:58 -0800
Subject: [PATCH 2/7] AST: handle stack array indices

---
 chb/astinterface/ASTInterface.py | 15 +++++++++------
 chb/invariants/XXpr.py           |  2 +-
 chb/invariants/XXprUtil.py       |  6 ++++++
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/chb/astinterface/ASTInterface.py b/chb/astinterface/ASTInterface.py
index 4b504572..4c3cdb93 100644
--- a/chb/astinterface/ASTInterface.py
+++ b/chb/astinterface/ASTInterface.py
@@ -1016,12 +1016,7 @@ def introduce_stack_variables(
             stackvartypes: Dict[int, "BCTyp"]) -> None:
         """Creates stack variables/buffers for all stack offsets with types."""
 
-        # local variable stack offsets from the type inference are positive,
-        # so they must be negated here. For the same reason, to capture the
-        # largest extent of every varinfo, offsets must be traversed in reverse
-        # order.
-        for (offset, bctype) in sorted(stackvartypes.items(), reverse=True):
-            offset = -offset
+        for (offset, bctype) in sorted(stackvartypes.items()):
             vtype = bctype.convert(self.typconverter)
             self.mk_stack_variable_lval(offset, vtype=vtype)
 
@@ -1115,6 +1110,7 @@ def mk_stack_variable_lval(
         if varinfo.vtype is None:
             return lval
 
+        # create stack variables for all fields and array elements
         if varinfo.vtype.is_compound:
             structtyp = cast(AST.ASTTypComp, varinfo.vtype)
             ckey = structtyp.compkey
@@ -1159,6 +1155,13 @@ def mk_stack_variable_lval(
                         self._stack_variables[elementoffset + cfoff] = fieldlval
                     elementoffset += elsize
 
+            else:
+                elementoffset = offset
+                for i in range(arraysize):
+                    indexoffset = self.mk_scalar_index_offset(i)
+                    elemlval = self.astree.mk_vinfo_lval(varinfo, offset=indexoffset)
+                    self._stack_variables[elementoffset] = elemlval
+                    elementoffset += elsize
         return lval
 
 
diff --git a/chb/invariants/XXpr.py b/chb/invariants/XXpr.py
index 511ef2e5..b1836e3b 100644
--- a/chb/invariants/XXpr.py
+++ b/chb/invariants/XXpr.py
@@ -806,7 +806,7 @@ def stack_address_offset(self) -> int:
         elif self.is_stack_address and self.is_addressof_var:
             xvar = self.get_addressof_var
             if xvar is not None:
-                return xvar.denotation.offset.offsetvalue()
+                return xvar.denotation.offset.offsetconstant
 
         raise UF.CHBError(
             "Expression is not a stack address: " + str(self))
diff --git a/chb/invariants/XXprUtil.py b/chb/invariants/XXprUtil.py
index 093f0af8..a204b473 100644
--- a/chb/invariants/XXprUtil.py
+++ b/chb/invariants/XXprUtil.py
@@ -1615,6 +1615,12 @@ def stack_variable_to_ast_lval(
                 fldoffset, xdata, iaddr, astree, anonymous=anonymous)
             return astree.mk_vinfo_lval(vinfo, offset=astoffset, anonymous=anonymous)
 
+        if offset.offset.is_array_index_offset:
+            idxoffset = cast("VMemoryOffsetArrayIndexOffset", offset.offset)
+            astoffset = array_offset_to_ast_offset(
+                idxoffset, xdata, iaddr, astree, anonymous=anonymous)
+            return astree.mk_vinfo_lval(vinfo, offset=astoffset, anonymous=anonymous)
+
         if not anonymous:
             chklogger.logger.warning(
                 "Stack variable with offset %s not yet supported at address %s",

From 4b3e6ba931dd083326113d89e2ccbc52edad50cc Mon Sep 17 00:00:00 2001
From: Henny Sipma <hennysipma@mac.com>
Date: Mon, 5 Jan 2026 22:54:23 -0800
Subject: [PATCH 3/7] ASTI: allow for discrepancy in variable name

---
 chb/astinterface/ASTIProvenance.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/chb/astinterface/ASTIProvenance.py b/chb/astinterface/ASTIProvenance.py
index 511049f7..a092cd5c 100644
--- a/chb/astinterface/ASTIProvenance.py
+++ b/chb/astinterface/ASTIProvenance.py
@@ -446,10 +446,17 @@ def resolve_reaching_defs(self) -> None:
                                     # Allow for change of name of return value
                                     if str(instr.lhs) == v or v == "R0" or v == "S0":
                                         self.add_reaching_definition(xid, instrid)
+                                    elif instr.lhs is None:
+                                        chklogger.logger.info(
+                                            "Lhs variable %s is suppressed in call to "
+                                            "%s for reaching def address %s",
+                                            v, str(instr.tgt), addr)
+                                        self.add_reaching_definition(xid, instrid)
                                     else:
                                         chklogger.logger.warning(
-                                            "Variable names don't match: %s vs %s",
-                                            str(instr.lhs), v)
+                                            "Lhs variable names don't match: %s vs %s"
+                                            + " to %s for reaching def address %s",
+                                            str(instr.lhs), v, str(instr.tgt), addr)
                                 else:
                                     chklogger.logger.warning(
                                         "Expression is defined by unknown instruction: "

From 7d7e2a6844b3139a45262725355bfb237e792201 Mon Sep 17 00:00:00 2001
From: Henny Sipma <hennysipma@mac.com>
Date: Mon, 5 Jan 2026 22:55:12 -0800
Subject: [PATCH 4/7] DOC: start userguide

---
 chb/app/CHVersion.py                          |   2 +-
 doc/user-guide/userdata.md                    |  77 +++++++
 doc/user-guide/userdata/arm-thumb.md          |  35 ++++
 doc/user-guide/userdata/call-back-tables.md   |  71 +++++++
 doc/user-guide/userdata/call-targets.md       |  65 ++++++
 doc/user-guide/userdata/data-blocks.md        |  34 ++++
 .../userdata/function-annotations.md          | 190 ++++++++++++++++++
 .../userdata/function-entry-points.md         | 106 ++++++++++
 8 files changed, 579 insertions(+), 1 deletion(-)
 create mode 100644 doc/user-guide/userdata.md
 create mode 100644 doc/user-guide/userdata/arm-thumb.md
 create mode 100644 doc/user-guide/userdata/call-back-tables.md
 create mode 100644 doc/user-guide/userdata/call-targets.md
 create mode 100644 doc/user-guide/userdata/data-blocks.md
 create mode 100644 doc/user-guide/userdata/function-annotations.md
 create mode 100644 doc/user-guide/userdata/function-entry-points.md

diff --git a/chb/app/CHVersion.py b/chb/app/CHVersion.py
index 62bec65c..34978db2 100644
--- a/chb/app/CHVersion.py
+++ b/chb/app/CHVersion.py
@@ -1 +1 @@
-chbversion: str = "0.3.0-20251022"
+chbversion: str = "0.3.0-20260105"
diff --git a/doc/user-guide/userdata.md b/doc/user-guide/userdata.md
new file mode 100644
index 00000000..ccd322ac
--- /dev/null
+++ b/doc/user-guide/userdata.md
@@ -0,0 +1,77 @@
+# Userdata
+
+User data can improve analysis and decompilation. Userdata can be provided in two
+ways: json files and C header files. This section describes the json files; C
+header files are described here.
+
+
+## Add userdata
+
+Userdata files are passed to the analyzer via the command-line with the
+command-line option <code>--hints</code>. Multiple userdata files can be
+passed with this option. If data in multiple files conflict the data from
+the last file passed is taken; previous versions of the same data are
+overwritten.
+
+Some commands that provide the <code>--hints</code> option include
+```
+> chkx analyze ... --hints ...
+> chkx results ast ... --hints ...
+> chkx relational prepare ... --hints ...
+...
+```
+
+## Userdata file layout
+
+Userdata format is json. The general layout of the json file is
+```
+{
+    "userdata": {
+        "<section-1>": { ... },
+        "<section-2>": { ... },
+        "<section-3>": { ... },
+        ....
+        "<section-n>": { ... }
+     }
+}
+```
+where <code>section-i</code> is the name of a particular kind of userdata that is
+supported. Each kind of userdata has its own format and meaning, as explained
+below. It is recommended to add some additional top-level properties to the file,
+such as a hash (e.g., md5 or sha256) to identify the binary to which the userdata
+applies, or the name and release date of the binary. These additional properties,
+however, are not enforced or used otherwise.
+
+**Caution** The section names must be exact. Sections with misspelled names are
+silently ignored. To check if a section was read correctly, inspect the file
+`<binary>.ch/u/<binary>_system_u.xml` after initiating the analysis, to verify
+the corresponding xml section that is passed to the back-end ocaml analyzer.
+
+
+## Kinds of userdata
+
+The kinds of userdata that can be passed to the analysis are varied and tend to
+grow/change over time. Below is a list of the kinds of userdata currently
+supported.
+
+- **ARM-Thumb switch points** ([arm-thumb](userdata/arm-thumb.md)):
+  A list of addresses where an ARM binary
+  switches from ARM representation to Thumb-2 and v.v.
+
+- **Call-back Tables** ([call-back-tables](userdata/call-back-tables.md)):
+  A table of addresses
+  mapped to the declared name of a call-back table in memory.
+
+- **Call Targets for Indirect Calls** ([call-targets](userdata/call-targets.md)):
+  A list of targets for indirect function calls.
+
+- **Data Regions within Code** ([data-blocks](userdata/data-blocks.md)):
+  A list of start and end addresses
+  of regions within the code section that contain data.
+
+- **Function Annotations** ([function-annotations](userdata/function-annotations.md)):
+  Annotations with the aim to improve the quality of a decompilation to C, including
+  names/types for register and stack variables.
+
+- **Function Entry Points** ([function-entry-points](userdata/function-entry-points.md)):
+  A list of addresses that are the start of a function.
\ No newline at end of file
diff --git a/doc/user-guide/userdata/arm-thumb.md b/doc/user-guide/userdata/arm-thumb.md
new file mode 100644
index 00000000..e1516122
--- /dev/null
+++ b/doc/user-guide/userdata/arm-thumb.md
@@ -0,0 +1,35 @@
+### Arm-Thumb switch points
+
+**Description**
+
+ARM binaries may mix the ARM and Thumb-2 representation for code. The analyzer
+supports both representations. In many binaries these switch points are indicated
+in the binary itself by the compiler (this is always the case for binaries
+compiled with debug, and often in other binaries as well). However, if the
+switch points are not explicitly present in the binary, the current version of
+the disassembler cannot automatically
+determine them. For these binaries the user has the option to manually indicate
+the switch points in the userdata.
+
+**Format**
+
+A list of addresses followed by a colon and the letter 'T' or 'A'
+that indicate starting addresses of Thumb-2 and ARM code representation regions.
+
+
+**Example**
+
+```
+{
+   "userdata": {
+       ....
+       "arm-thumb": [
+           "0x18638:A",
+           "0x18908:T",
+           "0x18950:A",
+           "0x18974:T",
+           "0x21210:A"
+        ]
+    }
+}
+```
diff --git a/doc/user-guide/userdata/call-back-tables.md b/doc/user-guide/userdata/call-back-tables.md
new file mode 100644
index 00000000..ca4ce0c5
--- /dev/null
+++ b/doc/user-guide/userdata/call-back-tables.md
@@ -0,0 +1,71 @@
+### Call-back Tables
+
+**Description**
+
+Call-back tables are arrays of structs in global memory that contain related
+function pointers, usually associated with some other identifying data.
+Common examples of call-back tables are in binaries that serve requests based
+on a particular keyword. In such systems the response to the request is often
+invoked by matching the key to the identifying key in the table and executing
+the associated function pointer.
+
+The userdata representation for such call-back tables consists of three elements:
+1. The definition of the table in C (in the C header file)
+2. The start address of the table in memory (in userdata)
+3. The addresses of the indirect calls into the table (in userdata)
+
+This section only shows the format for (2). The addresses of the indirect
+calls are specified in a separate section, described in
+[call-targets](call-targets.md).
+
+
+**Format**
+
+A table of virtual addresses in memory mapped to names of defined tables.
+
+
+**Example**
+
+```
+{
+    "userdata": {
+        ....
+	"call-back-tables": {
+	    "0x4a5910": "request_table",
+	    "0x4a5c30": "cgi_setobject_table"
+	}
+    }
+}
+```
+
+This section must be accompanied by a definition of the corresponding table
+in a header file that is passed to the analyzer at the same time. The
+corresponding header definition in this case could be something like:
+
+```
+struct _cbt_http_request {
+  char *formname;
+  char *filetype;
+  char *cachecontrol;
+  int (*cbp_request_12)(void *state, void *stream, int len);
+  int (*cbp_request_16)(char *filename, void *stream);
+  int (*cbp_request_20)(char *level);
+} cbt_http_request;
+
+
+struct _cbt_http_request *request_table;
+
+
+struct _cbt_cgi_setobject {
+  char *tag;
+  int num;
+  int (*cbp_cgi_setobject)(struct keyvaluepair_t *kvp, int len);
+} cbt_cgi_setobject;
+
+
+struct _cbt_cgi_setobject *cgi_setobject_table;
+```
+
+
+
+        
\ No newline at end of file
diff --git a/doc/user-guide/userdata/call-targets.md b/doc/user-guide/userdata/call-targets.md
new file mode 100644
index 00000000..60aa7e53
--- /dev/null
+++ b/doc/user-guide/userdata/call-targets.md
@@ -0,0 +1,65 @@
+### Call targets
+
+**Description**
+
+In many cases the analyzer is able to resolve indirect function calls. For
+those cases where automatic resolution of targets fails the user can supply
+a list of targets explicitly in the userdata.
+
+A call target may be specified in a number of ways depending on the kind of
+target:
+- *application function:* <code>app:\<function-address\></code>
+- *shared-object function:* <code>so:\<function-name\></code>
+- *java native interface:* <code>jni:\<jni-index\></code>
+- *call-back table function:* <code>cba:\<call-back table address\>:\<offset\></code>
+
+**Format**
+
+A list of records of the following structure:
+```
+   {"fa":<function-address>,
+    "ia":<instruction-address of call-site>,
+    "tgts": [
+       | {"app":<address of target application function>}
+       | {"so":<name of target library function>}
+       | {"jni": <index of java native function>}
+       | {"cba": <address of call-back table>:<offset of function pointer in record>}
+    ]
+    }
+```
+
+**Example**
+
+```
+{
+    "userdata": {
+        ...
+        "call-targets": [
+            {"ia": "0x40d5dc",
+             "fa": "0x40d510",
+             "tgts": [{"cba": "0x4a5c30:8"}]
+            },
+            {"ia": "0x40a6a4",
+             "fa": "0x409dd0",
+             "tgts": [{"cba": "0x4a5910:12"}]
+            },
+            {"ia": "0x40aba8",
+             "fa": "0x409dd0",
+             "tgts": [{"cba": "0x4a5910:16"}]
+            },
+            {"ia": "0x40afd8",
+             "fa": "0x409dd0",
+             "tgts": [{"cba": "0x4a5910:20"}]
+            },
+            {"ia": "0x40b304",
+             "fa": "0x40b288",
+             "tgts": [{"app": "0x401018"}, {"app": "0x403200"}]
+            },
+            {"ia": "0x40c800",
+             "fa": "0x40c780",
+             "tgts": [{"so": "memcpy"}]
+            }            
+        ]
+    }
+}
+```
\ No newline at end of file
diff --git a/doc/user-guide/userdata/data-blocks.md b/doc/user-guide/userdata/data-blocks.md
new file mode 100644
index 00000000..31d2f9fc
--- /dev/null
+++ b/doc/user-guide/userdata/data-blocks.md
@@ -0,0 +1,34 @@
+### Data blocks
+
+**Description**
+
+Code sections may interleave code with data regions. This is particularly common
+in ARM binaries. Most of these data regions are detected automatically by the
+disassembler. For the cases where this fails the user can point out these data
+regions in the userdata with the data-blocks section.
+
+**Format**
+
+A list of records that specify the start (inclusive) and end (exclusive) address
+of a data region, where the record has the format:
+```
+    {"r": [<start-address>, <end-address>]}
+```
+
+
+**Example**
+
+```
+{
+    "userdata": {
+        ....
+        "data-blocks": [
+            {"r": ["0xa02425fc", "0xa0242674"]},
+            {"r": ["0xa0255e68", "0xa0255e94"]},
+            {"r": ["0xa03005d4", "0xa03005f8"]},
+            {"r": ["0xa0300a9e", "0xa0300ab0"]},
+            ...
+         ]
+    }
+}
+```    
\ No newline at end of file
diff --git a/doc/user-guide/userdata/function-annotations.md b/doc/user-guide/userdata/function-annotations.md
new file mode 100644
index 00000000..e6e60d4e
--- /dev/null
+++ b/doc/user-guide/userdata/function-annotations.md
@@ -0,0 +1,190 @@
+### Function Annotations
+
+Function annotations can be used to improve the quality of a decompilation of
+a function to C code. A function annotation ranges from names and types for
+register and stack
+variables to corrections to reaching definitions and typing inference rules.
+
+**Format**
+
+The top-level format of function annotations is a list of individual function
+annotations:
+```
+{
+    "userdata": {
+        ...
+        "function-annotations": [
+            {
+                "faddr": <function-address in hex>,
+                "register-variable-introductions": [
+                    ...
+                ],
+                "stack-variable-introductions": [
+                    ...
+                ],
+                "typing-rules": [
+                    ...
+                ],
+                "remove-reaching-definitions": [
+                    ...
+                ]
+            },
+            ...
+        ],
+        ...
+    }
+}
+```
+where all properties are optional except for the function address.
+
+**Format: register-variable-introductions:**
+
+The format for **register-variable-introductions** is a list of individual
+register annotations:
+```
+     [
+         {
+             "iaddr": <instruction-address in hex>,
+             "name": <chosen name>,
+             "typename": <name of a data type>,
+             "mods": [<modifications of the type>]
+         },
+         {
+             ...
+         }
+     ]
+```
+The instruction address is the address of the instruction where the
+register to be renamed gets assigned, that is, the register is the
+left-hand side in an instruction (assignment or call). If a register
+gets assigned in multiple paths in parallel, the instruction address
+should be the lowest address. These introductions can be considered
+as ssa (static single assignment) locations. 
+
+The chosen name is the name to be given to the register. The name will
+be used in the lifting as long as the register has the current definition.
+It is the user's responsibility to ensure that there are no name clashes
+with other variables. 
+
+The type name is the name of the type of the register for that particular
+assignment (a register can have many types during its lifetime within a
+function). The type name is either a primitive C type (like int or
+unsigned short, etc.) or the name of a type for which a typedef is given
+in the header file. The reason for restricting the type name to simple
+names is that full-featured C parsing would otherwise be needed to read
+in these files. For convenience, some modifications can be added to the
+mods property to modify the typename: 
+- <code>ptrto</code>: indicating that the register type is a pointer to
+  the type indicated by the type name
+- <code>cast</code>: indicating that the type given should override the
+  type that may have been inferred by type inference. Adding <code>cast</code>
+  furthermore ensures that the assigning instruction will be exposed in
+  the lifting.
+
+*Note:* The name of the register itself does not have to be included in
+the record, as it is automatically inferred from the instruction address.
+At present the annotation is limited to instructions with a single LHS
+register. That is, instructions that assign to multiple registers such
+as the ARM instructions <code>LDM</code> or ARM call instructions that
+assign to both <code>R0</code> and <code>R1</code> are currently not
+handled.
+
+*Note:* The typename is optional. The analyzer performs its own type inference
+based on function signatures and other type information. Unless types are
+introduced that are not present in any function signatures or other type
+information it is often better to omit the typename initially and only add
+a typename if a typename is not inferred automatically.
+
+**Example: register-variable-introductions:**
+
+```
+                "register-variable-introductions": [
+                    {
+                        "iaddr": "0xe2b34",
+                        "name": "t",
+                        "typename": "EVP_PKEY_ASN1_METHOD",
+                        "mods": ["ptrto", "cast"]
+                    },
+                    {
+                        "iaddr": "0xe2b40",
+                        "name": "flags",
+                        "typename": "unsigned long"
+                    },
+                    {
+                        "iaddr": "0xe2b88",
+                        "name": "obj"
+                    },
+                    ...
+```
+
+**Format: stack-variable-introductions:**
+
+The format for **stack-variable-introductions** is a list of individual
+(local) stack variable annotations:
+```
+    [
+        {
+            "offset": <offset in bytes (positive)>,
+            "name": <chosen name>,
+            "typename": <name of a data type>,
+            "mods": [<modifications of the type>]
+        },
+        {
+            ...
+     ]
+```
+The offset is the offset *in bytes* where the stack variable is located, defined
+as 
+```
+<address of stack-pointer at function entry> - <start address of stack variable>
+```
+Note that this number must be positive as the stack grows down, and thus any
+local stack variable is located at an address that is less in value than the
+address of the stack-pointer at function entry.
+
+The name, typename, and mods are the same as for register-variable introductions
+with the exception that stack variables can have an additional type of modification
+expressed in the mods property:
+- <code>array:\<n\></code>: indicating that the stack variable type is an array
+  of <code>n</code> elements of the type given.
+
+It is the user's responsibility to ensure that stack variables do not overlap and
+that names do not clash with each other or with register variables.
+
+
+**Example: stack-variable-introductions:**
+
+```
+                "stack-variable-introductions": [
+                    {
+                        "offset": 32,
+                        "name": "md",
+                        "typename": "unsigned char",
+                        "mods": ["array:16"]
+                    },
+                    {
+                        "offset": 56,
+                        "name": "md_ctx",
+                        "typename": "EVP_MD_CTX"
+                    }
+                ]
+```
+
+**Format: remove-reaching-definitions:**
+
+The format for **remove-reaching-definitions** is a list of register variables
+associated with the reaching definitions to be removed:
+```
+    [
+        {
+            "var": <name-of-register>,
+            "uselocs": [ hex-addresses ],
+            "rdeflocs": [ hex-addresses ]
+        },
+        {
+            ...
+    ]
+```
+The <code>var</code> property holds the name of the register for which the
+addresses given in the <code>rdeflocs</code> property are to be removed 
+from the instructions with addresses given in the <code>uselocs</code> property.
diff --git a/doc/user-guide/userdata/function-entry-points.md b/doc/user-guide/userdata/function-entry-points.md
new file mode 100644
index 00000000..a24b450f
--- /dev/null
+++ b/doc/user-guide/userdata/function-entry-points.md
@@ -0,0 +1,106 @@
+### Function Entry Points
+
+**Description**
+
+For most binaries the disassembler is able to determine all function entry points
+automatically. In some cases, however, some function entry points may be missed,
+and may be manually pointed out in the userdata.
+
+**Format**
+
+A list of addresses that are the starting address of a function.
+
+**Example**
+```
+{
+    "userdata": {
+        ...
+        "function-entry-points": [
+            "0xa0100044",
+            "0xa010011c",
+            "0xa0100292",
+            "0xa010029c",
+            "0xa0100710",
+            "0xa010072a",
+            ...
+         ]
+     }
+}
+```
+
+**Finding Function Entry Points**
+
+Low function coverage may be an indicator of function entry points missed.
+Function coverage is defined as the ratio of the number of instructions that
+are part of some function to the total number of instructions in the code
+sections (minus confirmed embedded data regions). Function coverage is
+displayed in the printed output when running the disassembler (without
+analysis):
+
+```
+> chkx analyze -d <binary>
+...
+Disassembly        : 0.16
+Construct functions: 0.86
+Disassembly information: 
+   Instructions         : 32699
+   Unknown instructions : 0
+   Functions            : 429 (coverage: 96.68%)
+   Function overlap     : 993 (counting multiples: 993)
+   Jumptables           : 16
+   Data blocks          : 20
+...
+```
+
+To aid the identification of function entry points, the disassembler prints
+out a (text) file that contains a listing of all instructions not contained
+in functions. E.g.,
+```
+> chkx analyze -d <binary>
+...
+> more <binary>.cch/a/<binary>_orphan.log
+...
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Data block (size: 12 bytes)
+
+  0x9870      Code:<0x295d4>
+  0x9874      FAddr:<0x9914>
+  0x9878      Code:<0x9300>
+================================================================================
+
+    0x987c  08 40 2d e9       PUSH         {R3,LR}
+    0x9880  2c 30 9f e5       LDR          R3, 0x98b4
+    0x9884  00 30 d3 e5       LDRB         R3, [R3]
+    0x9888  00 00 53 e3       CMP          R3, #0
+    0x988c  08 80 bd 18       POPNE        {R3,PC}
+  B 0x9890  20 30 9f e5       LDR          R3, 0x98b8
+    0x9894  00 00 53 e3       CMP          R3, #0
+    0x9898  01 00 00 0a       BEQ          0x98a4
+  B 0x989c  18 00 9f e5       LDR          R0, 0x98bc
+    0x98a0  23 ff ff eb       BL           0x9534
+  B 0x98a4  08 30 9f e5       LDR          R3, 0x98b4
+    0x98a8  01 20 a0 e3       MOV          R2, #1
+    0x98ac  00 20 c3 e5       STRB         R2, [R3]
+    0x98b0  08 80 bd e8       POP          {R3,PC}
+  B 0x98b4  38 64 03 00       ANDEQ        R6, R3, R8,LSR R4
+    0x98b8  00 00 00 00       ANDEQ        R0, R0, R0
+    0x98bc  cc dd 02 00       ANDEQ        SP, R2, R12,ASR#27
+    0x98c0  08 40 2d e9       PUSH         {R3,LR}
+    0x98c4  34 30 9f e5       LDR          R3, 0x9900
+    0x98c8  00 00 53 e3       CMP          R3, #0
+    0x98cc  02 00 00 0a       BEQ          0x98dc
+  B 0x98d0  2c 00 9f e5       LDR          R0, 0x9904
+    0x98d4  2c 10 9f e5       LDR          R1, 0x9908
+    0x98d8  cc ff ff eb       BL           0x9810
+  B 0x98dc  28 00 9f e5       LDR          R0, 0x990c
+    0x98e0  00 30 90 e5       LDR          R3, [R0]
+    0x98e4  00 00 53 e3       CMP          R3, #0
+    0x98e8  08 80 bd 08       POPEQ        {R3,PC}
+  B 0x98ec  1c 30 9f e5       LDR          R3, 0x9910
+    0x98f0  00 00 53 e3       CMP          R3, #0
+    0x98f4  08 80 bd 08       POPEQ        {R3,PC}
+  B 0x98f8  33 ff 2f e1       BLX          R3
+    0x98fc  08 80 bd e8       POP          {R3,PC}
+...
+```
+Missing function entry points are easy to spot at 0x987c and 0x98c0.
\ No newline at end of file

From 5917f7d4102e69c351d19c82586162000b6d6de9 Mon Sep 17 00:00:00 2001
From: Henny Sipma <hennysipma@mac.com>
Date: Sun, 11 Jan 2026 17:54:30 -0800
Subject: [PATCH 5/7] CMD: add option to show api calls to classification

---
 chb/app/InstrXData.py      |  8 ++--
 chb/cmdline/chkx           |  8 ++++
 chb/cmdline/commandutil.py | 89 +++++++++++++++++++++++++++-----------
 3 files changed, 76 insertions(+), 29 deletions(-)

diff --git a/chb/app/InstrXData.py b/chb/app/InstrXData.py
index 3017943d..b7ba00cd 100644
--- a/chb/app/InstrXData.py
+++ b/chb/app/InstrXData.py
@@ -432,7 +432,9 @@ def has_call_target(self) -> bool:
             key = self.tags[0]
             if key.startswith("a:"):
                 keyletters = key[2:]
-                return len(self.args) == len(keyletters) + 1
+                return (
+                    len(self.args) == len(keyletters) + 1
+                    and self.args[-1] > 0)
             else:
                 return False
         elif len(self.tags) >= 2 and self.tags[1] == "call":
@@ -470,9 +472,9 @@ def has_indirect_call_target_exprs(self) -> bool:
         return (len(self.tags) == 2 and self.tags[1] == "u" and len(self.args) > 1)
 
     def call_target(self, ixd: "InterfaceDictionary") -> "CallTarget":
-        if self.has_call_target() and self.is_bx_call:
+        if self.has_call_target() and self.is_bx_call and self.args[-5] > 0:
             return ixd.call_target(self.args[-5])
-        elif self.has_call_target():
+        elif self.has_call_target() and self.args[-1] > 0:
             return ixd.call_target(self.args[-1])
         else:
             raise UF.CHBError(
diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx
index 4ea0fab4..a511cbf3 100755
--- a/chb/cmdline/chkx
+++ b/chb/cmdline/chkx
@@ -881,6 +881,14 @@ def parse() -> argparse.Namespace:
     resultsclassifyfunctions.add_argument(
         "classification_file",
         help="name of json classification file")
+    resultsclassifyfunctions.add_argument(
+        "--output", "-o",
+        required=True,
+        help="name of file to save results")
+    resultsclassifyfunctions.add_argument(
+        "--showapicalls",
+        action="store_true",
+        help="list classified functions individually in output file")
     resultsclassifyfunctions.set_defaults(func=UCC.results_classifyfunctions)
 
     # --- results functions ---
diff --git a/chb/cmdline/commandutil.py b/chb/cmdline/commandutil.py
index 9d9dec60..19136f83 100644
--- a/chb/cmdline/commandutil.py
+++ b/chb/cmdline/commandutil.py
@@ -930,6 +930,8 @@ def results_classifyfunctions(args: argparse.Namespace) -> NoReturn:
 
     xname: str = str(args.xname)
     classificationfile: str = str(args.classification_file)
+    showapicalls: bool = args.showapicalls
+    outputfilename: str = args.output
 
     with open(classificationfile, "r") as fp:
         classifier = json.load(fp)
@@ -953,44 +955,76 @@ def results_classifyfunctions(args: argparse.Namespace) -> NoReturn:
     fns = app.appfunction_addrs
 
     classification: Dict[str, Dict[str, int]] = {}  # faddr -> libcat -> count
+    classificationapi: Dict[str, Dict[str, Dict[str, int]]] = {}
 
     for faddr in fns:
-        classification.setdefault(faddr, {})
+        if showapicalls:
+            classificationapi.setdefault(faddr, {})
+        else:
+            classification.setdefault(faddr, {})
         f = app.function(faddr)
         fcalls = f.call_instructions()
         for baddr in fcalls:
             for instr in fcalls[baddr]:
                 tgtname = instr.call_target.name
                 if tgtname in revclassifier:
-                    category = revclassifier[tgtname]
-                    classification[faddr].setdefault(category, 0)
-                    classification[faddr][category] += 1
+                    if showapicalls:
+                        category = revclassifier[tgtname]
+                        classificationapi[faddr].setdefault(category, {})
+                        classificationapi[faddr][category].setdefault(tgtname, 0)
+                        classificationapi[faddr][category][tgtname] += 1
+                    else:
+                        category = revclassifier[tgtname]
+                        classification[faddr].setdefault(category, 0)
+                        classification[faddr][category] += 1
 
     catfprevalence: Dict[str, int] = {}
     catcprevalence: Dict[str, int] = {}
     catstats: Dict[int, int] = {}
     singlecat: Dict[str, int] = {}
     doublecat: Dict[Tuple[str, str], int] = {}
-    for faddr in classification:
-        for cat in classification[faddr]:
-            catfprevalence.setdefault(cat, 0)
-            catcprevalence.setdefault(cat, 0)
-            catfprevalence[cat] += 1
-            catcprevalence[cat] += classification[faddr][cat]
-
-        numcats = len(classification[faddr])
-        catstats.setdefault(numcats, 0)
-        catstats[numcats] += 1
-        if numcats == 1:
-            cat = list(classification[faddr].keys())[0]
-            singlecat.setdefault(cat, 0)
-            singlecat[cat] += 1
-
-        if numcats == 2:
-            cats = sorted(list(classification[faddr].keys()))
-            cattuple = (cats[0], cats[1])
-            doublecat.setdefault(cattuple, 0)
-            doublecat[cattuple] += 1
+
+    if showapicalls:
+        for faddr in classificationapi:
+            for cat in classificationapi[faddr]:
+                catfprevalence.setdefault(cat, 0)
+                catcprevalence.setdefault(cat, 0)
+                catfprevalence[cat] += 1
+                catcprevalence[cat] += sum(classificationapi[faddr][cat].values())
+            numcats = len(classificationapi[faddr])
+            catstats.setdefault(numcats, 0)
+            catstats[numcats] += 1
+            if numcats == 1:
+                cat = list(classificationapi[faddr].keys())[0]
+                singlecat.setdefault(cat, 0)
+                singlecat[cat] += 1  # count every single-category function
+
+            if numcats == 2:
+                cats = sorted(list(classificationapi[faddr].keys()))
+                cattuple = (cats[0], cats[1])
+                doublecat.setdefault(cattuple, 0)
+                doublecat[cattuple] += 1
+    else:
+
+        for faddr in classification:
+            for cat in classification[faddr]:
+                catfprevalence.setdefault(cat, 0)
+                catcprevalence.setdefault(cat, 0)
+                catfprevalence[cat] += 1
+                catcprevalence[cat] += classification[faddr][cat]
+            numcats = len(classification[faddr])
+            catstats.setdefault(numcats, 0)
+            catstats[numcats] += 1
+            if numcats == 1:
+                cat = list(classification[faddr].keys())[0]
+                singlecat.setdefault(cat, 0)
+                singlecat[cat] += 1
+
+            if numcats == 2:
+                cats = sorted(list(classification[faddr].keys()))
+                cattuple = (cats[0], cats[1])
+                doublecat.setdefault(cattuple, 0)
+                doublecat[cattuple] += 1
 
     for (m, c) in sorted(catstats.items()):
         print(str(m).rjust(5) + ": " + str(c).rjust(5))
@@ -1006,9 +1040,12 @@ def results_classifyfunctions(args: argparse.Namespace) -> NoReturn:
     classificationresults: Dict[str, Any] = {}
     classificationresults["catfprevalence"] = catfprevalence
     classificationresults["catcprevalence"] = catcprevalence
-    classificationresults["functions"] = classification
+    if showapicalls:
+        classificationresults["functions"] = classificationapi
+    else:
+        classificationresults["functions"] = classification
 
-    with open("classification_results.json", "w") as fp:
+    with open(outputfilename, "w") as fp:
         json.dump(classificationresults, fp, indent=2)
 
     exit(0)

From c75db1f510b0ba742e490e7ca9cbf99af4d7c694 Mon Sep 17 00:00:00 2001
From: Henny Sipma <hennysipma@mac.com>
Date: Mon, 12 Jan 2026 15:26:26 -0800
Subject: [PATCH 6/7] CMD: command to collect constant string arguments

---
 chb/cmdline/chkx                  |  7 ++++
 chb/cmdline/commandutil.py        |  5 ++-
 chb/cmdline/jsonresultutil.py     |  5 +--
 chb/cmdline/reportcmds.py         | 54 +++++++++++++++++++++++++++++++
 chb/invariants/FnVarDictionary.py |  3 +-
 5 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx
index a511cbf3..b0cf38be 100755
--- a/chb/cmdline/chkx
+++ b/chb/cmdline/chkx
@@ -1210,6 +1210,13 @@ def parse() -> argparse.Namespace:
             + " source for callgraph path"))
     report_calls.set_defaults(func=REP.report_calls_cmd)
 
+    # -- report arguments
+    report_arguments = reportparsers.add_parser("string_arguments")
+    report_arguments.add_argument("xname", help="name of executable")
+    report_arguments.add_argument(
+        "--output", "-o", required=True, help="name of json output file")
+    report_arguments.set_defaults(func=REP.report_string_arguments)
+
     # -- report function api's
     report_functionapis = reportparsers.add_parser("function_apis")
     report_functionapis.add_argument("xname", help="name of executable")
diff --git a/chb/cmdline/commandutil.py b/chb/cmdline/commandutil.py
index 19136f83..d8dbc09c 100644
--- a/chb/cmdline/commandutil.py
+++ b/chb/cmdline/commandutil.py
@@ -1045,8 +1045,11 @@ def results_classifyfunctions(args: argparse.Namespace) -> NoReturn:
     else:
         classificationresults["functions"] = classification
 
+    jresult = JU.jsonok("none", classificationresults)
+    jresult["meta"]["app"] = JU.jsonappdata(xinfo, includepath=False)
+
     with open(outputfilename, "w") as fp:
-        json.dump(classificationresults, fp, indent=2)
+        json.dump(jresult, fp, indent=2)
 
     exit(0)
 
diff --git a/chb/cmdline/jsonresultutil.py b/chb/cmdline/jsonresultutil.py
index 0b84d8f5..02e0ca49 100644
--- a/chb/cmdline/jsonresultutil.py
+++ b/chb/cmdline/jsonresultutil.py
@@ -77,9 +77,10 @@ def jsonok(schemaname: str, content: Dict[str, Any]) -> Dict[str, Any]:
     return jresult
 
 
-def jsonappdata(xinfo: "XInfo") -> Dict[str, str]:
+def jsonappdata(xinfo: "XInfo", includepath: bool = True) -> Dict[str, str]:
     result: Dict[str, str] = {}
-    result["path"] = xinfo.path
+    if includepath:
+        result["path"] = xinfo.path
     result["file"] = xinfo.file
     result["md5"] = xinfo.md5
     result["arch"] = xinfo.architecture
diff --git a/chb/cmdline/reportcmds.py b/chb/cmdline/reportcmds.py
index 73fd9dde..df442494 100644
--- a/chb/cmdline/reportcmds.py
+++ b/chb/cmdline/reportcmds.py
@@ -72,6 +72,7 @@
     from chb.app.AppAccess import AppAccess
     from chb.app.BasicBlock import BasicBlock
     from chb.app.Instruction import Instruction
+    from chb.invariants.XConstant import XIntConst
     from chb.mips.MIPSInstruction import MIPSInstruction
     from chb.models.BTerm import BTerm, BTermArithmetic
     from chb.models.FunctionSummary import FunctionSummary
@@ -602,6 +603,59 @@ def report_calls_cmd(args: argparse.Namespace) -> NoReturn:
         exit(1)
 
 
+def report_string_arguments(args: argparse.Namespace) -> NoReturn:
+    """Save the constant string arguments of all calls to a json file."""
+    # arguments
+    xname: str = args.xname
+    outputfilename: str = args.output
+
+    try:
+        (path, xfile) = UC.get_path_filename(xname)
+        UF.check_analysis_results(path, xfile)
+    except UF.CHBError as e:
+        print(str(e.wrap()))
+        exit(1)
+
+    xinfo = XI.XInfo()
+    xinfo.load(path, xfile)
+
+    app = UC.get_app(path, xfile, xinfo)
+    fns = app.functions
+    # faddr -> "call-string-args" -> list of string-argument records
+    argvals: Dict[str, Dict[str, Any]] = {}
+
+    for (faddr, f) in fns.items():
+        fcalls = f.call_instructions()
+        for baddr in fcalls:
+            for instr in fcalls[baddr]:
+                callee = instr.call_target.name
+                callargs = instr.call_arguments
+                for (index, callarg) in enumerate(callargs):
+                    if callarg.is_string_reference:
+                        constcallarg = cast("XprConstant", callarg).constant
+                        intcallarg = cast("XIntConst", constcallarg)
+                        argvals.setdefault(faddr, {})
+                        argvals[faddr].setdefault("call-string-args", [])
+                        argrec = {
+                            "iaddr": instr.iaddr,
+                            "callee": callee,
+                            "index": index + 1,  # 1-based position
+                            "value": intcallarg.string_reference()
+                        }
+                        argvals[faddr]["call-string-args"].append(argrec)
+
+    result: Dict[str, Any] = {}
+    result["functions"] = argvals
+
+    jresult = JU.jsonok("none", result)
+    jresult["meta"]["app"] = JU.jsonappdata(xinfo, includepath=False)
+
+    with open(outputfilename, "w") as fp:
+        json.dump(jresult, fp, indent=2)
+
+    exit(0)
+
+
 def report_function_apis(args: argparse.Namespace) -> NoReturn:
 
     # arguments
diff --git a/chb/invariants/FnVarDictionary.py b/chb/invariants/FnVarDictionary.py
index 5a044d5a..58842ffb 100644
--- a/chb/invariants/FnVarDictionary.py
+++ b/chb/invariants/FnVarDictionary.py
@@ -43,6 +43,7 @@
 
 import chb.util.fileutil as UF
 import chb.util.IndexedTable as IT
+from chb.util.loggingutil import chklogger
 
 if TYPE_CHECKING:
     from chb.api.InterfaceDictionary import InterfaceDictionary
@@ -201,4 +202,4 @@ def initialize(self, xnode: ET.Element) -> None:
                 t.reset()
                 t.read_xml(xtable, "n")
             else:
-                raise UF.CHBError("Var dictionary table " + t.name + " not found")
+                chklogger.logger.error("Var dictionary table %s not found", t.name)

From f867714a5bf85f55005956e38ba5a00be48941ba Mon Sep 17 00:00:00 2001
From: Henny Sipma <hennysipma@mac.com>
Date: Thu, 22 Jan 2026 13:53:13 -0800
Subject: [PATCH 7/7] XPR: handle no-offset separately

---
 chb/app/CHVersion.py       | 2 +-
 chb/invariants/XXprUtil.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/chb/app/CHVersion.py b/chb/app/CHVersion.py
index 34978db2..ad754b54 100644
--- a/chb/app/CHVersion.py
+++ b/chb/app/CHVersion.py
@@ -1 +1 @@
-chbversion: str = "0.3.0-20260105"
+chbversion: str = "0.3.0-20260122"
diff --git a/chb/invariants/XXprUtil.py b/chb/invariants/XXprUtil.py
index a204b473..855efea8 100644
--- a/chb/invariants/XXprUtil.py
+++ b/chb/invariants/XXprUtil.py
@@ -449,6 +449,8 @@ def memory_variable_to_lval_expression(
                 offset = cast("VMemoryOffsetFieldOffset", offset)
                 astoffset: AST.ASTOffset = field_offset_to_ast_offset(
                     offset, xdata, iaddr, astree, anonymous=anonymous)
+            elif offset.is_no_offset:
+                astoffset = nooffset
             elif offset.is_array_index_offset:
                 offset = cast("VMemoryOffsetArrayIndexOffset", offset)
                 astoffset = array_offset_to_ast_offset(
@@ -460,6 +462,11 @@ def memory_variable_to_lval_expression(
             return astree.mk_memref_expr(
                 astbase, offset=astoffset, anonymous=anonymous)
 
+        elif offset.is_no_offset:
+            astlval = xvariable_to_ast_def_lval_expression(
+                base.basevar, xdata, iaddr, astree, anonymous=anonymous)
+            return astree.mk_memref_expr(astlval, anonymous=anonymous)
+
         elif (
                 offset.is_field_offset
                 or offset.is_array_index_offset
@@ -1952,6 +1959,8 @@ def basevar_variable_to_ast_lval(
         offset = cast("VMemoryOffsetArrayIndexOffset", offset)
         astoffset = array_offset_to_ast_offset(
             offset, xdata, iaddr, astree, anonymous=anonymous)
+    elif offset.is_no_offset:
+        astoffset = nooffset
     elif offset.is_constant_value_offset:
         astoffset = astree.mk_scalar_index_offset(offset.offsetvalue())
     else: