PolicyEngine · baogorek · Jan 26, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026
diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml
@@ -65,8 +65,7 @@ jobs:
         run: |
           modal run modal_app/data_build.py \
             ${{ inputs.upload_data && '--upload' || '--no-upload' }} \
-            --branch=${{ github.head_ref || github.ref_name }} \
-            ${{ inputs.upload_data && '--no-test-lite' || '--test-lite' }}
+            --branch=${{ github.head_ref || github.ref_name }}
 
       - name: Install package
         run: uv sync --dev

diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml
@@ -23,8 +23,12 @@ jobs:
             uses: actions/setup-python@v5
             with:
               python-version: 3.12
+          - name: Install uv
+            uses: astral-sh/setup-uv@v5
           - name: Build changelog
             run: pip install yaml-changelog && make changelog
+          - name: Update lockfile
+            run: uv lock
           - name: Preview changelog update
             run: ".github/get-changelog-diff.sh"
           - name: Update changelog

diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: all format test install download upload docker documentation data data-local-area publish-local-area clean build paper clean-paper presentations
+.PHONY: all format test install download upload docker documentation data publish-local-area clean build paper clean-paper presentations
 
 all: data test
 
@@ -71,13 +71,6 @@ data: download
 	python policyengine_us_data/datasets/cps/extended_cps.py
 	python policyengine_us_data/datasets/cps/enhanced_cps.py
 	python policyengine_us_data/datasets/cps/small_enhanced_cps.py
-	mv policyengine_us_data/storage/enhanced_cps_2024.h5 policyengine_us_data/storage/dense_enhanced_cps_2024.h5
-	cp policyengine_us_data/storage/sparse_enhanced_cps_2024.h5 policyengine_us_data/storage/enhanced_cps_2024.h5
-
-data-local-area: data
-	LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/cps/cps.py
-	LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/puf/puf.py
-	LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/cps/extended_cps.py
 	python policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py 10500
 
 publish-local-area:

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,13 @@
+- bump: minor
+  changes:
+    added:
+    - Support for health_insurance_premiums_without_medicare_part_b in local area calibration
+    changed:
+    - Removed dense reweighting path from enhanced CPS; only sparse (L0) weights are produced
+    - Eliminated TEST_LITE and LOCAL_AREA_CALIBRATION flags; all datasets generated unconditionally
+    - Merged data-local-area Makefile target into data target
+    removed:
+    - Redundant test_sparse_matrix_builder.py (tests consolidated in test_matrix_national_variation.py)
+    - Redundant build_calibration_matrix.py (functionality in fit_calibration_weights.py)
+    fixed:
+    - Versioning workflow now runs uv lock after version bump to keep uv.lock in sync
diff --git a/docs/local_area_calibration_setup.ipynb b/docs/local_area_calibration_setup.ipynb
@@ -459,10 +459,10 @@
     "print(\"Remember, this is a North Carolina target:\\n\")\n",
     "print(targets_df.iloc[row_loc])\n",
     "\n",
-    "print(\"\\nHousehold donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
+    "print(\"\\nNC State target. Household donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
     "print(X_sparse[row_loc, positions['3702']])  # Household donated to NC's 2nd district\n",
     "\n",
-    "print(\"\\nHousehold donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
+    "print(\"\\nSame target, same household, donated to AK's at Large district, 2023 SNAP dollars:\")\n",
     "print(X_sparse[row_loc, positions['201']])  # Household donated to AK's at Large District"
    ]
   },

diff --git a/modal_app/README.md b/modal_app/README.md
@@ -0,0 +1,62 @@
+# Modal App for GPU Weight Fitting
+
+Run calibration weight fitting on Modal's cloud GPUs.
+
+## Prerequisites
+
+- [Modal](https://modal.com/) account and CLI installed (`pip install modal`)
+- Modal CLI authenticated (run `modal token new`)
+- HuggingFace token stored as a Modal secret named `huggingface-token`
+
+## Usage
+
+```bash
+modal run modal_app/remote_calibration_runner.py --branch <branch> --epochs <n> --gpu <type>
+```
+
+### Arguments
+
+| Argument | Default | Description |
+|----------|---------|-------------|
+| `--branch` | `main` | Git branch to clone and run |
+| `--epochs` | `200` | Number of training epochs |
+| `--gpu` | `T4` | GPU type: `T4`, `A10`, `A100-40GB`, `A100-80GB`, `H100` |
+| `--output` | `calibration_weights.npy` | Local path for weights file |
+| `--log-output` | `calibration_log.csv` | Local path for calibration log |
+
+### Example
+
+```bash
+modal run modal_app/remote_calibration_runner.py --branch health-insurance-premiums --epochs 100 --gpu T4
+```
+
+## Output Files
+
+- **calibration_weights.npy** - Fitted household weights
+- **calibration_log.csv** - Per-target performance metrics across epochs (columns: target_name, estimate, target, epoch, error, rel_error, abs_error, rel_abs_error, loss)
+
+## Changing Hyperparameters
+
+Hyperparameters are in `policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py`:
+
+```python
+BETA = 0.35
+GAMMA = -0.1
+ZETA = 1.1
+INIT_KEEP_PROB = 0.999
+LOG_WEIGHT_JITTER_SD = 0.05
+LOG_ALPHA_JITTER_SD = 0.01
+LAMBDA_L0 = 1e-8
+LAMBDA_L2 = 1e-8
+LEARNING_RATE = 0.15
+```
+
+To change them:
+1. Edit `fit_calibration_weights.py`
+2. Commit and push to your branch
+3. Re-run the Modal command with that branch
+
+## Important Notes
+
+- **Keep your connection open** - Modal needs to stay connected to download results. Don't close your laptop or let it sleep until you see the local "Weights saved to:" and "Calibration log saved to:" messages.
+- Modal clones from GitHub, so local changes must be pushed before they take effect.
diff --git a/modal_app/data_build.py b/modal_app/data_build.py
@@ -38,7 +38,6 @@ def setup_gcp_credentials():
 def build_datasets(
     upload: bool = False,
     branch: str = "main",
-    test_lite: bool = False,
 ):
     setup_gcp_credentials()
 
@@ -49,8 +48,6 @@ def build_datasets(
     subprocess.run(["uv", "sync", "--locked"], check=True)
 
     env = os.environ.copy()
-    if test_lite:
-        env["TEST_LITE"] = "true"
 
     # Download prerequisites
     subprocess.run(
@@ -79,44 +76,8 @@ def build_datasets(
         print(f"Running {script}...")
         subprocess.run(["uv", "run", "python", script], check=True, env=env)
 
-    os.rename(
-        "policyengine_us_data/storage/enhanced_cps_2024.h5",
-        "policyengine_us_data/storage/dense_enhanced_cps_2024.h5",
-    )
-    subprocess.run(
-        [
-            "cp",
-            "policyengine_us_data/storage/sparse_enhanced_cps_2024.h5",
-            "policyengine_us_data/storage/enhanced_cps_2024.h5",
-        ],
-        check=True,
-    )
-
-    # Build local area calibration datasets (without TEST_LITE - must match full dataset)
-    print("Building local area calibration datasets...")
-    local_area_env = os.environ.copy()
-    local_area_env["LOCAL_AREA_CALIBRATION"] = "true"
-
-    subprocess.run(
-        ["uv", "run", "python", "policyengine_us_data/datasets/cps/cps.py"],
-        check=True,
-        env=local_area_env,
-    )
-    subprocess.run(
-        ["uv", "run", "python", "policyengine_us_data/datasets/puf/puf.py"],
-        check=True,
-        env=local_area_env,
-    )
-    subprocess.run(
-        [
-            "uv",
-            "run",
-            "python",
-            "policyengine_us_data/datasets/cps/extended_cps.py",
-        ],
-        check=True,
-        env=local_area_env,
-    )
+    # Build stratified CPS for local area calibration
+    print("Running create_stratified_cps.py...")
     subprocess.run(
         [
             "uv",
@@ -126,7 +87,7 @@ def build_datasets(
             "10500",
         ],
         check=True,
-        env=local_area_env,
+        env=env,
     )
 
     # Run local area calibration tests
@@ -140,7 +101,7 @@ def build_datasets(
             "-v",
         ],
         check=True,
-        env=local_area_env,
+        env=env,
     )
 
     # Run main test suite
@@ -167,11 +128,9 @@ def build_datasets(
 def main(
     upload: bool = False,
     branch: str = "main",
-    test_lite: bool = False,
 ):
     result = build_datasets.remote(
         upload=upload,
         branch=branch,
-        test_lite=test_lite,
     )
     print(result)