diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml index 95d55281..4575a508 100644 --- a/.github/workflows/reusable_test.yaml +++ b/.github/workflows/reusable_test.yaml @@ -65,8 +65,7 @@ jobs: run: | modal run modal_app/data_build.py \ ${{ inputs.upload_data && '--upload' || '--no-upload' }} \ - --branch=${{ github.head_ref || github.ref_name }} \ - ${{ inputs.upload_data && '--no-test-lite' || '--test-lite' }} + --branch=${{ github.head_ref || github.ref_name }} - name: Install package run: uv sync --dev diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml index 84667308..48658dbc 100644 --- a/.github/workflows/versioning.yaml +++ b/.github/workflows/versioning.yaml @@ -23,8 +23,12 @@ jobs: uses: actions/setup-python@v5 with: python-version: 3.12 + - name: Install uv + uses: astral-sh/setup-uv@v5 - name: Build changelog run: pip install yaml-changelog && make changelog + - name: Update lockfile + run: uv lock - name: Preview changelog update run: ".github/get-changelog-diff.sh" - name: Update changelog diff --git a/Makefile b/Makefile index 4d5f5bef..fd212a08 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: all format test install download upload docker documentation data data-local-area publish-local-area clean build paper clean-paper presentations +.PHONY: all format test install download upload docker documentation data publish-local-area clean build paper clean-paper presentations all: data test @@ -71,13 +71,6 @@ data: download python policyengine_us_data/datasets/cps/extended_cps.py python policyengine_us_data/datasets/cps/enhanced_cps.py python policyengine_us_data/datasets/cps/small_enhanced_cps.py - mv policyengine_us_data/storage/enhanced_cps_2024.h5 policyengine_us_data/storage/dense_enhanced_cps_2024.h5 - cp policyengine_us_data/storage/sparse_enhanced_cps_2024.h5 policyengine_us_data/storage/enhanced_cps_2024.h5 - -data-local-area: data - LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/cps/cps.py - LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/puf/puf.py - LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/cps/extended_cps.py python policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py 10500 publish-local-area: diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..193004c8 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,13 @@ +- bump: minor + changes: + added: + - Support for health_insurance_premiums_without_medicare_part_b in local area calibration + changed: + - Removed dense reweighting path from enhanced CPS; only sparse (L0) weights are produced + - Eliminated TEST_LITE and LOCAL_AREA_CALIBRATION flags; all datasets generated unconditionally + - Merged data-local-area Makefile target into data target + removed: + - Redundant test_sparse_matrix_builder.py (tests consolidated in test_matrix_national_variation.py) + - Redundant build_calibration_matrix.py (functionality in fit_calibration_weights.py) + fixed: + - Versioning workflow now runs uv lock after version bump to keep uv.lock in sync diff --git a/docs/local_area_calibration_setup.ipynb b/docs/local_area_calibration_setup.ipynb index cdd1cc97..9060a3df 100644 --- a/docs/local_area_calibration_setup.ipynb +++ b/docs/local_area_calibration_setup.ipynb @@ -459,10 +459,10 @@ "print(\"Remember, this is a North Carolina target:\\n\")\n", "print(targets_df.iloc[row_loc])\n", "\n", - "print(\"\\nHousehold donated to NC's 2nd 
district, 2023 SNAP dollars:\")\n",
+    "print(\"\\nNC state target. Household donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
     "print(X_sparse[row_loc, positions['3702']]) # Household donated to NC's 2nd district\n",
     "\n",
-    "print(\"\\nHousehold donated to NC's 2nd district, 2023 SNAP dollars:\")\n",
+    "print(\"\\nSame target, same household, donated to AK's at Large district, 2023 SNAP dollars:\")\n",
     "print(X_sparse[row_loc, positions['201']]) # Household donated to AK's at Large District"
    ]
   },
diff --git a/modal_app/README.md b/modal_app/README.md
new file mode 100644
index 00000000..0b10cf72
--- /dev/null
+++ b/modal_app/README.md
@@ -0,0 +1,62 @@
+# Modal App for GPU Weight Fitting
+
+Run calibration weight fitting on Modal's cloud GPUs.
+
+## Prerequisites
+
+- [Modal](https://modal.com/) account and CLI installed (`pip install modal`)
+- `modal token new` to authenticate
+- HuggingFace token stored as a Modal secret named `huggingface-token`
+
+## Usage
+
+```bash
+modal run modal_app/remote_calibration_runner.py --branch <branch> --epochs <epochs> --gpu <gpu>
+```
+
+### Arguments
+
+| Argument | Default | Description |
+|----------|---------|-------------|
+| `--branch` | `main` | Git branch to clone and run |
+| `--epochs` | `200` | Number of training epochs |
+| `--gpu` | `T4` | GPU type: `T4`, `A10`, `A100-40GB`, `A100-80GB`, `H100` |
+| `--output` | `calibration_weights.npy` | Local path for weights file |
+| `--log-output` | `calibration_log.csv` | Local path for calibration log |
+
+### Example
+
+```bash
+modal run modal_app/remote_calibration_runner.py --branch health-insurance-premiums --epochs 100 --gpu T4
+```
+
+## Output Files
+
+- **calibration_weights.npy** - Fitted household weights
+- **calibration_log.csv** - Per-target performance metrics across epochs (target_name, estimate, target, epoch, error, rel_error, abs_error, rel_abs_error, loss)
+
+## Changing Hyperparameters
+
+Hyperparameters are in `policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py`:
+
+```python
+BETA = 0.35
+GAMMA = -0.1
+ZETA = 1.1
+INIT_KEEP_PROB = 0.999
+LOG_WEIGHT_JITTER_SD = 0.05
+LOG_ALPHA_JITTER_SD = 0.01
+LAMBDA_L0 = 1e-8
+LAMBDA_L2 = 1e-12
+LEARNING_RATE = 0.15
+```
+
+To change them:
+1. Edit `fit_calibration_weights.py`
+2. Commit and push to your branch
+3. Re-run the Modal command with that branch
+
+## Important Notes
+
+- **Keep your connection open** - Modal needs to stay connected to download results. Don't close your laptop or let it sleep until you see the local "Weights saved to:" and "Calibration log saved to:" messages.
+- Modal clones from GitHub, so local changes must be pushed before they take effect.
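Once the run finishes, the weights and the per-target log land at the paths given by `--output` and `--log-output`. As a minimal sketch (not part of the patch), here is one way to inspect them locally, assuming numpy and pandas are installed and the default filenames from the arguments table were used:

```python
# Sketch: sanity-check the artifacts pulled down by the Modal run
# (assumes the default --output / --log-output paths were used).
import numpy as np
import pandas as pd

weights = np.load("calibration_weights.npy")
print(f"{(weights > 0).sum():,} non-zero weights out of {weights.size:,}")

log = pd.read_csv("calibration_log.csv")
# Mean relative absolute error per logged epoch, to eyeball convergence.
print(log.groupby("epoch")["rel_abs_error"].mean())
```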
diff --git a/modal_app/data_build.py b/modal_app/data_build.py index 52803568..f56b96a7 100644 --- a/modal_app/data_build.py +++ b/modal_app/data_build.py @@ -38,7 +38,6 @@ def setup_gcp_credentials(): def build_datasets( upload: bool = False, branch: str = "main", - test_lite: bool = False, ): setup_gcp_credentials() @@ -49,8 +48,6 @@ def build_datasets( subprocess.run(["uv", "sync", "--locked"], check=True) env = os.environ.copy() - if test_lite: - env["TEST_LITE"] = "true" # Download prerequisites subprocess.run( @@ -79,44 +76,8 @@ def build_datasets( print(f"Running {script}...") subprocess.run(["uv", "run", "python", script], check=True, env=env) - os.rename( - "policyengine_us_data/storage/enhanced_cps_2024.h5", - "policyengine_us_data/storage/dense_enhanced_cps_2024.h5", - ) - subprocess.run( - [ - "cp", - "policyengine_us_data/storage/sparse_enhanced_cps_2024.h5", - "policyengine_us_data/storage/enhanced_cps_2024.h5", - ], - check=True, - ) - - # Build local area calibration datasets (without TEST_LITE - must match full dataset) - print("Building local area calibration datasets...") - local_area_env = os.environ.copy() - local_area_env["LOCAL_AREA_CALIBRATION"] = "true" - - subprocess.run( - ["uv", "run", "python", "policyengine_us_data/datasets/cps/cps.py"], - check=True, - env=local_area_env, - ) - subprocess.run( - ["uv", "run", "python", "policyengine_us_data/datasets/puf/puf.py"], - check=True, - env=local_area_env, - ) - subprocess.run( - [ - "uv", - "run", - "python", - "policyengine_us_data/datasets/cps/extended_cps.py", - ], - check=True, - env=local_area_env, - ) + # Build stratified CPS for local area calibration + print("Running create_stratified_cps.py...") subprocess.run( [ "uv", @@ -126,7 +87,7 @@ def build_datasets( "10500", ], check=True, - env=local_area_env, + env=env, ) # Run local area calibration tests @@ -140,7 +101,7 @@ def build_datasets( "-v", ], check=True, - env=local_area_env, + env=env, ) # Run main test suite @@ -167,11 +128,9 @@ def build_datasets( def main( upload: bool = False, branch: str = "main", - test_lite: bool = False, ): result = build_datasets.remote( upload=upload, branch=branch, - test_lite=test_lite, ) print(result) diff --git a/modal_app/remote_calibration_runner.py b/modal_app/remote_calibration_runner.py new file mode 100644 index 00000000..43e35445 --- /dev/null +++ b/modal_app/remote_calibration_runner.py @@ -0,0 +1,197 @@ +import os +import subprocess +import modal + +app = modal.App("policyengine-us-data-fit-weights") + +hf_secret = modal.Secret.from_name("huggingface-token") + +image = ( + modal.Image.debian_slim(python_version="3.11") + .apt_install("git") + .pip_install("uv") +) + +REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git" + + +def _fit_weights_impl(branch: str, epochs: int) -> dict: + """Shared implementation for weight fitting.""" + os.chdir("/root") + subprocess.run(["git", "clone", "-b", branch, REPO_URL], check=True) + os.chdir("policyengine-us-data") + + subprocess.run(["uv", "sync", "--extra", "l0"], check=True) + + print("Downloading calibration inputs from HuggingFace...") + download_result = subprocess.run( + [ + "uv", + "run", + "python", + "-c", + "from policyengine_us_data.utils.huggingface import " + "download_calibration_inputs; " + "paths = download_calibration_inputs('/root/calibration_data'); " + "print(f\"DB: {paths['database']}\"); " + "print(f\"DATASET: {paths['dataset']}\")", + ], + capture_output=True, + text=True, + env=os.environ.copy(), + ) + print(download_result.stdout) + 
if download_result.stderr: + print("Download STDERR:", download_result.stderr) + if download_result.returncode != 0: + raise RuntimeError(f"Download failed: {download_result.returncode}") + + db_path = dataset_path = None + for line in download_result.stdout.split("\n"): + if line.startswith("DB:"): + db_path = line.split("DB:")[1].strip() + elif line.startswith("DATASET:"): + dataset_path = line.split("DATASET:")[1].strip() + + script_path = ( + "policyengine_us_data/datasets/cps/" + "local_area_calibration/fit_calibration_weights.py" + ) + result = subprocess.run( + [ + "uv", + "run", + "python", + script_path, + "--device", + "cuda", + "--epochs", + str(epochs), + "--db-path", + db_path, + "--dataset-path", + dataset_path, + ], + capture_output=True, + text=True, + env=os.environ.copy(), + ) + print(result.stdout) + if result.stderr: + print("STDERR:", result.stderr) + if result.returncode != 0: + raise RuntimeError(f"Script failed with code {result.returncode}") + + output_path = None + log_path = None + for line in result.stdout.split("\n"): + if "OUTPUT_PATH:" in line: + output_path = line.split("OUTPUT_PATH:")[1].strip() + elif "LOG_PATH:" in line: + log_path = line.split("LOG_PATH:")[1].strip() + + with open(output_path, "rb") as f: + weights_bytes = f.read() + + log_bytes = None + if log_path: + with open(log_path, "rb") as f: + log_bytes = f.read() + + return {"weights": weights_bytes, "log": log_bytes} + + +@app.function( + image=image, + secrets=[hf_secret], + memory=32768, + cpu=4.0, + gpu="T4", + timeout=14400, +) +def fit_weights_t4(branch: str = "main", epochs: int = 200) -> dict: + return _fit_weights_impl(branch, epochs) + + +@app.function( + image=image, + secrets=[hf_secret], + memory=32768, + cpu=4.0, + gpu="A10", + timeout=14400, +) +def fit_weights_a10(branch: str = "main", epochs: int = 200) -> dict: + return _fit_weights_impl(branch, epochs) + + +@app.function( + image=image, + secrets=[hf_secret], + memory=32768, + cpu=4.0, + gpu="A100-40GB", + timeout=14400, +) +def fit_weights_a100_40(branch: str = "main", epochs: int = 200) -> dict: + return _fit_weights_impl(branch, epochs) + + +@app.function( + image=image, + secrets=[hf_secret], + memory=32768, + cpu=4.0, + gpu="A100-80GB", + timeout=14400, +) +def fit_weights_a100_80(branch: str = "main", epochs: int = 200) -> dict: + return _fit_weights_impl(branch, epochs) + + +@app.function( + image=image, + secrets=[hf_secret], + memory=32768, + cpu=4.0, + gpu="H100", + timeout=14400, +) +def fit_weights_h100(branch: str = "main", epochs: int = 200) -> dict: + return _fit_weights_impl(branch, epochs) + + +GPU_FUNCTIONS = { + "T4": fit_weights_t4, + "A10": fit_weights_a10, + "A100-40GB": fit_weights_a100_40, + "A100-80GB": fit_weights_a100_80, + "H100": fit_weights_h100, +} + + +@app.local_entrypoint() +def main( + branch: str = "main", + epochs: int = 200, + gpu: str = "T4", + output: str = "calibration_weights.npy", + log_output: str = "calibration_log.csv", +): + if gpu not in GPU_FUNCTIONS: + raise ValueError( + f"Unknown GPU: {gpu}. 
Choose from: {list(GPU_FUNCTIONS.keys())}" + ) + + print(f"Running with GPU: {gpu}, epochs: {epochs}, branch: {branch}") + func = GPU_FUNCTIONS[gpu] + result = func.remote(branch=branch, epochs=epochs) + + with open(output, "wb") as f: + f.write(result["weights"]) + print(f"Weights saved to: {output}") + + if result["log"]: + with open(log_output, "wb") as f: + f.write(result["log"]) + print(f"Calibration log saved to: {log_output}") diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index 27a41bec..249e40e5 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -15,9 +15,6 @@ from microimpute.models.qrf import QRF import logging -test_lite = os.environ.get("TEST_LITE") == "true" -print(f"TEST_LITE == {test_lite}") - class CPS(Dataset): name = "cps" @@ -2141,21 +2138,13 @@ class Pooled_3_Year_CPS_2023(PooledCPS): url = "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5" -local_area_calibration = os.environ.get("LOCAL_AREA_CALIBRATION") == "true" - if __name__ == "__main__": - if test_lite: - CPS_2024().generate() - CPS_2025().generate() - elif local_area_calibration: - CPS_2023_Full().generate() - else: - CPS_2021().generate() - CPS_2022().generate() - CPS_2023().generate() - CPS_2024().generate() - CPS_2025().generate() - CPS_2021_Full().generate() - CPS_2022_Full().generate() - CPS_2023_Full().generate() - Pooled_3_Year_CPS_2023().generate() + CPS_2021().generate() + CPS_2022().generate() + CPS_2023().generate() + CPS_2024().generate() + CPS_2025().generate() + CPS_2021_Full().generate() + CPS_2022_Full().generate() + CPS_2023_Full().generate() + Pooled_3_Year_CPS_2023().generate() diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py index 4eb0a660..dc8f5040 100644 --- a/policyengine_us_data/datasets/cps/enhanced_cps.py +++ b/policyengine_us_data/datasets/cps/enhanced_cps.py @@ -18,8 +18,6 @@ CPS_2019, CPS_2024, ) -import os -from pathlib import Path import logging try: @@ -32,7 +30,6 @@ def reweight( original_weights, loss_matrix, targets_array, - dropout_rate=0.05, log_path="calibration_log.csv", epochs=500, l0_lambda=2.6445e-07, @@ -40,7 +37,6 @@ def reweight( temperature=0.25, seed=1456, ): - set_seeds(seed) target_names = np.array(loss_matrix.columns) is_national = loss_matrix.columns.str.startswith("nation/") loss_matrix = torch.tensor(loss_matrix.values, dtype=torch.float32) @@ -53,14 +49,10 @@ def reweight( normalisation_factor, dtype=torch.float32 ) targets_array = torch.tensor(targets_array, dtype=torch.float32) - weights = torch.tensor( - np.log(original_weights), requires_grad=True, dtype=torch.float32 - ) inv_mean_normalisation = 1 / np.mean(normalisation_factor.numpy()) def loss(weights): - # Check for Nans in either the weights or the loss matrix if torch.isnan(weights).any(): raise ValueError("Weights contain NaNs") if torch.isnan(loss_matrix).any(): @@ -78,75 +70,11 @@ def loss(weights): raise ValueError("Relative error contains NaNs") return rel_error_normalized.mean() - def dropout_weights(weights, p): - if p == 0: - return weights - # Replace p% of the weights with the mean value of the rest of them - mask = torch.rand_like(weights) < p - mean = weights[~mask].mean() - masked_weights = weights.clone() - masked_weights[mask] = mean - return masked_weights - - # Original (Dense) path --- - optimizer = torch.optim.Adam([weights], lr=3e-1) - start_loss = None - - iterator = trange(epochs) - performance 
= pd.DataFrame() - for i in iterator: - optimizer.zero_grad() - weights_ = dropout_weights(weights, dropout_rate) - l = loss(torch.exp(weights_)) - if (log_path is not None) and (i % 10 == 0): - estimates = torch.exp(weights) @ loss_matrix - estimates = estimates.detach().numpy() - df = pd.DataFrame( - { - "target_name": target_names, - "estimate": estimates, - "target": targets_array.detach().numpy(), - } - ) - df["epoch"] = i - df["error"] = df.estimate - df.target - df["rel_error"] = df.error / df.target - df["abs_error"] = df.error.abs() - df["rel_abs_error"] = df.rel_error.abs() - df["loss"] = df.rel_abs_error**2 - performance = pd.concat([performance, df], ignore_index=True) - - if (log_path is not None) and (i % 1000 == 0): - performance.to_csv(log_path, index=False) - if start_loss is None: - start_loss = l.item() - loss_rel_change = (l.item() - start_loss) / start_loss - l.backward() - iterator.set_postfix( - {"loss": l.item(), "loss_rel_change": loss_rel_change} - ) - optimizer.step() - if log_path is not None: - performance.to_csv(log_path, index=False) - - final_weights_dense = torch.exp(weights).detach().numpy() - - optimised_weights = final_weights_dense - print_reweighting_diagnostics( - final_weights_dense, - loss_matrix, - targets_array, - "Dense Solutions", - ) - - # New (Sparse) path depending on temperature, init_mean, l0_lambda ----- logging.info( f"Sparse optimization using seed {seed}, temp {temperature} " + f"init_mean {init_mean}, l0_lambda {l0_lambda}" ) set_seeds(seed) - p = Path(log_path) - log_path_sparse = p.with_name(f"{p.stem}_sparse{p.suffix}") weights = torch.tensor( np.log(original_weights), requires_grad=True, dtype=torch.float32 @@ -166,7 +94,7 @@ def dropout_weights(weights, p): masked = torch.exp(weights) * gates() l_main = loss(masked) l = l_main + l0_lambda * gates.get_penalty() - if (log_path_sparse is not None) and (i % 10 == 0): + if (log_path is not None) and (i % 10 == 0): gates.eval() estimates = (torch.exp(weights) * gates()) @ loss_matrix gates.train() @@ -186,8 +114,8 @@ def dropout_weights(weights, p): df["loss"] = df.rel_abs_error**2 performance = pd.concat([performance, df], ignore_index=True) - if (log_path_sparse is not None) and (i % 1000 == 0): - performance.to_csv(log_path_sparse, index=False) + if (log_path is not None) and (i % 1000 == 0): + performance.to_csv(log_path, index=False) if start_loss is None: start_loss = l.item() loss_rel_change = (l.item() - start_loss) / start_loss @@ -196,8 +124,8 @@ def dropout_weights(weights, p): {"loss": l.item(), "loss_rel_change": loss_rel_change} ) optimizer.step() - if log_path_sparse is not None: - performance.to_csv(log_path_sparse, index=False) + if log_path is not None: + performance.to_csv(log_path, index=False) gates.eval() final_weights_sparse = (torch.exp(weights) * gates()).detach().numpy() @@ -209,7 +137,7 @@ def dropout_weights(weights, p): "L0 Sparse Solution", ) - return final_weights_dense, final_weights_sparse + return final_weights_sparse def train_previous_year_income_model(): @@ -268,7 +196,6 @@ def generate(self): sim = Microsimulation(dataset=self.input_dataset) data = sim.dataset.load_dataset() data["household_weight"] = {} - data["household_sparse_weight"] = {} original_weights = sim.calculate("household_weight") original_weights = original_weights.values + np.random.normal( 1, 0.1, len(original_weights) @@ -309,7 +236,7 @@ def generate(self): targets_array_clean = targets_array[keep_idx] assert loss_matrix_clean.shape[1] == targets_array_clean.size - 
optimised_weights_dense, optimised_weights_sparse = reweight( + optimised_weights = reweight( original_weights, loss_matrix_clean, targets_array_clean, @@ -317,8 +244,7 @@ def generate(self): epochs=200, seed=1456, ) - data["household_weight"][year] = optimised_weights_dense - data["household_sparse_weight"][year] = optimised_weights_sparse + data["household_weight"][year] = optimised_weights self.save_dataset(data) diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py index a9bf07a4..b5b4fa24 100644 --- a/policyengine_us_data/datasets/cps/extended_cps.py +++ b/policyengine_us_data/datasets/cps/extended_cps.py @@ -4,7 +4,6 @@ from policyengine_us_data.datasets.cps.cps import * from policyengine_us_data.datasets.puf import * import pandas as pd -import os from microimpute.models.qrf import QRF import time import logging @@ -340,11 +339,5 @@ class ExtendedCPS_2024(ExtendedCPS): if __name__ == "__main__": - local_area_calibration = ( - os.environ.get("LOCAL_AREA_CALIBRATION", "").lower() == "true" - ) - - if local_area_calibration: - ExtendedCPS_2023().generate() - else: - ExtendedCPS_2024().generate() + ExtendedCPS_2023().generate() + ExtendedCPS_2024().generate() diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py b/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py index d9507d17..da3dffc0 100644 --- a/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py +++ b/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py @@ -1,15 +1,14 @@ """ -Create a stratified sample of extended_cps_2023.h5 that preserves high-income households. -This is needed for congressional district geo-stacking where the full dataset is too large. +Create a stratified sample of extended_cps_2023.h5 that preserves high-income households +while maintaining diversity in lower income strata for poverty analysis. Strategy: -- Keep ALL households above a high income threshold (e.g., top 1%) -- Sample progressively less from lower income strata -- Ensure representation across all income levels +- Keep ALL households in top 1% (for high-income tax analysis) +- Uniform sample from the remaining 99% (preserves low-income diversity) +- Optional: slight oversample of bottom quartile for poverty-focused analysis """ import numpy as np -import pandas as pd import h5py from policyengine_us import Microsimulation from policyengine_core.data.dataset import Dataset @@ -21,16 +20,22 @@ def create_stratified_cps_dataset( target_households=30_000, - high_income_percentile=99, # Keep ALL households above this percentile + high_income_percentile=99, + oversample_poor=False, + seed=None, base_dataset=None, output_path=None, ): """ - Create a stratified sample of CPS data preserving high-income households. + Create a stratified sample of CPS data preserving high-income households + while maintaining low-income diversity for poverty analysis. 
Args: target_households: Target number of households in output (approximate) - high_income_percentile: Keep ALL households above this AGI percentile + high_income_percentile: Keep ALL households above this AGI percentile (e.g., 99 or 99.5) + oversample_poor: If True, boost sampling rate for bottom 25% by 1.5x + seed: Random seed for reproducibility (default: None for random) + base_dataset: Path to source h5 file (default: extended_cps_2023.h5) output_path: Where to save the stratified h5 file """ print("\n" + "=" * 70) @@ -57,100 +62,120 @@ def create_stratified_cps_dataset( print(f"Target dataset: {target_households:,} households") print(f"Reduction ratio: {target_households/n_households_orig:.1%}") - # Calculate AGI percentiles - print("\nAnalyzing income distribution...") - percentiles = [0, 25, 50, 75, 90, 95, 99, 99.5, 99.9, 100] - agi_percentiles = np.percentile(agi, percentiles) - - print("AGI Percentiles:") - for p, val in zip(percentiles, agi_percentiles): - print(f" {p:5.1f}%: ${val:,.0f}") + # Show income distribution + print("\nAGI Percentiles (original):") + for p in [0, 25, 50, 75, 90, 95, 99, 99.5, 99.9, 100]: + val = np.percentile(agi, p) + print(f" {p:5.1f}%: ${val:>12,.0f}") - # Define sampling strategy - # Keep ALL high earners, sample progressively less from lower strata + # Define strata thresholds high_income_threshold = np.percentile(agi, high_income_percentile) - print( - f"\nHigh-income threshold (top {100-high_income_percentile}%): ${high_income_threshold:,.0f}" - ) + bottom_25_pct_threshold = np.percentile(agi, 25) - # Create strata with sampling rates - strata = [ - (99.9, 100, 1.00), # Top 0.1% - keep ALL - (99.5, 99.9, 1.00), # 99.5-99.9% - keep ALL - (99, 99.5, 1.00), # 99-99.5% - keep ALL - (95, 99, 0.80), # 95-99% - keep 80% - (90, 95, 0.60), # 90-95% - keep 60% - (75, 90, 0.40), # 75-90% - keep 40% - (50, 75, 0.25), # 50-75% - keep 25% - (25, 50, 0.15), # 25-50% - keep 15% - (0, 25, 0.10), # Bottom 25% - keep 10% - ] - - # Adjust sampling rates to hit target - print("\nInitial sampling strategy:") - expected_count = 0 - for low_p, high_p, rate in strata: - low_val = np.percentile(agi, low_p) if low_p > 0 else -np.inf - high_val = np.percentile(agi, high_p) if high_p < 100 else np.inf - in_stratum = np.sum((agi > low_val) & (agi <= high_val)) - expected = int(in_stratum * rate) - expected_count += expected - print( - f" {low_p:5.1f}-{high_p:5.1f}%: {in_stratum:6,} households x {rate:.0%} = {expected:6,}" - ) + # Count households in each stratum + n_top = np.sum(agi >= high_income_threshold) + n_bottom_25 = np.sum(agi < bottom_25_pct_threshold) + n_middle = n_households_orig - n_top - n_bottom_25 - print(f"Expected total: {expected_count:,} households") + print(f"\nStratum sizes:") + print( + f" Top {100 - high_income_percentile}% (AGI >= ${high_income_threshold:,.0f}): {n_top:,}" + ) + print(f" Middle 25-{high_income_percentile}%: {n_middle:,}") + print( + f" Bottom 25% (AGI < ${bottom_25_pct_threshold:,.0f}): {n_bottom_25:,}" + ) - # Adjust rates if needed - if expected_count > target_households * 1.1: # Allow 10% overage - adjustment = target_households / expected_count - print( - f"\nAdjusting rates by factor of {adjustment:.2f} to meet target..." + # Calculate sampling rates + # Keep ALL top earners, distribute remaining quota between middle and bottom + remaining_quota = target_households - n_top + if remaining_quota <= 0: + raise ValueError( + f"Target ({target_households:,}) is less than top {100-high_income_percentile}% " + f"count ({n_top:,}). 
Increase target_households." ) - # Never reduce the top percentiles - strata_adjusted = [] - for low_p, high_p, rate in strata: - if high_p >= 99: # Never reduce top 1% - strata_adjusted.append((low_p, high_p, rate)) - else: - strata_adjusted.append( - (low_p, high_p, min(1.0, rate * adjustment)) - ) - strata = strata_adjusted - - # Select households based on strata + if oversample_poor: + # Give bottom 25% a 1.5x boost relative to middle + r_middle = remaining_quota / (1.5 * n_bottom_25 + n_middle) + r_bottom = 1.5 * r_middle + r_middle = min(1.0, r_middle) + r_bottom = min(1.0, r_bottom) + else: + # Uniform sampling for the rest + r_middle = remaining_quota / (n_bottom_25 + n_middle) + r_bottom = r_middle + r_middle = min(1.0, r_middle) + r_bottom = min(1.0, r_bottom) + + print(f"\nSampling rates:") + print(f" Top {100 - high_income_percentile}%: 100%") + print(f" Middle 25-{high_income_percentile}%: {r_middle:.1%}") + print(f" Bottom 25%: {r_bottom:.1%}") + + # Expected counts + expected_top = n_top + expected_middle = int(n_middle * r_middle) + expected_bottom = int(n_bottom_25 * r_bottom) + expected_total = expected_top + expected_middle + expected_bottom + + print(f"\nExpected selection:") + print(f" Top {100 - high_income_percentile}%: {expected_top:,}") + print(f" Middle 25-{high_income_percentile}%: {expected_middle:,}") + print(f" Bottom 25%: {expected_bottom:,}") + print(f" Total: {expected_total:,}") + + # Select households print("\nSelecting households...") + if seed is not None: + np.random.seed(seed) + print(f" Using random seed: {seed}") selected_mask = np.zeros(n_households_orig, dtype=bool) - for low_p, high_p, rate in strata: - low_val = np.percentile(agi, low_p) if low_p > 0 else -np.inf - high_val = np.percentile(agi, high_p) if high_p < 100 else np.inf - - in_stratum = (agi > low_val) & (agi <= high_val) - stratum_indices = np.where(in_stratum)[0] - n_in_stratum = len(stratum_indices) - - if rate >= 1.0: - # Keep all - selected_mask[stratum_indices] = True - n_selected = n_in_stratum - else: - # Random sample within stratum - n_to_select = int(n_in_stratum * rate) - if n_to_select > 0: - np.random.seed(42) # For reproducibility - selected_indices = np.random.choice( - stratum_indices, n_to_select, replace=False - ) - selected_mask[selected_indices] = True - n_selected = n_to_select - else: - n_selected = 0 + # Top earners - keep all + top_mask = agi >= high_income_threshold + selected_mask[top_mask] = True + print( + f" Top {100 - high_income_percentile}%: selected {np.sum(top_mask):,}" + ) + # Bottom 25% + bottom_mask = agi < bottom_25_pct_threshold + bottom_indices = np.where(bottom_mask)[0] + n_select_bottom = int(len(bottom_indices) * r_bottom) + if r_bottom >= 1.0: + selected_mask[bottom_indices] = True + elif n_select_bottom > 0: + selected_bottom = np.random.choice( + bottom_indices, n_select_bottom, replace=False + ) + selected_mask[selected_bottom] = True + else: print( - f" {low_p:5.1f}-{high_p:5.1f}%: Selected {n_selected:6,} / {n_in_stratum:6,} ({n_selected/max(1,n_in_stratum):.0%})" + f" WARNING: Bottom 25% selection rounded to 0 (rate={r_bottom:.4f}, n={len(bottom_indices)})" + ) + print( + f" Bottom 25%: selected {np.sum(selected_mask & bottom_mask):,} / {len(bottom_indices):,}" + ) + + # Middle + middle_mask = ~top_mask & ~bottom_mask + middle_indices = np.where(middle_mask)[0] + n_select_middle = int(len(middle_indices) * r_middle) + if r_middle >= 1.0: + selected_mask[middle_indices] = True + elif n_select_middle > 0: + selected_middle = 
np.random.choice( + middle_indices, n_select_middle, replace=False ) + selected_mask[selected_middle] = True + else: + print( + f" WARNING: Middle selection rounded to 0 (rate={r_middle:.4f}, n={len(middle_indices)})" + ) + print( + f" Middle 25-{high_income_percentile}%: selected {np.sum(selected_mask & middle_mask):,} / {len(middle_indices):,}" + ) n_selected = np.sum(selected_mask) print( @@ -158,13 +183,8 @@ def create_stratified_cps_dataset( ) # Verify high earners are preserved - high_earners_mask = agi >= high_income_threshold - n_high_earners = np.sum(high_earners_mask) - n_high_earners_selected = np.sum(selected_mask & high_earners_mask) - print(f"\nHigh earners (>=${high_income_threshold:,.0f}):") - print(f" Original: {n_high_earners:,}") print( - f" Selected: {n_high_earners_selected:,} ({n_high_earners_selected/n_high_earners:.0%})" + f"\nHigh earners (>=${high_income_threshold:,.0f}): {np.sum(selected_mask & top_mask):,} / {n_top:,} (100%)" ) # Get the selected household IDs @@ -300,28 +320,42 @@ def create_stratified_cps_dataset( if __name__ == "__main__": import sys - # Parse command line arguments - if len(sys.argv) > 1: - try: - target = int(sys.argv[1]) - print( - f"Creating stratified dataset with target of {target:,} households..." - ) - output_file = create_stratified_cps_dataset( - target_households=target - ) - except ValueError: - print(f"Invalid target households: {sys.argv[1]}") - print("Usage: python create_stratified_cps.py [target_households]") - sys.exit(1) - else: - # Default target - print( - "Creating stratified dataset with default target of 30,000 households..." - ) - output_file = create_stratified_cps_dataset(target_households=30_000) + target = 30_000 + high_pct = 99 + oversample = False + seed = None + + for arg in sys.argv[1:]: + if arg == "--oversample-poor": + oversample = True + elif arg.startswith("--top="): + high_pct = float(arg.split("=")[1]) + elif arg.startswith("--seed="): + seed = int(arg.split("=")[1]) + elif arg.isdigit(): + target = int(arg) + + print(f"Creating stratified dataset:") + print(f" Target households: {target:,}") + print(f" Keep all above: {high_pct}th percentile") + print(f" Oversample poor: {oversample}") + print(f" Seed: {seed if seed is not None else 'random'}") + + output_file = create_stratified_cps_dataset( + target_households=target, + high_income_percentile=high_pct, + oversample_poor=oversample, + seed=seed, + ) print(f"\nDone! Created: {output_file}") - print("\nTo test loading:") - print(" from policyengine_us import Microsimulation") - print(f" sim = Microsimulation(dataset='{output_file}')") + print("\nUsage:") + print( + " python create_stratified_cps.py [target] [--top=99] [--oversample-poor] [--seed=N]" + ) + print("\nExamples:") + print(" python create_stratified_cps.py 30000") + print( + " python create_stratified_cps.py 50000 --top=99.5 --oversample-poor" + ) + print(" python create_stratified_cps.py 30000 --seed=123 # reproducible") diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py b/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py new file mode 100644 index 00000000..ee3d3847 --- /dev/null +++ b/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py @@ -0,0 +1,247 @@ +""" +Fit calibration weights using L0-regularized optimization. +Prototype script for weight calibration using the l0-python package. 
+""" + +import argparse +import logging +from datetime import datetime +from pathlib import Path + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + +parser = argparse.ArgumentParser(description="Fit calibration weights") +parser.add_argument( + "--device", + default="cpu", + choices=["cpu", "cuda"], + help="Device for training (cpu or cuda)", +) +parser.add_argument( + "--epochs", type=int, default=100, help="Total epochs for training" +) +parser.add_argument( + "--db-path", + default=None, + help="Path to policy_data.db (default: STORAGE_FOLDER/calibration/policy_data.db)", +) +parser.add_argument( + "--dataset-path", default=None, help="Path to stratified CPS h5 file" +) +args = parser.parse_args() + +import numpy as np +import pandas as pd +from policyengine_us import Microsimulation +from policyengine_us_data.storage import STORAGE_FOLDER +from sparse_matrix_builder import SparseMatrixBuilder +from calibration_utils import get_all_cds_from_database + +try: + import torch + from l0.calibration import SparseCalibrationWeights +except ImportError: + raise ImportError( + "l0-python is required for weight fitting. " + "Install with: pip install policyengine-us-data[l0]" + ) + +# ============================================================================ +# CONFIGURATION +# ============================================================================ +DEVICE = args.device +TOTAL_EPOCHS = args.epochs +EPOCHS_PER_CHUNK = 500 # TODO: need a better way to set this. Remember it can blow up the Vercel app + +# Hyperparameters +BETA = 0.35 +GAMMA = -0.1 +ZETA = 1.1 +INIT_KEEP_PROB = 0.999 +LOG_WEIGHT_JITTER_SD = 0.05 +LOG_ALPHA_JITTER_SD = 0.01 +LAMBDA_L0 = 1e-8 +LAMBDA_L2 = 1e-12 +LEARNING_RATE = 0.15 + +# Data paths +if args.db_path: + db_path = Path(args.db_path) +else: + db_path = STORAGE_FOLDER / "calibration" / "policy_data.db" +db_uri = f"sqlite:///{db_path}" + +if args.dataset_path: + dataset_path = Path(args.dataset_path) +else: + dataset_path = STORAGE_FOLDER / "stratified_extended_cps_2023.h5" + +output_dir = STORAGE_FOLDER / "calibration" +output_dir.mkdir(parents=True, exist_ok=True) +time_period = 2023 + +# Get all CDs from database +cds_to_calibrate = get_all_cds_from_database(db_uri) +print(f"Found {len(cds_to_calibrate)} congressional districts") + +# ============================================================================ +# STEP 1: BUILD CALIBRATION MATRIX +# ============================================================================ +print(f"Loading simulation from {dataset_path}...") +sim = Microsimulation(dataset=str(dataset_path)) +n_households = len(sim.calculate("household_id", map_to="household").values) +print(f"Loaded {n_households:,} households") + +print("\nBuilding sparse matrix...") +builder = SparseMatrixBuilder( + db_uri=db_uri, + time_period=time_period, + cds_to_calibrate=cds_to_calibrate, + dataset_path=str(dataset_path), +) + +targets_df, X_sparse, household_id_mapping = builder.build_matrix( + sim, + target_filter={ + "stratum_group_ids": [4], + "variables": [ + "health_insurance_premiums_without_medicare_part_b", + "snap", + ], + }, +) + +print(f"Matrix shape: {X_sparse.shape}") +print(f"Targets: {len(targets_df)}") + +# Filter to achievable targets (rows with non-zero data) +row_sums = np.array(X_sparse.sum(axis=1)).flatten() +achievable_mask = row_sums > 0 +n_achievable = achievable_mask.sum() +n_impossible = (~achievable_mask).sum() + +print(f"\nAchievable targets: {n_achievable}") +print(f"Impossible 
targets (filtered out): {n_impossible}") + +targets_df = targets_df[achievable_mask].reset_index(drop=True) +X_sparse = X_sparse[achievable_mask, :] + +print(f"Filtered matrix shape: {X_sparse.shape}") + +# Extract target vector and names +targets = targets_df["value"].values +target_names = [ + f"{row['geographic_id']}/{row['variable']}" + for _, row in targets_df.iterrows() +] + +# ============================================================================ +# STEP 2: INITIALIZE WEIGHTS +# ============================================================================ +initial_weights = np.ones(X_sparse.shape[1]) * 100 +print(f"\nInitial weights shape: {initial_weights.shape}") +print(f"Initial weights sum: {initial_weights.sum():,.0f}") + +# ============================================================================ +# STEP 3: CREATE MODEL +# ============================================================================ +print("\nCreating SparseCalibrationWeights model...") +model = SparseCalibrationWeights( + n_features=X_sparse.shape[1], + beta=BETA, + gamma=GAMMA, + zeta=ZETA, + init_keep_prob=INIT_KEEP_PROB, + init_weights=initial_weights, + log_weight_jitter_sd=LOG_WEIGHT_JITTER_SD, + log_alpha_jitter_sd=LOG_ALPHA_JITTER_SD, + device=DEVICE, +) + +# ============================================================================ +# STEP 4: TRAIN IN CHUNKS +# ============================================================================ +timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") +calibration_log = pd.DataFrame() + +for chunk_start in range(0, TOTAL_EPOCHS, EPOCHS_PER_CHUNK): + chunk_epochs = min(EPOCHS_PER_CHUNK, TOTAL_EPOCHS - chunk_start) + current_epoch = chunk_start + chunk_epochs + + print(f"\nTraining epochs {chunk_start + 1} to {current_epoch}...") + + model.fit( + M=X_sparse, + y=targets, + target_groups=None, + lambda_l0=LAMBDA_L0, + lambda_l2=LAMBDA_L2, + lr=LEARNING_RATE, + epochs=chunk_epochs, + loss_type="relative", + verbose=True, + verbose_freq=chunk_epochs, + ) + + with torch.no_grad(): + predictions = model.predict(X_sparse).cpu().numpy() + + chunk_df = pd.DataFrame( + { + "target_name": target_names, + "estimate": predictions, + "target": targets, + } + ) + chunk_df["epoch"] = current_epoch + chunk_df["error"] = chunk_df.estimate - chunk_df.target + chunk_df["rel_error"] = chunk_df.error / chunk_df.target + chunk_df["abs_error"] = chunk_df.error.abs() + chunk_df["rel_abs_error"] = chunk_df.rel_error.abs() + chunk_df["loss"] = chunk_df.rel_abs_error**2 + calibration_log = pd.concat([calibration_log, chunk_df], ignore_index=True) + +# ============================================================================ +# STEP 5: EXTRACT AND SAVE WEIGHTS +# ============================================================================ +with torch.no_grad(): + w = model.get_weights(deterministic=True).cpu().numpy() + +print(f"\nFinal weights shape: {w.shape}") +print(f"Final weights sum: {w.sum():,.0f}") +print(f"Non-zero weights: {(w > 0).sum():,}") + +output_path = output_dir / f"calibration_weights_{timestamp}.npy" +np.save(output_path, w) +print(f"\nWeights saved to: {output_path}") +print(f"OUTPUT_PATH:{output_path}") + +log_path = output_dir / f"calibration_log_{timestamp}.csv" +calibration_log.to_csv(log_path, index=False) +print(f"Calibration log saved to: {log_path}") +print(f"LOG_PATH:{log_path}") + +# ============================================================================ +# STEP 6: VERIFY PREDICTIONS +# 
============================================================================ +print("\n" + "=" * 60) +print("PREDICTION VERIFICATION") +print("=" * 60) + +with torch.no_grad(): + predictions = model.predict(X_sparse).cpu().numpy() + +for i in range(len(targets)): + rel_error = (predictions[i] - targets[i]) / targets[i] * 100 + print( + f"{target_names[i][:50]:50} | " + f"pred: {predictions[i]:>12,.0f} | " + f"target: {targets[i]:>12,.0f} | " + f"err: {rel_error:>6.2f}%" + ) + +print("\n" + "=" * 60) +print("FITTING COMPLETED") +print("=" * 60) diff --git a/policyengine_us_data/datasets/cps/small_enhanced_cps.py b/policyengine_us_data/datasets/cps/small_enhanced_cps.py index a9679a2a..5e099bec 100644 --- a/policyengine_us_data/datasets/cps/small_enhanced_cps.py +++ b/policyengine_us_data/datasets/cps/small_enhanced_cps.py @@ -50,15 +50,17 @@ def create_sparse_ecps(): ecps = EnhancedCPS_2024() h5 = ecps.load() - sparse_weights = h5["household_sparse_weight"][str(time_period)][:] + sparse_weights = h5["household_weight"][str(time_period)][:] hh_ids = h5["household_id"][str(time_period)][:] + h5.close() template_sim = Microsimulation( dataset=EnhancedCPS_2024, ) template_sim.set_input("household_weight", time_period, sparse_weights) - df = template_sim.to_input_dataframe() # Not at household level + df = template_sim.to_input_dataframe() + del template_sim household_weight_column = f"household_weight__{time_period}" df_household_id_column = f"household_id__{time_period}" @@ -102,7 +104,7 @@ def create_sparse_ecps(): if len(data[variable]) == 0: del data[variable] - with h5py.File(STORAGE_FOLDER / "sparse_enhanced_cps_2024.h5", "w") as f: + with h5py.File(STORAGE_FOLDER / "enhanced_cps_2024.h5", "w") as f: for variable, periods in data.items(): grp = f.create_group(variable) for period, values in periods.items(): diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index b3290fe9..38afcbea 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -788,13 +788,7 @@ class PUF_2024(PUF): } if __name__ == "__main__": - import os - - local_area_calibration = os.environ.get("LOCAL_AREA_CALIBRATION") == "true" - - if local_area_calibration: - PUF_2023().generate() - else: - PUF_2015().generate() - PUF_2021().generate() - PUF_2024().generate() + PUF_2015().generate() + PUF_2021().generate() + PUF_2023().generate() + PUF_2024().generate() diff --git a/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py index 90c9f8c4..96e4a996 100644 --- a/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py +++ b/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py @@ -17,7 +17,7 @@ @pytest.fixture(scope="session") def data(): - return Dataset.from_file(STORAGE_FOLDER / "sparse_enhanced_cps_2024.h5") + return Dataset.from_file(STORAGE_FOLDER / "enhanced_cps_2024.h5") @pytest.fixture(scope="session") @@ -93,7 +93,7 @@ def test_sparse_ecps_has_tips(sim): def test_sparse_ecps_replicates_jct_tax_expenditures(): calibration_log = pd.read_csv( - "calibration_log_sparse.csv", + "calibration_log.csv", ) jct_rows = calibration_log[ diff --git a/pyproject.toml b/pyproject.toml index 50f857ee..8fbb2490 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,9 @@ dependencies = [ calibration = [ "samplics", ] +l0 = [ + "l0-python", +] [dependency-groups] dev = [ diff --git a/uv.lock b/uv.lock index cf6075de..65e63bc1 
100644 --- a/uv.lock +++ b/uv.lock @@ -637,6 +637,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/0a/a3871375c7b9727edaeeea994bfff7c63ff7804c9829c19309ba2e058807/greenlet-3.3.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:b01548f6e0b9e9784a2c99c5651e5dc89ffcbe870bc5fb2e5ef864e9cc6b5dcb", size = 276379, upload-time = "2025-12-04T14:23:30.498Z" }, { url = "https://files.pythonhosted.org/packages/43/ab/7ebfe34dce8b87be0d11dae91acbf76f7b8246bf9d6b319c741f99fa59c6/greenlet-3.3.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:349345b770dc88f81506c6861d22a6ccd422207829d2c854ae2af8025af303e3", size = 597294, upload-time = "2025-12-04T14:50:06.847Z" }, { url = "https://files.pythonhosted.org/packages/a4/39/f1c8da50024feecd0793dbd5e08f526809b8ab5609224a2da40aad3a7641/greenlet-3.3.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8e18ed6995e9e2c0b4ed264d2cf89260ab3ac7e13555b8032b25a74c6d18655", size = 607742, upload-time = "2025-12-04T14:57:42.349Z" }, + { url = "https://files.pythonhosted.org/packages/77/cb/43692bcd5f7a0da6ec0ec6d58ee7cddb606d055ce94a62ac9b1aa481e969/greenlet-3.3.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c024b1e5696626890038e34f76140ed1daf858e37496d33f2af57f06189e70d7", size = 622297, upload-time = "2025-12-04T15:07:13.552Z" }, { url = "https://files.pythonhosted.org/packages/75/b0/6bde0b1011a60782108c01de5913c588cf51a839174538d266de15e4bf4d/greenlet-3.3.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:047ab3df20ede6a57c35c14bf5200fcf04039d50f908270d3f9a7a82064f543b", size = 609885, upload-time = "2025-12-04T14:26:02.368Z" }, { url = "https://files.pythonhosted.org/packages/49/0e/49b46ac39f931f59f987b7cd9f34bfec8ef81d2a1e6e00682f55be5de9f4/greenlet-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2d9ad37fc657b1102ec880e637cccf20191581f75c64087a549e66c57e1ceb53", size = 1567424, upload-time = "2025-12-04T15:04:23.757Z" }, { url = "https://files.pythonhosted.org/packages/05/f5/49a9ac2dff7f10091935def9165c90236d8f175afb27cbed38fb1d61ab6b/greenlet-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83cd0e36932e0e7f36a64b732a6f60c2fc2df28c351bae79fbaf4f8092fe7614", size = 1636017, upload-time = "2025-12-04T14:27:29.688Z" }, @@ -644,6 +645,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = "2025-12-04T14:23:01.282Z" }, { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" }, { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" }, + { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", 
hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" }, { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" }, { url = "https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" }, { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" }, @@ -1083,6 +1085,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780", size = 15884, upload-time = "2023-11-23T09:26:34.325Z" }, ] +[[package]] +name = "l0-python" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "scipy" }, + { name = "torch" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cf/6b/4a9ca6d1eb9828c526947fffb2ee2a1d02eec330f04cd53af301a05fde0a/l0_python-0.5.0.tar.gz", hash = "sha256:9b6b1751e142702e21ed866e40d8ab47304a26a5455998620a0eb798f4c7f599", size = 36320, upload-time = "2026-01-21T13:55:53.365Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/80/33ccae8af3fe55a81d33569d9241a29cecde17ab34fdff214804e81fa353/l0_python-0.5.0-py3-none-any.whl", hash = "sha256:9c8f4532426b927a97f4722b1c5114147adb09365100623effb49c0021345881", size = 23590, upload-time = "2026-01-21T13:55:52.406Z" }, +] + [[package]] name = "lark" version = "1.3.1" @@ -1843,7 +1859,7 @@ wheels = [ [[package]] name = "policyengine-us-data" -version = "1.54.0" +version = "1.54.1" source = { editable = "." } dependencies = [ { name = "google-auth" }, @@ -1873,6 +1889,9 @@ dependencies = [ calibration = [ { name = "samplics" }, ] +l0 = [ + { name = "l0-python" }, +] [package.dev-dependencies] dev = [ @@ -1893,6 +1912,7 @@ dev = [ requires-dist = [ { name = "google-auth", specifier = ">=2.0.0" }, { name = "google-cloud-storage", specifier = ">=2.0.0" }, + { name = "l0-python", marker = "extra == 'l0'" }, { name = "microdf-python", specifier = ">=1.2.1" }, { name = "microimpute", specifier = ">=1.1.4" }, { name = "openpyxl", specifier = ">=3.1.5" }, @@ -1914,7 +1934,7 @@ requires-dist = [ { name = "us", specifier = ">=2.0.0" }, { name = "xlrd", specifier = ">=2.0.2" }, ] -provides-extras = ["calibration"] +provides-extras = ["calibration", "l0"] [package.metadata.requires-dev] dev = [