From 2cb2d08acc30d385e2e37b574f689e8172712af3 Mon Sep 17 00:00:00 2001
From: Alex Westwood <alex.westwood@ons.gov.uk>
Date: Fri, 23 Jan 2026 16:48:01 +0000
Subject: [PATCH 1/2] move setup code

---
 .../solutions/04_unit_tests_solutions.ipynb   | 114 +++++++++---------
 1 file changed, 54 insertions(+), 60 deletions(-)

diff --git a/exercises/solutions/04_unit_tests_solutions.ipynb b/exercises/solutions/04_unit_tests_solutions.ipynb
index 4e00fff..3bf2bf7 100644
--- a/exercises/solutions/04_unit_tests_solutions.ipynb
+++ b/exercises/solutions/04_unit_tests_solutions.ipynb
@@ -7,15 +7,32 @@
    "source": [
     "# Solutions: Unit Testing Exercises\n",
     "\n",
-    "This notebook provides step-by-step solutions for writing and running unit tests in your RAP pipeline using pytest. Each solution matches the corresponding exercise notebook and is designed for beginners."
+    "This notebook provides step-by-step solutions for writing and running unit tests in your RAP pipeline using pytest. Each solution corresponds to the matching exercise in the exercise notebook."
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "id": "1",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Setup code for solutions\n",
+    "\n",
+    "import os\n",
+    "import sys\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), \"..\", \"..\", \"src\")))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2",
+   "metadata": {},
    "source": [
-    "## Exercise 1 Solution: Review and Adapt an existing unit\n",
+    "## Exercise 1 solution: Review and adapt an existing unit test\n",
     "\n",
     "Open `tests/test_cleaning.py` and run the test using the following command in the terminal:\n",
     "```cmd\n",
@@ -27,7 +44,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "2",
+   "id": "3",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -49,55 +66,37 @@
     "    # Fill missing smoker values with 'Yes'\n",
     "    df[\"smoker\"] = df[\"smoker\"].fillna(\"Yes\")\n",
     "\n",
-    "    # Ensure gender is uppercase\n",
-    "    df[\"gender\"] = df[\"gender\"].str.upper()\n",
+    "    # Ensure sex is uppercase\n",
+    "    df[\"sex\"] = df[\"sex\"].str.upper()\n",
     "\n",
     "    return df"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "3",
+   "id": "4",
    "metadata": {},
    "source": [
     "Notice how the check for the missing smoker value fails. The test checks the first column for a \"smoker\" value of \"No\", however the modified `clean_health_data` function fills missing smoker values with 'Yes', changing the smoker value in the first column to 'Yes', which causes the test to fail.\n",
     "\n",
-    "In order for the test to pass change the expected \"smoker\" value to \"Yes\" instead of \"No\".\n",
-    "The original assert statement looks like this:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "assert cleaned[\"smoker\"].iloc[0] == \"No\""
+    "The failing test draws developers' attention to the function, so they can check whether the change was correct. If it was not, the developer can fix the error in the function. If it was, the unit test can be adapted to reflect the change. In this case, assume the change was correct. For the test to pass, change the expected \"smoker\" value from \"No\" to \"Yes\".\n",
+    "The original assert statement looks like this:  \n",
+    "  \n",
+    "```python\n",
+    "assert cleaned[\"smoker\"].iloc[0] == \"No\"\n",
+    "```  \n",
+    "  \n",
+    "The changed assert statement should look like this:  \n",
+    "  \n",
+    "```python\n",
+    "assert cleaned[\"smoker\"].iloc[0] == \"Yes\"\n",
+    "```"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "5",
    "metadata": {},
-   "source": [
-    "The changed assert statement should look like this:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "assert cleaned[\"smoker\"].iloc[0] == \"Yes\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "7",
-   "metadata": {},
    "source": [
     "## Exercise 2 Solution: Write a simple unit test for a new function\n",
     "\n",
@@ -107,21 +106,16 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "8",
+   "id": "6",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Walkthrough: Unit test for flag_missing\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "import pandas as pd\n",
     "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), \"..\", \"..\", \"src\")))\n",
+    "# Note: This test will not run unless impute_by_group has been entered into cleaning.py\n",
     "\n",
     "from python_rap_demo.cleaning import flag_missing\n",
     "\n",
-    "\n",
     "def test_flag_missing():\n",
     "    \"\"\"\n",
     "    Test flag_missing\n",
@@ -141,7 +135,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9",
+   "id": "7",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -164,8 +158,8 @@
     "    \"\"\"\n",
     "    Test impute_by_group.\n",
     "    \"\"\"\n",
-    "    df = pd.DataFrame({\"height_cm\": [170, None, 160], \"gender\": [\"M\", \"F\", \"F\"]})\n",
-    "    imputed = impute_by_group(df, \"height_cm\", \"gender\")\n",
+    "    df = pd.DataFrame({\"height_cm\": [170, None, 160], \"sex\": [\"M\", \"F\", \"F\"]})\n",
+    "    imputed = impute_by_group(df, \"height_cm\", \"sex\")\n",
     "    # Check that missing value is imputed with group mean\n",
     "    expected = [170, 160, 160]\n",
     "    assert imputed.tolist() == expected\n",
@@ -177,10 +171,10 @@
   },
   {
    "cell_type": "markdown",
-   "id": "10",
+   "id": "8",
    "metadata": {},
    "source": [
-    "## Exercise 3 Solution: Run your unit tests\n",
+    "## Exercise 3 solution: Run your unit tests\n",
     "\n",
     "Run the following command in your terminal:\n",
     "```cmd\n",
@@ -192,10 +186,10 @@
   },
   {
    "cell_type": "markdown",
-   "id": "11",
+   "id": "9",
    "metadata": {},
    "source": [
-    "## Exercise 4 Solution: Stretch - Check test coverage\n",
+    "## Exercise 4 solution: Stretch - Check test coverage\n",
     "\n",
     "Run the following commands:\n",
     "```cmd\n",
@@ -209,10 +203,10 @@
   },
   {
    "cell_type": "markdown",
-   "id": "12",
+   "id": "10",
    "metadata": {},
    "source": [
-    "## Exercise 5 Solution: Stretch - Try parameterisation in pytest\n",
+    "## Exercise 5 solution: Stretch - Try parameterisation in pytest\n",
     "\n",
     "Here are examples using `@pytest.mark.parametrize` for `flag_missing` and `impute_by_group`. Parameterisation lets you run the same test with different inputs, making your tests more robust and easier to maintain."
    ]
@@ -220,7 +214,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "13",
+   "id": "11",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -274,18 +268,18 @@
     "@pytest.mark.parametrize(\n",
     "    \"df,col,group_col,expected\",\n",
     "    [\n",
-    "        # Test case 1: Impute missing height by gender group mean\n",
+    "        # Test case 1: Impute missing height by sex group mean\n",
     "        (\n",
-    "            pd.DataFrame({\"height_cm\": [170, None, 160], \"gender\": [\"M\", \"F\", \"F\"]}),\n",
+    "            pd.DataFrame({\"height_cm\": [170, None, 160], \"sex\": [\"M\", \"F\", \"F\"]}),\n",
     "            \"height_cm\",\n",
-    "            \"gender\",\n",
+    "            \"sex\",\n",
     "            [170, 160, 160],\n",
     "        ),\n",
     "        # Test case 2: All missing in one group, fallback to overall mean\n",
     "        (\n",
-    "            pd.DataFrame({\"height_cm\": [None, None, 150], \"gender\": [\"M\", \"M\", \"F\"]}),\n",
+    "            pd.DataFrame({\"height_cm\": [None, None, 150], \"sex\": [\"M\", \"M\", \"F\"]}),\n",
     "            \"height_cm\",\n",
-    "            \"gender\",\n",
+    "            \"sex\",\n",
     "            [150, 150, 150],\n",
     "        ),\n",
     "    ],\n",
@@ -311,7 +305,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.3"
+   "version": "3.12.5"
   }
  },
  "nbformat": 4,

From bd4328bbbd72558a31d778992299796b7190311f Mon Sep 17 00:00:00 2001
From: Alex Westwood <alex.westwood@ons.gov.uk>
Date: Fri, 23 Jan 2026 17:19:15 +0000
Subject: [PATCH 2/2] correct note

---
 exercises/solutions/04_unit_tests_solutions.ipynb | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/exercises/solutions/04_unit_tests_solutions.ipynb b/exercises/solutions/04_unit_tests_solutions.ipynb
index 3bf2bf7..93bc1b4 100644
--- a/exercises/solutions/04_unit_tests_solutions.ipynb
+++ b/exercises/solutions/04_unit_tests_solutions.ipynb
@@ -112,7 +112,7 @@
    "source": [
     "# Walkthrough: Unit test for flag_missing\n",
     "\n",
-    "# Note: This test will not run unless impute_by_group has been entered into cleaning.py\n",
+    "# Note: This test will not run unless flag_missing has been entered into cleaning.py\n",
     "\n",
     "from python_rap_demo.cleaning import flag_missing\n",
     "\n",
@@ -143,14 +143,6 @@
     "\n",
     "# Note: This test will not run unless impute_by_group has been entered into cleaning.py\n",
     "\n",
-    "\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "import pandas as pd\n",
-    "\n",
-    "sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), \"..\", \"..\", \"src\")))\n",
-    "\n",
     "from python_rap_demo.cleaning import impute_by_group\n",
     "\n",
     "\n",