From 928699eea69cbb5a0b0b691a68bb3e4ee48ff0e6 Mon Sep 17 00:00:00 2001
From: fryerd1 <dylan.fryer@ons.gov.uk>
Date: Tue, 21 Oct 2025 12:11:15 +0100
Subject: [PATCH 1/7] Add test_format_month_section to test_report.py

---
 tests/test_report.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/tests/test_report.py b/tests/test_report.py
index 0a5fd0b..24d5719 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -3,9 +3,26 @@
 """
 import pandas as pd
 
-from python_rap_demo.report import generate_markdown_report
+from python_rap_demo.report import format_month_section, generate_markdown_report
 
 
+def test_format_month_section():
+    month = "January"
+    month_df = pd.DataFrame({
+    "diagnosis": ["A", "B"],
+    "case_count": [10, 20],
+    "total": [20, 50],
+    })
+    month_df["prevalence_rate"] = month_df["case_count"] / month_df["total"]
+    expected_output = (
+        "## Month: January\n"
+        "- A: 50.00% (10 cases)\n"
+        "- B: 40.00% (20 cases)\n"
+    )
+
+    result = format_month_section(month, month_df)
+    assert result.strip() == expected_output.strip()
+
 def test_generate_markdown_report(tmp_path):
     """
     Test the generate_markdown_report function to ensure it:

From 8662116a56b89b3df7f41ba6a87ed28bc8f5d5f4 Mon Sep 17 00:00:00 2001
From: fryerd1 <dylan.fryer@ons.gov.uk>
Date: Tue, 21 Oct 2025 14:41:19 +0100
Subject: [PATCH 2/7] Add comments to test_format_month_section

---
 tests/test_report.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/test_report.py b/tests/test_report.py
index 24d5719..ce9c4a8 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -7,20 +7,24 @@
 
 
 def test_format_month_section():
+    # Define the variable month used in format_month_selection
     month = "January"
+    # Create the dataframe month_df used in format_month_selection
     month_df = pd.DataFrame({
     "diagnosis": ["A", "B"],
     "case_count": [10, 20],
     "total": [20, 50],
     })
+    #Calculate the prevalence rate
     month_df["prevalence_rate"] = month_df["case_count"] / month_df["total"]
+    # Define the expected output of format_month_selection
     expected_output = (
         "## Month: January\n"
         "- A: 50.00% (10 cases)\n"
         "- B: 40.00% (20 cases)\n"
     )
-
     result = format_month_section(month, month_df)
+    # Check that the output from format_month_selection matches the expected output
     assert result.strip() == expected_output.strip()
 
 def test_generate_markdown_report(tmp_path):

From 413e87a3c358ea6d1c91d0b166abf5617f726b7d Mon Sep 17 00:00:00 2001
From: fryerd1 <dylan.fryer@ons.gov.uk>
Date: Wed, 29 Oct 2025 13:12:45 +0000
Subject: [PATCH 3/7] Fix format_month_section name typo in test_report.py comments

---
 tests/test_report.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_report.py b/tests/test_report.py
index ce9c4a8..a1ba543 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -7,9 +7,9 @@
 
 
 def test_format_month_section():
-    # Define the variable month used in format_month_selection
+    # Define the variable month used in format_month_section
     month = "January"
-    # Create the dataframe month_df used in format_month_selection
+    # Create the dataframe month_df used in format_month_section
     month_df = pd.DataFrame({
     "diagnosis": ["A", "B"],
     "case_count": [10, 20],
@@ -17,14 +17,14 @@ def test_format_month_section():
     })
     #Calculate the prevalence rate
     month_df["prevalence_rate"] = month_df["case_count"] / month_df["total"]
-    # Define the expected output of format_month_selection
+    # Define the expected output of format_month_section
     expected_output = (
         "## Month: January\n"
         "- A: 50.00% (10 cases)\n"
         "- B: 40.00% (20 cases)\n"
     )
     result = format_month_section(month, month_df)
-    # Check that the output from format_month_selection matches the expected output
+    # Check that the output from format_month_section matches the expected output
     assert result.strip() == expected_output.strip()
 
 def test_generate_markdown_report(tmp_path):

From bd571fb85047283f793427408449ea78bff0ada0 Mon Sep 17 00:00:00 2001
From: Alex Westwood <alex.westwood@ons.gov.uk>
Date: Wed, 26 Nov 2025 15:31:24 +0000
Subject: [PATCH 4/7] correct terminology

---
 data/health_data.csv            | 2 +-
 src/python_rap_demo/cleaning.py | 4 ++--
 tests/test_cleaning.py          | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/data/health_data.csv b/data/health_data.csv
index 621b3fb..3391498 100644
--- a/data/health_data.csv
+++ b/data/health_data.csv
@@ -1,4 +1,4 @@
-patient_id,month,age,gender,height_cm,weight_kg,smoker,diagnosis
+patient_id,month,age,sex,height_cm,weight_kg,smoker,diagnosis
 1,2025-01,45,M,175,85,Yes,Hypertension
 2,2025-01,34,F,160,62,No,Healthy
 3,2025-01,67,F,155,70,No,Diabetes
diff --git a/src/python_rap_demo/cleaning.py b/src/python_rap_demo/cleaning.py
index 360eede..36d17f1 100644
--- a/src/python_rap_demo/cleaning.py
+++ b/src/python_rap_demo/cleaning.py
@@ -20,6 +20,6 @@ def clean_health_data(df: pd.DataFrame) -> pd.DataFrame:
     df = df.dropna(subset=["diagnosis"])
     # Fill missing smoker values with 'No'
     df["smoker"] = df["smoker"].fillna("No")
-    # Ensure gender is uppercase
-    df["gender"] = df["gender"].str.upper()
+    # Ensure sex is uppercase
+    df["sex"] = df["sex"].str.upper()
     return df
diff --git a/tests/test_cleaning.py b/tests/test_cleaning.py
index c028977..9875b34 100644
--- a/tests/test_cleaning.py
+++ b/tests/test_cleaning.py
@@ -10,19 +10,19 @@ def test_clean_health_data():
     """
     Test the clean_health_data function to ensure it:
     - Fills missing 'smoker' values with 'No'
-    - Converts 'gender' to uppercase
+    - Converts 'sex' to uppercase
     - Drops rows with missing 'diagnosis'
     """
     # Create a sample DataFrame with missing and lowercase values
     df = pd.DataFrame({
         "diagnosis": ["A", None],
         "smoker": [None, "Yes"],
-        "gender": ["m", "f"]
+        "sex": ["m", "f"]
     })
     cleaned = clean_health_data(df)
     # Check that missing 'smoker' is filled
     assert cleaned["smoker"].iloc[0] == "No"
-    # Check that 'gender' is uppercase
-    assert all(cleaned["gender"].str.isupper())
+    # Check that 'sex' is uppercase
+    assert all(cleaned["sex"].str.isupper())
     # Check that rows with missing 'diagnosis' are dropped
     assert cleaned["diagnosis"].notnull().all()

From ea5b45813515acca7b116912cbdfee26771f3fbd Mon Sep 17 00:00:00 2001
From: Alex Westwood <alex.westwood@ons.gov.uk>
Date: Wed, 26 Nov 2025 15:38:12 +0000
Subject: [PATCH 5/7] Revert "correct terminology"

This reverts commit bd571fb85047283f793427408449ea78bff0ada0.
---
 data/health_data.csv            | 2 +-
 src/python_rap_demo/cleaning.py | 4 ++--
 tests/test_cleaning.py          | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/data/health_data.csv b/data/health_data.csv
index 3391498..621b3fb 100644
--- a/data/health_data.csv
+++ b/data/health_data.csv
@@ -1,4 +1,4 @@
-patient_id,month,age,sex,height_cm,weight_kg,smoker,diagnosis
+patient_id,month,age,gender,height_cm,weight_kg,smoker,diagnosis
 1,2025-01,45,M,175,85,Yes,Hypertension
 2,2025-01,34,F,160,62,No,Healthy
 3,2025-01,67,F,155,70,No,Diabetes
diff --git a/src/python_rap_demo/cleaning.py b/src/python_rap_demo/cleaning.py
index 36d17f1..360eede 100644
--- a/src/python_rap_demo/cleaning.py
+++ b/src/python_rap_demo/cleaning.py
@@ -20,6 +20,6 @@ def clean_health_data(df: pd.DataFrame) -> pd.DataFrame:
     df = df.dropna(subset=["diagnosis"])
     # Fill missing smoker values with 'No'
     df["smoker"] = df["smoker"].fillna("No")
-    # Ensure sex is uppercase
-    df["sex"] = df["sex"].str.upper()
+    # Ensure gender is uppercase
+    df["gender"] = df["gender"].str.upper()
     return df
diff --git a/tests/test_cleaning.py b/tests/test_cleaning.py
index 9875b34..c028977 100644
--- a/tests/test_cleaning.py
+++ b/tests/test_cleaning.py
@@ -10,19 +10,19 @@ def test_clean_health_data():
     """
     Test the clean_health_data function to ensure it:
     - Fills missing 'smoker' values with 'No'
-    - Converts 'sex' to uppercase
+    - Converts 'gender' to uppercase
     - Drops rows with missing 'diagnosis'
     """
     # Create a sample DataFrame with missing and lowercase values
     df = pd.DataFrame({
         "diagnosis": ["A", None],
         "smoker": [None, "Yes"],
-        "sex": ["m", "f"]
+        "gender": ["m", "f"]
     })
     cleaned = clean_health_data(df)
     # Check that missing 'smoker' is filled
     assert cleaned["smoker"].iloc[0] == "No"
-    # Check that 'sex' is uppercase
-    assert all(cleaned["sex"].str.isupper())
+    # Check that 'gender' is uppercase
+    assert all(cleaned["gender"].str.isupper())
     # Check that rows with missing 'diagnosis' are dropped
     assert cleaned["diagnosis"].notnull().all()

From 66c14608e4515ee7d43d1b8d69ca9e75b27a4a8d Mon Sep 17 00:00:00 2001
From: alex-westwood <156091267+alex-westwood@users.noreply.github.com>
Date: Mon, 5 Jan 2026 13:07:20 +0000
Subject: [PATCH 6/7] Apply suggestion from @alex-westwood

---
 tests/test_report.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_report.py b/tests/test_report.py
index a1ba543..9a64654 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -27,6 +27,7 @@ def test_format_month_section():
     # Check that the output from format_month_section matches the expected output
     assert result.strip() == expected_output.strip()
 
+
 def test_generate_markdown_report(tmp_path):
     """
     Test the generate_markdown_report function to ensure it:

From f8f7ef0b7116638b67da317cfb620040dc571b01 Mon Sep 17 00:00:00 2001
From: Alex Westwood <alex.westwood@ons.gov.uk>
Date: Mon, 5 Jan 2026 13:10:15 +0000
Subject: [PATCH 7/7] Fix indentation and spacing in test_report.py

---
 tests/test_report.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/test_report.py b/tests/test_report.py
index a1ba543..1efce51 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -11,11 +11,11 @@ def test_format_month_section():
     month = "January"
     # Create the dataframe month_df used in format_month_section
     month_df = pd.DataFrame({
-    "diagnosis": ["A", "B"],
-    "case_count": [10, 20],
-    "total": [20, 50],
+        "diagnosis": ["A", "B"],
+        "case_count": [10, 20],
+        "total": [20, 50],
     })
-    #Calculate the prevalence rate
+    # Calculate the prevalence rate
     month_df["prevalence_rate"] = month_df["case_count"] / month_df["total"]
     # Define the expected output of format_month_section
     expected_output = (
@@ -27,6 +27,7 @@ def test_format_month_section():
     # Check that the output from format_month_section matches the expected output
     assert result.strip() == expected_output.strip()
 
+
 def test_generate_markdown_report(tmp_path):
     """
     Test the generate_markdown_report function to ensure it: