From 928699eea69cbb5a0b0b691a68bb3e4ee48ff0e6 Mon Sep 17 00:00:00 2001 From: fryerd1 Date: Tue, 21 Oct 2025 12:11:15 +0100 Subject: [PATCH 1/7] Add test_format_month_section to test_report.py --- tests/test_report.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/test_report.py b/tests/test_report.py index 0a5fd0b..24d5719 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -3,9 +3,26 @@ """ import pandas as pd -from python_rap_demo.report import generate_markdown_report +from python_rap_demo.report import format_month_section, generate_markdown_report +def test_format_month_section(): + month = "January" + month_df = pd.DataFrame({ + "diagnosis": ["A", "B"], + "case_count": [10, 20], + "total": [20, 50], + }) + month_df["prevalence_rate"] = month_df["case_count"] / month_df["total"] + expected_output = ( + "## Month: January\n" + "- A: 50.00% (10 cases)\n" + "- B: 40.00% (20 cases)\n" + ) + + result = format_month_section(month, month_df) + assert result.strip() == expected_output.strip() + def test_generate_markdown_report(tmp_path): """ Test the generate_markdown_report function to ensure it: From 8662116a56b89b3df7f41ba6a87ed28bc8f5d5f4 Mon Sep 17 00:00:00 2001 From: fryerd1 Date: Tue, 21 Oct 2025 14:41:19 +0100 Subject: [PATCH 2/7] Add comments to test_format_month_section --- tests/test_report.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_report.py b/tests/test_report.py index 24d5719..ce9c4a8 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -7,20 +7,24 @@ def test_format_month_section(): + # Define the variable month used in format_month_selection month = "January" + # Create the dataframe month_df used in format_month_selection month_df = pd.DataFrame({ "diagnosis": ["A", "B"], "case_count": [10, 20], "total": [20, 50], }) + #Calculate the prevalence rate month_df["prevalence_rate"] = month_df["case_count"] / month_df["total"] + # Define the expected output of format_month_selection expected_output = ( "## Month: January\n" "- A: 50.00% (10 cases)\n" "- B: 40.00% (20 cases)\n" ) - result = format_month_section(month, month_df) + # Check that the output from format_month_selection matches the expected output assert result.strip() == expected_output.strip() def test_generate_markdown_report(tmp_path): From 413e87a3c358ea6d1c91d0b166abf5617f726b7d Mon Sep 17 00:00:00 2001 From: fryerd1 Date: Wed, 29 Oct 2025 13:12:45 +0000 Subject: [PATCH 3/7] correct comments in test_report.py --- tests/test_report.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_report.py b/tests/test_report.py index ce9c4a8..a1ba543 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -7,9 +7,9 @@ def test_format_month_section(): - # Define the variable month used in format_month_selection + # Define the variable month used in format_month_section month = "January" - # Create the dataframe month_df used in format_month_selection + # Create the dataframe month_df used in format_month_section month_df = pd.DataFrame({ "diagnosis": ["A", "B"], "case_count": [10, 20], @@ -17,14 +17,14 @@ def test_format_month_section(): }) #Calculate the prevalence rate month_df["prevalence_rate"] = month_df["case_count"] / month_df["total"] - # Define the expected output of format_month_selection + # Define the expected output of format_month_section expected_output = ( "## Month: January\n" "- A: 50.00% (10 cases)\n" "- B: 40.00% (20 cases)\n" ) result = format_month_section(month, month_df) - # Check that the output from format_month_selection matches the expected output + # Check that the output from format_month_section matches the expected output assert result.strip() == expected_output.strip() def test_generate_markdown_report(tmp_path): From bd571fb85047283f793427408449ea78bff0ada0 Mon Sep 17 00:00:00 2001 From: Alex Westwood Date: Wed, 26 Nov 2025 15:31:24 +0000 Subject: [PATCH 4/7] correct terminology --- data/health_data.csv | 2 +- src/python_rap_demo/cleaning.py | 4 ++-- tests/test_cleaning.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data/health_data.csv b/data/health_data.csv index 621b3fb..3391498 100644 --- a/data/health_data.csv +++ b/data/health_data.csv @@ -1,4 +1,4 @@ -patient_id,month,age,gender,height_cm,weight_kg,smoker,diagnosis +patient_id,month,age,sex,height_cm,weight_kg,smoker,diagnosis 1,2025-01,45,M,175,85,Yes,Hypertension 2,2025-01,34,F,160,62,No,Healthy 3,2025-01,67,F,155,70,No,Diabetes diff --git a/src/python_rap_demo/cleaning.py b/src/python_rap_demo/cleaning.py index 360eede..36d17f1 100644 --- a/src/python_rap_demo/cleaning.py +++ b/src/python_rap_demo/cleaning.py @@ -20,6 +20,6 @@ def clean_health_data(df: pd.DataFrame) -> pd.DataFrame: df = df.dropna(subset=["diagnosis"]) # Fill missing smoker values with 'No' df["smoker"] = df["smoker"].fillna("No") - # Ensure gender is uppercase - df["gender"] = df["gender"].str.upper() + # Ensure sex is uppercase + df["sex"] = df["sex"].str.upper() return df diff --git a/tests/test_cleaning.py b/tests/test_cleaning.py index c028977..9875b34 100644 --- a/tests/test_cleaning.py +++ b/tests/test_cleaning.py @@ -10,19 +10,19 @@ def test_clean_health_data(): """ Test the clean_health_data function to ensure it: - Fills missing 'smoker' values with 'No' - - Converts 'gender' to uppercase + - Converts 'sex' to uppercase - Drops rows with missing 'diagnosis' """ # Create a sample DataFrame with missing and lowercase values df = pd.DataFrame({ "diagnosis": ["A", None], "smoker": [None, "Yes"], - "gender": ["m", "f"] + "sex": ["m", "f"] }) cleaned = clean_health_data(df) # Check that missing 'smoker' is filled assert cleaned["smoker"].iloc[0] == "No" - # Check that 'gender' is uppercase - assert all(cleaned["gender"].str.isupper()) + # Check that 'sex' is uppercase + assert all(cleaned["sex"].str.isupper()) # Check that rows with missing 'diagnosis' are dropped assert cleaned["diagnosis"].notnull().all() From ea5b45813515acca7b116912cbdfee26771f3fbd Mon Sep 17 00:00:00 2001 From: Alex Westwood Date: Wed, 26 Nov 2025 15:38:12 +0000 Subject: [PATCH 5/7] Revert "correct terminology" This reverts commit bd571fb85047283f793427408449ea78bff0ada0. --- data/health_data.csv | 2 +- src/python_rap_demo/cleaning.py | 4 ++-- tests/test_cleaning.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data/health_data.csv b/data/health_data.csv index 3391498..621b3fb 100644 --- a/data/health_data.csv +++ b/data/health_data.csv @@ -1,4 +1,4 @@ -patient_id,month,age,sex,height_cm,weight_kg,smoker,diagnosis +patient_id,month,age,gender,height_cm,weight_kg,smoker,diagnosis 1,2025-01,45,M,175,85,Yes,Hypertension 2,2025-01,34,F,160,62,No,Healthy 3,2025-01,67,F,155,70,No,Diabetes diff --git a/src/python_rap_demo/cleaning.py b/src/python_rap_demo/cleaning.py index 36d17f1..360eede 100644 --- a/src/python_rap_demo/cleaning.py +++ b/src/python_rap_demo/cleaning.py @@ -20,6 +20,6 @@ def clean_health_data(df: pd.DataFrame) -> pd.DataFrame: df = df.dropna(subset=["diagnosis"]) # Fill missing smoker values with 'No' df["smoker"] = df["smoker"].fillna("No") - # Ensure sex is uppercase - df["sex"] = df["sex"].str.upper() + # Ensure gender is uppercase + df["gender"] = df["gender"].str.upper() return df diff --git a/tests/test_cleaning.py b/tests/test_cleaning.py index 9875b34..c028977 100644 --- a/tests/test_cleaning.py +++ b/tests/test_cleaning.py @@ -10,19 +10,19 @@ def test_clean_health_data(): """ Test the clean_health_data function to ensure it: - Fills missing 'smoker' values with 'No' - - Converts 'sex' to uppercase + - Converts 'gender' to uppercase - Drops rows with missing 'diagnosis' """ # Create a sample DataFrame with missing and lowercase values df = pd.DataFrame({ "diagnosis": ["A", None], "smoker": [None, "Yes"], - "sex": ["m", "f"] + "gender": ["m", "f"] }) cleaned = clean_health_data(df) # Check that missing 'smoker' is filled assert cleaned["smoker"].iloc[0] == "No" - # Check that 'sex' is uppercase - assert all(cleaned["sex"].str.isupper()) + # Check that 'gender' is uppercase + assert all(cleaned["gender"].str.isupper()) # Check that rows with missing 'diagnosis' are dropped assert cleaned["diagnosis"].notnull().all() From 66c14608e4515ee7d43d1b8d69ca9e75b27a4a8d Mon Sep 17 00:00:00 2001 From: alex-westwood <156091267+alex-westwood@users.noreply.github.com> Date: Mon, 5 Jan 2026 13:07:20 +0000 Subject: [PATCH 6/7] Apply suggestion from @alex-westwood --- tests/test_report.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_report.py b/tests/test_report.py index a1ba543..9a64654 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -27,6 +27,7 @@ def test_format_month_section(): # Check that the output from format_month_section matches the expected output assert result.strip() == expected_output.strip() + def test_generate_markdown_report(tmp_path): """ Test the generate_markdown_report function to ensure it: From f8f7ef0b7116638b67da317cfb620040dc571b01 Mon Sep 17 00:00:00 2001 From: Alex Westwood Date: Mon, 5 Jan 2026 13:10:15 +0000 Subject: [PATCH 7/7] add spaces --- tests/test_report.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_report.py b/tests/test_report.py index a1ba543..1efce51 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -11,11 +11,11 @@ def test_format_month_section(): month = "January" # Create the dataframe month_df used in format_month_section month_df = pd.DataFrame({ - "diagnosis": ["A", "B"], - "case_count": [10, 20], - "total": [20, 50], + "diagnosis": ["A", "B"], + "case_count": [10, 20], + "total": [20, 50], }) - #Calculate the prevalence rate + # Calculate the prevalence rate month_df["prevalence_rate"] = month_df["case_count"] / month_df["total"] # Define the expected output of format_month_section expected_output = ( @@ -27,6 +27,7 @@ def test_format_month_section(): # Check that the output from format_month_section matches the expected output assert result.strip() == expected_output.strip() + def test_generate_markdown_report(tmp_path): """ Test the generate_markdown_report function to ensure it: