From 0e4c433ceede6fe6941e0702c5f9c274ecbc7a5b Mon Sep 17 00:00:00 2001 From: Paris778 Date: Tue, 27 Jan 2026 16:55:05 +0000 Subject: [PATCH 1/2] docs: fix spelling and grammar errors in notebook documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix "auxilliary" → "auxiliary" (12 occurrences) - Fix "auxilary" → "auxiliary" (2 occurrences) - Fix "obvservation" → "observation" (2 occurrences) - Fix "suprising" → "surprising" (2 occurrences) - Fix "competing the tournament" → "competing in the tournament" (2 occurrences) - Fix "Peason" → "Pearson" correlation (1 occurrence) - Standardize parameter naming: _live_benchmark_models → live_benchmark_models Affected notebooks: - example_model.ipynb - feature_neutralization.ipynb - hello_numerai.ipynb - numerai/feature_neutralization.ipynb - numerai/hello_numerai.ipynb - numerai/target_ensemble.ipynb - target_ensemble.ipynb --- example_model.ipynb | 104 +- feature_neutralization.ipynb | 1352 +++++++++---------- hello_numerai.ipynb | 1520 ++++++++++----------- numerai/feature_neutralization.ipynb | 1368 +++++++++---------- numerai/hello_numerai.ipynb | 1528 ++++++++++----------- numerai/target_ensemble.ipynb | 1862 +++++++++++++------------- target_ensemble.ipynb | 1830 ++++++++++++------------- 7 files changed, 4786 insertions(+), 4778 deletions(-) diff --git a/example_model.ipynb b/example_model.ipynb index 0a62986..b3d6d63 100644 --- a/example_model.ipynb +++ b/example_model.ipynb @@ -11,20 +11,18 @@ }, { "cell_type": "code", + "execution_count": 1, "metadata": { + "ExecuteTime": { + "end_time": "2025-12-14T21:09:42.696957Z", + "start_time": "2025-12-14T21:09:42.549138Z" + }, "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ekw8Z93ljC3v", - "outputId": "bdd16698-2ad0-4423-b090-c5ce55fe3053", - "ExecuteTime": { - "end_time": "2025-12-14T21:09:42.696957Z", - "start_time": "2025-12-14T21:09:42.549138Z" - } + "outputId": "bdd16698-2ad0-4423-b090-c5ce55fe3053" }, - "source": [ - "!python --version" - ], "outputs": [ { "name": "stdout", @@ -34,52 +32,79 @@ ] } ], - "execution_count": 1 + "source": [ + "!python --version" + ] }, { "cell_type": "code", + "execution_count": 2, "metadata": { + "ExecuteTime": { + "end_time": "2025-12-14T21:09:44.281889Z", + "start_time": "2025-12-14T21:09:42.698161Z" + }, "colab": { "base_uri": "https://localhost:8080/" }, "id": "yoy_wT1rhMqF", - "outputId": "e038b50f-1b61-4334-be62-28f4dc40a0a0", - "ExecuteTime": { - "end_time": "2025-12-14T21:09:44.281889Z", - "start_time": "2025-12-14T21:09:42.698161Z" - } + "outputId": "e038b50f-1b61-4334-be62-28f4dc40a0a0" }, - "source": [ - "# Install dependencies\n", - "!pip install -q --upgrade numerapi pandas pyarrow matplotlib lightgbm scikit-learn scipy cloudpickle==3.1.1" - ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m25.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m25.3\u001B[0m\r\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n" + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.3\u001b[0m\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n" ] } ], - "execution_count": 2 + "source": [ + "# Install dependencies\n", + "!pip install -q --upgrade numerapi pandas pyarrow matplotlib lightgbm scikit-learn scipy cloudpickle==3.1.1" + ] }, { "cell_type": "code", + "execution_count": 3, "metadata": { + "ExecuteTime": { + "end_time": "2025-12-14T21:10:08.471862Z", + "start_time": "2025-12-14T21:09:44.283405Z" + }, "colab": { "base_uri": "https://localhost:8080/", "height": 160 }, "id": "13hdRk9ghMqI", - "outputId": "d2274374-fd85-4189-f27b-d9d466cc63ca", - "ExecuteTime": { - "end_time": "2025-12-14T21:10:08.471862Z", - "start_time": "2025-12-14T21:09:44.283405Z" - } + "outputId": "d2274374-fd85-4189-f27b-d9d466cc63ca" }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-12-14 13:09:45,386 INFO numerapi.utils: target file already exists\n", + "2025-12-14 13:09:45,387 INFO numerapi.utils: download complete\n", + "2025-12-14 13:09:46,291 INFO numerapi.utils: target file already exists\n", + "2025-12-14 13:09:46,291 INFO numerapi.utils: download complete\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001259 seconds.\n", + "You can set `force_row_wise=true` to remove the overhead.\n", + "And if memory is not enough, you can set `force_col_wise=true`.\n", + "[LightGBM] [Info] Total Bins 210\n", + "[LightGBM] [Info] Number of data points in the train set: 688184, number of used features: 42\n", + "[LightGBM] [Info] Start training from score 0.499946\n" + ] + } + ], "source": [ "from numerapi import NumerAPI\n", "import pandas as pd\n", @@ -155,32 +180,7 @@ " files.download('example_model.pkl')\n", "except:\n", " pass" - ], - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-12-14 13:09:45,386 INFO numerapi.utils: target file already exists\n", - "2025-12-14 13:09:45,387 INFO numerapi.utils: download complete\n", - "2025-12-14 13:09:46,291 INFO numerapi.utils: target file already exists\n", - "2025-12-14 13:09:46,291 INFO numerapi.utils: download complete\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001259 seconds.\n", - "You can set `force_row_wise=true` to remove the overhead.\n", - "And if memory is not enough, you can set `force_col_wise=true`.\n", - "[LightGBM] [Info] Total Bins 210\n", - "[LightGBM] [Info] Number of data points in the train set: 688184, number of used features: 42\n", - "[LightGBM] [Info] Start training from score 0.499946\n" - ] - } - ], - "execution_count": 3 + ] } ], "metadata": { diff --git a/feature_neutralization.ipynb b/feature_neutralization.ipynb index 9c2edd2..adf36fe 100644 --- a/feature_neutralization.ipynb +++ b/feature_neutralization.ipynb @@ -21,20 +21,18 @@ }, { "cell_type": "code", + "execution_count": 1, "metadata": { + "ExecuteTime": { + "end_time": "2025-12-14T22:31:02.231100Z", + "start_time": "2025-12-14T22:31:02.072885Z" + }, "colab": { "base_uri": "https://localhost:8080/" }, "id": "ws4qrSssFC9T", - "outputId": "3860d6e5-38ec-4638-82b2-bce4c7365966", - "ExecuteTime": { - "end_time": "2025-12-14T22:31:02.231100Z", - "start_time": "2025-12-14T22:31:02.072885Z" - } + "outputId": "3860d6e5-38ec-4638-82b2-bce4c7365966" }, - "source": [ - "!python --version" - ], "outputs": [ { "name": "stdout", @@ -44,44 +42,46 @@ ] } ], - "execution_count": 1 + "source": [ + "!python --version" + ] }, { "cell_type": "code", + "execution_count": 2, "metadata": { + "ExecuteTime": { + "end_time": "2025-12-14T22:31:03.976313Z", + "start_time": "2025-12-14T22:31:02.232527Z" + }, "colab": { "base_uri": "https://localhost:8080/" }, "id": "iHzZde7Tyu-N", - "outputId": "f9cb52f5-88f3-4776-a1be-cef458e718f5", - "ExecuteTime": { - "end_time": "2025-12-14T22:31:03.976313Z", - "start_time": "2025-12-14T22:31:02.232527Z" - } + "outputId": "f9cb52f5-88f3-4776-a1be-cef458e718f5" }, - "source": [ - "# Install dependencies\n", - "!pip install -q --upgrade numerapi pandas pyarrow matplotlib lightgbm scikit-learn scipy cloudpickle==3.1.1\n", - "!pip install -q --no-deps numerai-tools\n", - "\n", - "# Inline plots\n", - "%matplotlib inline" - ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\r\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m25.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m25.3\u001B[0m\r\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.3\u001b[0m\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n", "\r\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m25.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m25.3\u001B[0m\r\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n" + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.3\u001b[0m\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n" ] } ], - "execution_count": 2 + "source": [ + "# Install dependencies\n", + "!pip install -q --upgrade numerapi pandas pyarrow matplotlib lightgbm scikit-learn scipy cloudpickle==3.1.1\n", + "!pip install -q --no-deps numerai-tools\n", + "\n", + "# Inline plots\n", + "%matplotlib inline" + ] }, { "cell_type": "markdown", @@ -108,59 +108,19 @@ }, { "cell_type": "code", + "execution_count": 3, "metadata": { + "ExecuteTime": { + "end_time": "2025-12-14T22:31:04.680824Z", + "start_time": "2025-12-14T22:31:03.977656Z" + }, "colab": { "base_uri": "https://localhost:8080/", "height": 385 }, "id": "JTN8-MUmyu-P", - "outputId": "b8d0557f-ae8f-48e8-e707-806ac4683ad4", - "ExecuteTime": { - "end_time": "2025-12-14T22:31:04.680824Z", - "start_time": "2025-12-14T22:31:03.977656Z" - } + "outputId": "b8d0557f-ae8f-48e8-e707-806ac4683ad4" }, - "source": [ - "import json\n", - "import pandas as pd\n", - "from numerapi import NumerAPI\n", - "\n", - "# initialize our API client\n", - "napi = NumerAPI()\n", - "\n", - "# Set data version to one of the latest datasets\n", - "DATA_VERSION = \"v5.2\"\n", - "\n", - "napi.download_dataset(f\"{DATA_VERSION}/features.json\")\n", - "feature_metadata = json.load(open(f\"{DATA_VERSION}/features.json\"))\n", - "feature_sets = feature_metadata[\"feature_sets\"]\n", - "\n", - "sizes = [\"small\", \"medium\", \"all\"]\n", - "groups = [\n", - " \"intelligence\",\n", - " \"wisdom\",\n", - " \"charisma\",\n", - " \"dexterity\",\n", - " \"strength\",\n", - " \"constitution\",\n", - " \"agility\",\n", - " \"serenity\",\n", - " \"all\"\n", - "]\n", - "\n", - "# compile the intersections of feature sets and feature groups\n", - "subgroups = {}\n", - "for size in sizes:\n", - " subgroups[size] = {}\n", - " for group in groups:\n", - " subgroups[size][group] = (\n", - " set(feature_sets[size])\n", - " .intersection(set(feature_sets[group]))\n", - " )\n", - "\n", - "# convert to data frame and display the feature count of each intersection\n", - "pd.DataFrame(subgroups).applymap(len).sort_values(by=\"all\", ascending=False)" - ], "outputs": [ { "name": "stderr", @@ -174,18 +134,6 @@ }, { "data": { - "text/plain": [ - " small medium all\n", - "all 42 780 2748\n", - "constitution 2 134 335\n", - "charisma 3 116 290\n", - "agility 2 58 145\n", - "wisdom 3 56 140\n", - "strength 1 54 135\n", - "serenity 3 34 95\n", - "dexterity 4 21 51\n", - "intelligence 2 14 35" - ], "text/html": [ "
\n", "