Implement Evaluator Versions #402
Changes from all commits (45 commits):

```
0a00bcd d465a89 348bb58 acaa901 4b71ddb 3dbcd59 532e071 5e7a5fa 060d72c
bc31c9f ea08062 f246087 6b53ac1 ec0c8ca fc036f5 4566584 f103b69 9c3e417
ea673f4 26fbc2d 4702307 9d1bc74 3314bec 66f191a 165afe1 838c7a5 71599e6
0144c9f c8774a6 2076f0a 8acdc35 432a649 ab04086 3c2db59 17eb18f 1fd66f7
fc4f913 2f88428 c6a8c51 a2165fb 1445d75 7969a6e d4a445b b3adfee 37f4856
```
```diff
@@ -243,3 +243,5 @@ package.json
 tau2-bench
 *.err
 eval-protocol
+
+.vscode/launch.json
```
```diff
@@ -0,0 +1 @@
+!launch.json.backup
```
This file was deleted.
```diff
@@ -0,0 +1,60 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "EP: Upload",
+      "type": "python",
+      "request": "launch",
+      "module": "eval_protocol.cli",
+      "args": ["upload"],
+      "console": "integratedTerminal",
+      "justMyCode": false,
+      "cwd": "<REPLACE_WITH_YOUR_EVALUATOR_DIRECTORY>",
+      "env": {
+        "PYTHONPATH": "${workspaceFolder}",
+        "FIREWORKS_API_KEY": "${env:FIREWORKS_API_KEY}",
+        "FIREWORKS_BASE_URL": "${env:FIREWORKS_BASE_URL}",
+        "FIREWORKS_EXTRA_HEADERS": "{\"x-api-key\": \"${env:FIREWORKS_API_KEY}\", \"X-Fireworks-Gateway-Secret\": \"${env:FIREWORKS_GATEWAY_SECRET}\"}"
+      }
+    },
+    {
+      "name": "EP: Local Test",
+      "type": "python",
+      "request": "launch",
+      "module": "eval_protocol.cli",
+      "args": ["local-test", "--ignore-docker"],
+      "console": "integratedTerminal",
+      "justMyCode": false,
+      "cwd": "<REPLACE_WITH_YOUR_EVALUATOR_DIRECTORY>",
+      "env": {
+        "PYTHONPATH": "${workspaceFolder}",
+        "FIREWORKS_API_KEY": "${env:FIREWORKS_API_KEY}",
+        "FIREWORKS_BASE_URL": "${env:FIREWORKS_BASE_URL}",
+        "FIREWORKS_EXTRA_HEADERS": "{\"x-api-key\": \"${env:FIREWORKS_API_KEY}\", \"X-Fireworks-Gateway-Secret\": \"${env:FIREWORKS_GATEWAY_SECRET}\"}"
+      }
+    },
+    {
+      "name": "EP: Create RFT",
+      "type": "python",
+      "request": "launch",
+      "module": "eval_protocol.cli",
+      "args": [
+        "create",
+        "rft",
+        "--base-model",
+        "accounts/fireworks/models/qwen3-0p6b",
+        "--chunk-size",
+        "10"
+      ],
+      "console": "integratedTerminal",
+      "justMyCode": false,
+      "cwd": "<REPLACE_WITH_YOUR_EVALUATOR_DIRECTORY>",
+      "env": {
+        "PYTHONPATH": "${workspaceFolder}",
+        "FIREWORKS_API_KEY": "${env:FIREWORKS_API_KEY}",
+        "FIREWORKS_BASE_URL": "${env:FIREWORKS_BASE_URL}",
+        "FIREWORKS_EXTRA_HEADERS": "{\"x-api-key\": \"${env:FIREWORKS_API_KEY}\", \"X-Fireworks-Gateway-Secret\": \"${env:FIREWORKS_GATEWAY_SECRET}\"}"
+      }
+    }
+  ]
+}
```
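These configurations interpolate FIREWORKS_API_KEY, FIREWORKS_BASE_URL, and FIREWORKS_GATEWAY_SECRET from the developer's shell, and `cwd` must be edited by hand. A minimal sanity-check sketch (not part of this diff; treating FIREWORKS_GATEWAY_SECRET as optional is an assumption) that verifies those variables and previews the FIREWORKS_EXTRA_HEADERS value the configs build:

```python
# Sanity-check sketch for the environment these launch configs assume.
import json
import os

required = ["FIREWORKS_API_KEY", "FIREWORKS_BASE_URL"]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise SystemExit(f"Set before launching: {', '.join(missing)}")

# Mirrors the FIREWORKS_EXTRA_HEADERS JSON the configs interpolate.
# FIREWORKS_GATEWAY_SECRET is treated as optional here (assumption).
extra_headers = {
    "x-api-key": os.environ["FIREWORKS_API_KEY"],
    "X-Fireworks-Gateway-Secret": os.environ.get("FIREWORKS_GATEWAY_SECRET", ""),
}
print(json.dumps(extra_headers))
```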
```diff
@@ -81,13 +81,12 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
         "--env-file",
         help="Path to .env file containing secrets to upload (default: .env in current directory)",
     )
-    upload_parser.add_argument(
-        "--force",
-        action="store_true",
-        help="Overwrite existing evaluator with the same ID",
-    )
 
     # Auto-generate flags from SDK Fireworks().evaluators.create() signature
+    # Note: We use Fireworks() directly here instead of create_fireworks_client()
+    # because we only need the method signature for introspection, not a fully
+    # authenticated client. create_fireworks_client() would trigger an HTTP request
+    # to verify the API key, causing delays even for --help invocations.
     create_evaluator_fn = Fireworks().evaluators.create
 
     upload_skip_fields = {
```
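The "auto-generate flags" comment above refers to deriving argparse flags from the SDK method's signature. A simplified, self-contained stand-in for that pattern — the repo's `add_args_from_callable_signature` handles nested fields, aliases, and help overrides omitted here, and `create_job` below is a hypothetical target, not the SDK method:

```python
# Illustration only: derive one --flag per keyword parameter of a callable.
import argparse
import inspect

def add_flags_from_signature(parser, fn, skip=()):
    """Add one --flag per parameter of fn (simplified stand-in)."""
    for name, param in inspect.signature(fn).parameters.items():
        if name in skip or param.kind in (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        ):
            continue
        flag = "--" + name.replace("_", "-")
        if isinstance(param.default, bool):
            # Boolean defaults become store_true switches.
            parser.add_argument(flag, action="store_true", help=f"{fn.__name__}({name}=...)")
        else:
            default = None if param.default is inspect.Parameter.empty else param.default
            parser.add_argument(flag, default=default, help=f"{fn.__name__}({name}=...)")

# Hypothetical stand-in for the SDK method being introspected:
def create_job(display_name: str = "", chunk_size: int = 10, dry_run: bool = False):
    pass

parser = argparse.ArgumentParser()
add_flags_from_signature(parser, create_job)
# No type conversion here, so chunk_size parses as the string '5':
print(parser.parse_args(["--chunk-size", "5"]))
```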
```diff
@@ -137,7 +136,6 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
 
     rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
     rft_parser.add_argument("--dry-run", action="store_true", help="Print planned SDK call without sending")
-    rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
     rft_parser.add_argument("--skip-validation", action="store_true", help="Skip local dataset/evaluator validation")
     rft_parser.add_argument(
         "--ignore-docker",
```
```diff
@@ -198,6 +196,10 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
         "loss_config.method": "RL loss method for underlying trainers. One of {grpo,dapo}.",
     }
 
+    # Note: We use Fireworks() directly here instead of create_fireworks_client()
+    # because we only need the method signature for introspection, not a fully
+    # authenticated client. create_fireworks_client() would trigger an HTTP request
+    # to verify the API key, causing delays even for --help invocations.
     create_rft_job_fn = Fireworks().reinforcement_fine_tuning_jobs.create
 
     add_args_from_callable_signature(
```
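The repeated note above carries the rationale; a toy illustration with stand-in classes (not the Fireworks SDK) of why signature introspection alone keeps `--help` fast:

```python
# Stand-ins, not the Fireworks SDK: constructing the client and reading a
# bound method's signature involves no HTTP request or API-key check.
import inspect

class StubEvaluators:
    def create(self, evaluator_id: str, display_name: str = "", description: str = ""):
        """Stand-in for the SDK create() whose signature gets introspected."""

class StubFireworks:
    def __init__(self):  # note: no network call on construction
        self.evaluators = StubEvaluators()

params = inspect.signature(StubFireworks().evaluators.create).parameters
print(list(params))  # ['evaluator_id', 'display_name', 'description']
```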
```diff
@@ -208,6 +210,78 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
         help_overrides=help_overrides,
     )
 
+    # Create evj (Evaluation Job) subcommand
+    evj_parser = create_subparsers.add_parser(
+        "evj",
+        help="Create an Evaluation Job on Fireworks",
+    )
+
+    evj_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
+    evj_parser.add_argument("--dry-run", action="store_true", help="Print planned SDK call without sending")
+    evj_parser.add_argument("--skip-validation", action="store_true", help="Skip local dataset/evaluator validation")
+    evj_parser.add_argument(
+        "--ignore-docker",
+        action="store_true",
+        help="Ignore Dockerfile even if present; run pytest on host during evaluator validation",
+    )
+    evj_parser.add_argument(
+        "--docker-build-extra",
+        default="",
+        metavar="",
+        help="Extra flags to pass to 'docker build' when validating evaluator (quoted string, e.g. \"--no-cache --pull --progress=plain\")",
+    )
+    evj_parser.add_argument(
+        "--docker-run-extra",
+        default="",
+        metavar="",
+        help="Extra flags to pass to 'docker run' when validating evaluator (quoted string, e.g. \"--env-file .env --memory=8g\")",
+    )
+    evj_parser.add_argument(
+        "--quiet",
+        action="store_true",
+        help="If set, only errors will be printed.",
+    )
+
+    # Auto-generate flags from SDK Fireworks().evaluation_jobs.create() signature
+    create_evj_fn = Fireworks().evaluation_jobs.create
+
+    evj_skip_fields = {
+        "__top_level__": {
+            "account_id",  # auto-detected
+            "extra_headers",
+            "extra_query",
+            "extra_body",
+            "timeout",
+        },
+        "evaluation_job": {
+            "output_stats",  # read-only, set by server
+        },
+    }
+    evj_aliases = {
+        "evaluation_job_id": ["--job-id"],
+        "evaluation_job.evaluator": ["--evaluator"],
+        "evaluation_job.input_dataset": ["--dataset"],  # --input-dataset is auto-added
+        "evaluation_job.display_name": ["--name"],
+        # output_dataset, evaluator_version get their short forms auto-added
+    }
+    evj_help_overrides = {
+        "evaluation_job_id": "Evaluation Job ID to use",
+        "evaluation_job.evaluator": "Evaluator resource name (format: accounts/{account_id}/evaluators/{evaluator_id})",
+        "evaluation_job.input_dataset": "Input dataset resource name (format: accounts/{account_id}/datasets/{dataset_id})",
+        "evaluation_job.output_dataset": "Output dataset resource name where results will be written",
+        "evaluation_job.display_name": "Display name for the evaluation job",
+        "evaluation_job.evaluator_version": "Specific evaluator version to use (defaults to current version)",
+        "leaderboard_ids": "Optional leaderboard IDs to attach this job to upon creation",
+    }
+
+    add_args_from_callable_signature(
+        evj_parser,
+        create_evj_fn,
+        skip_fields=evj_skip_fields,
+        aliases=evj_aliases,
+        help_overrides=evj_help_overrides,
+    )
+
     # Local test command
     local_test_parser = subparsers.add_parser(
         "local-test",
```
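The dotted destinations above (`evaluation_job.evaluator` and friends) suggest the parsed flags are folded back into the nested payload that `evaluation_jobs.create()` expects. A hedged sketch of that folding step — `fold_dotted` is an invented name for illustration, not code from this PR:

```python
# Hypothetical helper (not from this PR): fold argparse results with dotted
# destinations like "evaluation_job.evaluator" into a nested dict payload.
def fold_dotted(flat: dict) -> dict:
    nested: dict = {}
    for key, value in flat.items():
        if value is None:
            continue  # leave unset flags out of the payload
        node = nested
        parts = key.split(".")
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node[parts[-1]] = value
    return nested

args = {
    "evaluation_job_id": "my-evj",
    "evaluation_job.evaluator": "accounts/acct/evaluators/my-eval",
    "evaluation_job.evaluator_version": None,  # default: current version
}
print(fold_dotted(args))
# {'evaluation_job_id': 'my-evj', 'evaluation_job': {'evaluator': 'accounts/acct/evaluators/my-eval'}}
```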
```diff
@@ -349,7 +423,11 @@ def _extract_flag_value(argv_list, flag_name):
             from .cli_commands.create_rft import create_rft_command
 
             return create_rft_command(args)
-        print("Error: missing subcommand for 'create'. Try: eval-protocol create rft")
+        elif args.create_command == "evj":
+            from .cli_commands.create_evj import create_evj_command
+
+            return create_evj_command(args)
+        print("Error: missing subcommand for 'create'. Try: eval-protocol create rft|evj")
         return 1
     elif args.command == "local-test":
         from .cli_commands.local_test import local_test_command
```
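Since the launch configurations above already run the CLI as the module `eval_protocol.cli`, the new subcommand can be smoke-tested the same way; a sketch, assuming the package is importable in the current interpreter:

```python
# Smoke-test sketch: the evj --help path should succeed without hitting the
# API, per the introspection notes above.
import subprocess
import sys

result = subprocess.run(
    [sys.executable, "-m", "eval_protocol.cli", "create", "evj", "--help"],
    capture_output=True,
    text=True,
)
print(result.stdout)
assert result.returncode == 0, result.stderr
```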