From 0ca4ee952a2f46de761a1f437fef74189f5e5cf9 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 15 Jan 2026 17:05:55 +0000 Subject: [PATCH 1/2] feat(schemas): refactor job/policy schemas to use JSON Schema files - Add job.schema.json and policy.schema.json as standalone JSON Schema files - Update validation.py to support loading schemas from files and applying defaults during validation using a custom jsonschema validator - Refactor job_schema.py and policy_schema.py to load from JSON Schema files while maintaining backward compatibility - Update parser.py and policy_parser.py to use validate_and_set_defaults which fills in default values from the schema - Add yaml-language-server schema declarations to all job.yml and policy.yml files for IDE support --- .deepwork.policy.yml | 1 + .deepwork/jobs/add_platform/job.yml | 1 + .deepwork/jobs/deepwork_jobs/job.yml | 1 + .deepwork/jobs/deepwork_policy/job.yml | 1 + src/deepwork/core/parser.py | 6 +- src/deepwork/core/policy_parser.py | 6 +- src/deepwork/schemas/job.schema.json | 221 +++++++++++++++++ src/deepwork/schemas/job_schema.py | 234 +++--------------- src/deepwork/schemas/policy.schema.json | 69 ++++++ src/deepwork/schemas/policy_schema.py | 106 +++----- .../standard_jobs/deepwork_jobs/job.yml | 1 + .../standard_jobs/deepwork_policy/job.yml | 1 + src/deepwork/utils/validation.py | 175 ++++++++++++- tests/fixtures/jobs/complex_job/job.yml | 1 + tests/fixtures/jobs/simple_job/job.yml | 1 + tests/fixtures/policies/multiple_policies.yml | 1 + .../policy_with_instructions_file.yml | 1 + tests/fixtures/policies/valid_policy.yml | 1 + 18 files changed, 542 insertions(+), 286 deletions(-) create mode 100644 src/deepwork/schemas/job.schema.json create mode 100644 src/deepwork/schemas/policy.schema.json diff --git a/.deepwork.policy.yml b/.deepwork.policy.yml index f2721da..f6df3a3 100644 --- a/.deepwork.policy.yml +++ b/.deepwork.policy.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=src/deepwork/schemas/policy.schema.json - name: "README Accuracy" trigger: "src/**/*" safety: "README.md" diff --git a/.deepwork/jobs/add_platform/job.yml b/.deepwork/jobs/add_platform/job.yml index 0754474..8451ece 100644 --- a/.deepwork/jobs/add_platform/job.yml +++ b/.deepwork/jobs/add_platform/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../../src/deepwork/schemas/job.schema.json name: add_platform version: "0.1.0" summary: "Add a new AI platform to DeepWork with adapter, templates, and tests" diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 4bb8656..fd3ec07 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../../src/deepwork/schemas/job.schema.json name: deepwork_jobs version: "0.2.0" summary: "DeepWork job management commands" diff --git a/.deepwork/jobs/deepwork_policy/job.yml b/.deepwork/jobs/deepwork_policy/job.yml index 0aacc87..0968e9f 100644 --- a/.deepwork/jobs/deepwork_policy/job.yml +++ b/.deepwork/jobs/deepwork_policy/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../../src/deepwork/schemas/job.schema.json name: deepwork_policy version: "0.1.0" summary: "Policy enforcement for AI agent sessions" diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 42fba81..887d271 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -5,7 +5,7 @@ from typing import Any from deepwork.schemas.job_schema import JOB_SCHEMA, LIFECYCLE_HOOK_EVENTS -from deepwork.utils.validation import ValidationError, validate_against_schema +from deepwork.utils.validation import ValidationError, validate_and_set_defaults from deepwork.utils.yaml_utils import YAMLError, load_yaml @@ -294,9 +294,9 @@ def parse_job_definition(job_dir: Path | str) -> JobDefinition: if job_data is None: raise ParseError("job.yml is empty") - # Validate against schema + # Validate against schema and apply defaults try: - validate_against_schema(job_data, JOB_SCHEMA) + validate_and_set_defaults(job_data, JOB_SCHEMA) except ValidationError as e: raise ParseError(f"Job definition validation failed: {e}") from e diff --git a/src/deepwork/core/policy_parser.py b/src/deepwork/core/policy_parser.py index b6ade99..80120aa 100644 --- a/src/deepwork/core/policy_parser.py +++ b/src/deepwork/core/policy_parser.py @@ -8,7 +8,7 @@ import yaml from deepwork.schemas.policy_schema import POLICY_SCHEMA -from deepwork.utils.validation import ValidationError, validate_against_schema +from deepwork.utils.validation import ValidationError, validate_and_set_defaults class PolicyParseError(Exception): @@ -280,9 +280,9 @@ def parse_policy_file(policy_path: Path | str, base_dir: Path | None = None) -> f"Policy file must contain a list of policies, got {type(policy_data).__name__}" ) - # Validate against schema + # Validate against schema and apply defaults try: - validate_against_schema(policy_data, POLICY_SCHEMA) + validate_and_set_defaults(policy_data, POLICY_SCHEMA) except ValidationError as e: raise PolicyParseError(f"Policy definition validation failed: {e}") from e diff --git a/src/deepwork/schemas/job.schema.json b/src/deepwork/schemas/job.schema.json new file mode 100644 index 0000000..c0d2c97 --- /dev/null +++ b/src/deepwork/schemas/job.schema.json @@ -0,0 +1,221 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://deepwork.dev/schemas/job.schema.json", + "title": "DeepWork Job Definition", + "description": "Schema for DeepWork job.yml files that define multi-step workflows", + "type": "object", + "required": ["name", "version", "summary", "steps"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Job name (lowercase letters, numbers, underscores, must start with letter)" + }, + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Semantic version (e.g., 1.0.0)" + }, + "summary": { + "type": "string", + "minLength": 1, + "maxLength": 200, + "description": "Brief one-line summary of what this job accomplishes" + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Detailed multi-line description of the job's purpose, process, and goals" + }, + "changelog": { + "type": "array", + "default": [], + "description": "Version history and changes to the job", + "items": { + "type": "object", + "required": ["version", "changes"], + "additionalProperties": false, + "properties": { + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Version number for this change" + }, + "changes": { + "type": "string", + "minLength": 1, + "description": "Description of changes made in this version" + } + } + } + }, + "steps": { + "type": "array", + "minItems": 1, + "description": "List of steps in the job", + "items": { + "$ref": "#/$defs/step" + } + } + }, + "$defs": { + "hookAction": { + "type": "object", + "description": "A hook action - one of: inline prompt, prompt file reference, or shell script", + "oneOf": [ + { + "required": ["prompt"], + "additionalProperties": false, + "properties": { + "prompt": { + "type": "string", + "minLength": 1, + "description": "Inline prompt for validation/action" + } + } + }, + { + "required": ["prompt_file"], + "additionalProperties": false, + "properties": { + "prompt_file": { + "type": "string", + "minLength": 1, + "description": "Path to prompt file (relative to job directory)" + } + } + }, + { + "required": ["script"], + "additionalProperties": false, + "properties": { + "script": { + "type": "string", + "minLength": 1, + "description": "Path to shell script (relative to job directory)" + } + } + } + ] + }, + "userInput": { + "type": "object", + "description": "User parameter input", + "required": ["name", "description"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Input parameter name" + }, + "description": { + "type": "string", + "description": "Input parameter description" + } + } + }, + "fileInput": { + "type": "object", + "description": "File input from a previous step", + "required": ["file", "from_step"], + "additionalProperties": false, + "properties": { + "file": { + "type": "string", + "description": "File name from previous step" + }, + "from_step": { + "type": "string", + "description": "Step ID that produces this file" + } + } + }, + "step": { + "type": "object", + "description": "A single step in the job workflow", + "required": ["id", "name", "description", "instructions_file", "outputs"], + "additionalProperties": false, + "properties": { + "id": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Step ID (unique within job)" + }, + "name": { + "type": "string", + "minLength": 1, + "description": "Human-readable step name" + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Step description" + }, + "instructions_file": { + "type": "string", + "minLength": 1, + "description": "Path to instructions file (relative to job directory)" + }, + "inputs": { + "type": "array", + "default": [], + "description": "List of inputs (user parameters or files from previous steps)", + "items": { + "oneOf": [ + { "$ref": "#/$defs/userInput" }, + { "$ref": "#/$defs/fileInput" } + ] + } + }, + "outputs": { + "type": "array", + "description": "List of output files/directories", + "items": { + "type": "string", + "minLength": 1 + } + }, + "dependencies": { + "type": "array", + "default": [], + "description": "List of step IDs this step depends on", + "items": { + "type": "string" + } + }, + "hooks": { + "type": "object", + "default": {}, + "description": "Lifecycle hooks for this step, keyed by event type", + "additionalProperties": false, + "properties": { + "after_agent": { + "type": "array", + "default": [], + "description": "Hooks triggered after the agent finishes (quality validation)", + "items": { "$ref": "#/$defs/hookAction" } + }, + "before_tool": { + "type": "array", + "default": [], + "description": "Hooks triggered before a tool is used", + "items": { "$ref": "#/$defs/hookAction" } + }, + "before_prompt": { + "type": "array", + "default": [], + "description": "Hooks triggered when user submits a prompt", + "items": { "$ref": "#/$defs/hookAction" } + } + } + }, + "stop_hooks": { + "type": "array", + "description": "DEPRECATED: Use hooks.after_agent instead. Stop hooks for quality validation loops.", + "items": { "$ref": "#/$defs/hookAction" } + } + } + } + } +} diff --git a/src/deepwork/schemas/job_schema.py b/src/deepwork/schemas/job_schema.py index 4127abb..075ab1f 100644 --- a/src/deepwork/schemas/job_schema.py +++ b/src/deepwork/schemas/job_schema.py @@ -1,212 +1,38 @@ -"""JSON Schema definition for job definitions.""" +"""JSON Schema definition for job definitions. + +The schema is loaded from the JSON Schema file job.schema.json. +This module provides backward-compatible access to the schema and related constants. +""" from typing import Any +from deepwork.utils.validation import load_schema + # Supported lifecycle hook events (generic names, mapped to platform-specific by adapters) # These values must match CommandLifecycleHook enum in adapters.py LIFECYCLE_HOOK_EVENTS = ["after_agent", "before_tool", "before_prompt"] -# Schema definition for a single hook action (prompt, prompt_file, or script) -HOOK_ACTION_SCHEMA: dict[str, Any] = { - "type": "object", - "oneOf": [ - { - "required": ["prompt"], - "properties": { - "prompt": { - "type": "string", - "minLength": 1, - "description": "Inline prompt for validation/action", - }, - }, - "additionalProperties": False, - }, - { - "required": ["prompt_file"], - "properties": { - "prompt_file": { - "type": "string", - "minLength": 1, - "description": "Path to prompt file (relative to job directory)", - }, - }, - "additionalProperties": False, - }, - { - "required": ["script"], - "properties": { - "script": { - "type": "string", - "minLength": 1, - "description": "Path to shell script (relative to job directory)", - }, - }, - "additionalProperties": False, - }, - ], -} +# Schema name for loading (corresponds to job.schema.json) +JOB_SCHEMA_NAME = "job.schema" + + +def get_job_schema() -> dict[str, Any]: + """ + Load and return the job schema from the JSON Schema file. + + Returns: + The job JSON Schema as a dictionary + """ + return load_schema(JOB_SCHEMA_NAME) + -# JSON Schema for job.yml files -JOB_SCHEMA: dict[str, Any] = { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "required": ["name", "version", "summary", "steps"], - "properties": { - "name": { - "type": "string", - "pattern": "^[a-z][a-z0-9_]*$", - "description": "Job name (lowercase letters, numbers, underscores, must start with letter)", - }, - "version": { - "type": "string", - "pattern": r"^\d+\.\d+\.\d+$", - "description": "Semantic version (e.g., 1.0.0)", - }, - "summary": { - "type": "string", - "minLength": 1, - "maxLength": 200, - "description": "Brief one-line summary of what this job accomplishes", - }, - "description": { - "type": "string", - "minLength": 1, - "description": "Detailed multi-line description of the job's purpose, process, and goals", - }, - "changelog": { - "type": "array", - "description": "Version history and changes to the job", - "items": { - "type": "object", - "required": ["version", "changes"], - "properties": { - "version": { - "type": "string", - "pattern": r"^\d+\.\d+\.\d+$", - "description": "Version number for this change", - }, - "changes": { - "type": "string", - "minLength": 1, - "description": "Description of changes made in this version", - }, - }, - "additionalProperties": False, - }, - }, - "steps": { - "type": "array", - "minItems": 1, - "description": "List of steps in the job", - "items": { - "type": "object", - "required": ["id", "name", "description", "instructions_file", "outputs"], - "properties": { - "id": { - "type": "string", - "pattern": "^[a-z][a-z0-9_]*$", - "description": "Step ID (unique within job)", - }, - "name": { - "type": "string", - "minLength": 1, - "description": "Human-readable step name", - }, - "description": { - "type": "string", - "minLength": 1, - "description": "Step description", - }, - "instructions_file": { - "type": "string", - "minLength": 1, - "description": "Path to instructions file (relative to job directory)", - }, - "inputs": { - "type": "array", - "description": "List of inputs (user parameters or files from previous steps)", - "items": { - "type": "object", - "oneOf": [ - { - "required": ["name", "description"], - "properties": { - "name": { - "type": "string", - "description": "Input parameter name", - }, - "description": { - "type": "string", - "description": "Input parameter description", - }, - }, - "additionalProperties": False, - }, - { - "required": ["file", "from_step"], - "properties": { - "file": { - "type": "string", - "description": "File name from previous step", - }, - "from_step": { - "type": "string", - "description": "Step ID that produces this file", - }, - }, - "additionalProperties": False, - }, - ], - }, - }, - "outputs": { - "type": "array", - "description": "List of output files/directories", - "items": { - "type": "string", - "minLength": 1, - }, - }, - "dependencies": { - "type": "array", - "description": "List of step IDs this step depends on", - "items": { - "type": "string", - }, - "default": [], - }, - "hooks": { - "type": "object", - "description": "Lifecycle hooks for this step, keyed by event type", - "properties": { - "after_agent": { - "type": "array", - "description": "Hooks triggered after the agent finishes (quality validation)", - "items": HOOK_ACTION_SCHEMA, - }, - "before_tool": { - "type": "array", - "description": "Hooks triggered before a tool is used", - "items": HOOK_ACTION_SCHEMA, - }, - "before_prompt": { - "type": "array", - "description": "Hooks triggered when user submits a prompt", - "items": HOOK_ACTION_SCHEMA, - }, - }, - "additionalProperties": False, - }, - # DEPRECATED: Use hooks.after_agent instead - "stop_hooks": { - "type": "array", - "description": "DEPRECATED: Use hooks.after_agent instead. Stop hooks for quality validation loops.", - "items": HOOK_ACTION_SCHEMA, - }, - }, - "additionalProperties": False, - }, - }, - }, - "additionalProperties": False, -} +# For backward compatibility, expose JOB_SCHEMA as a module-level variable +# This is loaded lazily to avoid circular imports at module load time +def __getattr__(name: str) -> Any: + if name == "JOB_SCHEMA": + return get_job_schema() + if name == "HOOK_ACTION_SCHEMA": + # Return the hookAction definition from the schema + schema = get_job_schema() + return schema.get("$defs", {}).get("hookAction", {}) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/deepwork/schemas/policy.schema.json b/src/deepwork/schemas/policy.schema.json new file mode 100644 index 0000000..b74616c --- /dev/null +++ b/src/deepwork/schemas/policy.schema.json @@ -0,0 +1,69 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://deepwork.dev/schemas/policy.schema.json", + "title": "DeepWork Policy Definition", + "description": "Schema for .deepwork.policy.yml files that define policies triggering based on file changes", + "type": "array", + "items": { + "$ref": "#/$defs/policy" + }, + "$defs": { + "globPattern": { + "type": "string", + "minLength": 1, + "description": "A glob pattern for matching file paths (supports * and ** wildcards)" + }, + "globPatternOrArray": { + "description": "One or more glob patterns for matching file paths", + "oneOf": [ + { "$ref": "#/$defs/globPattern" }, + { + "type": "array", + "items": { "$ref": "#/$defs/globPattern" }, + "minItems": 1 + } + ] + }, + "policy": { + "type": "object", + "description": "A policy that triggers based on file changes", + "required": ["name", "trigger"], + "additionalProperties": false, + "oneOf": [ + { "required": ["instructions"] }, + { "required": ["instructions_file"] } + ], + "properties": { + "name": { + "type": "string", + "minLength": 1, + "description": "Friendly name for the policy" + }, + "trigger": { + "$ref": "#/$defs/globPatternOrArray", + "description": "Glob pattern(s) for files that, if changed, should trigger this policy" + }, + "safety": { + "$ref": "#/$defs/globPatternOrArray", + "description": "Glob pattern(s) for files that, if also changed, mean the policy doesn't need to trigger" + }, + "instructions": { + "type": "string", + "minLength": 1, + "description": "Instructions to give the agent when this policy triggers" + }, + "instructions_file": { + "type": "string", + "minLength": 1, + "description": "Path to a file containing instructions (alternative to inline instructions)" + }, + "compare_to": { + "type": "string", + "enum": ["base", "default_tip", "prompt"], + "default": "base", + "description": "What to compare against when detecting changed files. 'base' (default) compares to the base of the current branch. 'default_tip' compares to the tip of the default branch. 'prompt' compares to the state at the start of the prompt." + } + } + } + } +} diff --git a/src/deepwork/schemas/policy_schema.py b/src/deepwork/schemas/policy_schema.py index 5aa6ae8..9e62e59 100644 --- a/src/deepwork/schemas/policy_schema.py +++ b/src/deepwork/schemas/policy_schema.py @@ -1,78 +1,34 @@ -"""JSON Schema definition for policy definitions.""" +"""JSON Schema definition for policy definitions. + +The schema is loaded from the JSON Schema file policy.schema.json. +This module provides backward-compatible access to the schema and related constants. +""" from typing import Any -# JSON Schema for .deepwork.policy.yml files -# Policies are defined as an array of policy objects -POLICY_SCHEMA: dict[str, Any] = { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "array", - "description": "List of policies that trigger based on file changes", - "items": { - "type": "object", - "required": ["name", "trigger"], - "properties": { - "name": { - "type": "string", - "minLength": 1, - "description": "Friendly name for the policy", - }, - "trigger": { - "oneOf": [ - { - "type": "string", - "minLength": 1, - "description": "Glob pattern for files that trigger this policy", - }, - { - "type": "array", - "items": {"type": "string", "minLength": 1}, - "minItems": 1, - "description": "List of glob patterns for files that trigger this policy", - }, - ], - "description": "Glob pattern(s) for files that, if changed, should trigger this policy", - }, - "safety": { - "oneOf": [ - { - "type": "string", - "minLength": 1, - "description": "Glob pattern for safety files", - }, - { - "type": "array", - "items": {"type": "string", "minLength": 1}, - "description": "List of glob patterns for safety files", - }, - ], - "description": "Glob pattern(s) for files that, if also changed, mean the policy doesn't need to trigger", - }, - "instructions": { - "type": "string", - "minLength": 1, - "description": "Instructions to give the agent when this policy triggers", - }, - "instructions_file": { - "type": "string", - "minLength": 1, - "description": "Path to a file containing instructions (alternative to inline instructions)", - }, - "compare_to": { - "type": "string", - "enum": ["base", "default_tip", "prompt"], - "description": ( - "What to compare against when detecting changed files. " - "'base' (default) compares to the base of the current branch. " - "'default_tip' compares to the tip of the default branch. " - "'prompt' compares to the state at the start of the prompt." - ), - }, - }, - "oneOf": [ - {"required": ["instructions"]}, - {"required": ["instructions_file"]}, - ], - "additionalProperties": False, - }, -} +from deepwork.utils.validation import load_schema + +# Valid compare_to values for policies +COMPARE_TO_VALUES = frozenset({"base", "default_tip", "prompt"}) +DEFAULT_COMPARE_TO = "base" + +# Schema name for loading (corresponds to policy.schema.json) +POLICY_SCHEMA_NAME = "policy.schema" + + +def get_policy_schema() -> dict[str, Any]: + """ + Load and return the policy schema from the JSON Schema file. + + Returns: + The policy JSON Schema as a dictionary + """ + return load_schema(POLICY_SCHEMA_NAME) + + +# For backward compatibility, expose POLICY_SCHEMA as a module-level variable +# This is loaded lazily to avoid circular imports at module load time +def __getattr__(name: str) -> Any: + if name == "POLICY_SCHEMA": + return get_policy_schema() + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 4bb8656..648a15c 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../schemas/job.schema.json name: deepwork_jobs version: "0.2.0" summary: "DeepWork job management commands" diff --git a/src/deepwork/standard_jobs/deepwork_policy/job.yml b/src/deepwork/standard_jobs/deepwork_policy/job.yml index 0aacc87..17ae507 100644 --- a/src/deepwork/standard_jobs/deepwork_policy/job.yml +++ b/src/deepwork/standard_jobs/deepwork_policy/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../schemas/job.schema.json name: deepwork_policy version: "0.1.0" summary: "Policy enforcement for AI agent sessions" diff --git a/src/deepwork/utils/validation.py b/src/deepwork/utils/validation.py index 863f8a7..756aaed 100644 --- a/src/deepwork/utils/validation.py +++ b/src/deepwork/utils/validation.py @@ -1,9 +1,12 @@ """Validation utilities using JSON Schema.""" +import json +from pathlib import Path from typing import Any +from jsonschema import Draft7Validator, validate from jsonschema import ValidationError as JSONSchemaValidationError -from jsonschema import validate +from jsonschema.validators import extend class ValidationError(Exception): @@ -12,6 +15,124 @@ class ValidationError(Exception): pass +# Cache for loaded schemas +_schema_cache: dict[str, dict[str, Any]] = {} + +# Directory where schema files are stored +SCHEMAS_DIR = Path(__file__).parent.parent / "schemas" + + +def load_schema(schema_name: str) -> dict[str, Any]: + """ + Load a JSON Schema from file. + + Args: + schema_name: Name of the schema file (without .json extension) + e.g., "job.schema" or "policy.schema" + + Returns: + Parsed JSON Schema as a dictionary + + Raises: + FileNotFoundError: If schema file doesn't exist + json.JSONDecodeError: If schema file is invalid JSON + """ + if schema_name in _schema_cache: + return _schema_cache[schema_name] + + schema_path = SCHEMAS_DIR / f"{schema_name}.json" + if not schema_path.exists(): + raise FileNotFoundError(f"Schema file not found: {schema_path}") + + with open(schema_path, encoding="utf-8") as f: + schema = json.load(f) + + _schema_cache[schema_name] = schema + return dict(schema) + + +def _set_defaults(validator_class: type) -> type: + """ + Create a validator class that sets default values during validation. + + This extends a jsonschema validator to fill in default values from the schema + when they are not present in the data. + + Args: + validator_class: The base validator class to extend + + Returns: + Extended validator class that sets defaults + """ + validate_properties = validator_class.VALIDATORS["properties"] # type: ignore[attr-defined] + + def set_defaults_in_properties( + validator: Any, properties: dict[str, Any], instance: Any, schema: dict[str, Any] + ) -> Any: + """Validator that sets defaults before validating properties.""" + # Only process dicts + if not isinstance(instance, dict): + yield from validate_properties(validator, properties, instance, schema) + return + + # Set defaults for missing properties + for prop, subschema in properties.items(): + if prop not in instance and "default" in subschema: + instance[prop] = _deep_copy_default(subschema["default"]) + + # Continue with normal validation + yield from validate_properties(validator, properties, instance, schema) + + def set_defaults_in_items( + validator: Any, items: dict[str, Any], instance: Any, schema: dict[str, Any] + ) -> Any: + """Validator that processes defaults in array items.""" + # Only process lists + if not isinstance(instance, list): + yield from validator_class.VALIDATORS["items"]( # type: ignore[attr-defined] + validator, items, instance, schema + ) + return + + # For each item in the array, if it's a dict and items schema has properties, + # apply defaults recursively + if isinstance(items, dict) and "properties" in items: + for item in instance: + if isinstance(item, dict): + for prop, subschema in items["properties"].items(): + if prop not in item and "default" in subschema: + item[prop] = _deep_copy_default(subschema["default"]) + + # Continue with normal validation + yield from validator_class.VALIDATORS["items"]( # type: ignore[attr-defined] + validator, items, instance, schema + ) + + return extend( # type: ignore[no-any-return] + validator_class, + {"properties": set_defaults_in_properties, "items": set_defaults_in_items}, + ) + + +def _deep_copy_default(value: Any) -> Any: + """ + Create a deep copy of a default value. + + This ensures that mutable defaults (lists, dicts) don't get shared + between different instances. + """ + if isinstance(value, dict): + return {k: _deep_copy_default(v) for k, v in value.items()} + elif isinstance(value, list): + return [_deep_copy_default(item) for item in value] + else: + return value + + +# Create the default-setting validator +DefaultSettingValidator = _set_defaults(Draft7Validator) + + def validate_against_schema(data: dict[str, Any], schema: dict[str, Any]) -> None: """ Validate data against JSON Schema. @@ -29,3 +150,55 @@ def validate_against_schema(data: dict[str, Any], schema: dict[str, Any]) -> Non # Extract meaningful error message path = " -> ".join(str(p) for p in e.path) if e.path else "root" raise ValidationError(f"Validation error at {path}: {e.message}") from e + + +def validate_and_set_defaults(data: Any, schema: dict[str, Any]) -> Any: + """ + Validate data against JSON Schema and apply default values. + + This function mutates the input data in-place, adding default values + for any missing properties that have defaults defined in the schema. + + Args: + data: Data to validate (will be mutated in-place) + schema: JSON Schema to validate against + + Returns: + The input data with defaults applied (same object, mutated) + + Raises: + ValidationError: If validation fails + """ + try: + # The DefaultSettingValidator sets defaults as it validates + validator = DefaultSettingValidator(schema) + errors = list(validator.iter_errors(data)) + if errors: + # Report the first error + e = errors[0] + path = " -> ".join(str(p) for p in e.path) if e.path else "root" + raise ValidationError(f"Validation error at {path}: {e.message}") + except JSONSchemaValidationError as e: + path = " -> ".join(str(p) for p in e.path) if e.path else "root" + raise ValidationError(f"Validation error at {path}: {e.message}") from e + + return data + + +def validate_and_set_defaults_from_schema(data: Any, schema_name: str) -> Any: + """ + Load a schema by name and validate data with defaults. + + Args: + data: Data to validate (will be mutated in-place) + schema_name: Name of the schema file (e.g., "job.schema", "policy.schema") + + Returns: + The input data with defaults applied + + Raises: + ValidationError: If validation fails + FileNotFoundError: If schema file doesn't exist + """ + schema = load_schema(schema_name) + return validate_and_set_defaults(data, schema) diff --git a/tests/fixtures/jobs/complex_job/job.yml b/tests/fixtures/jobs/complex_job/job.yml index 7c1343d..21af3a2 100644 --- a/tests/fixtures/jobs/complex_job/job.yml +++ b/tests/fixtures/jobs/complex_job/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../../../src/deepwork/schemas/job.schema.json name: competitive_research version: "0.1.0" summary: "Systematic competitive analysis workflow" diff --git a/tests/fixtures/jobs/simple_job/job.yml b/tests/fixtures/jobs/simple_job/job.yml index 1464229..ef01f7a 100644 --- a/tests/fixtures/jobs/simple_job/job.yml +++ b/tests/fixtures/jobs/simple_job/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../../../src/deepwork/schemas/job.schema.json name: simple_job version: "0.1.0" summary: "A simple single-step job for testing" diff --git a/tests/fixtures/policies/multiple_policies.yml b/tests/fixtures/policies/multiple_policies.yml index da29231..ae0d008 100644 --- a/tests/fixtures/policies/multiple_policies.yml +++ b/tests/fixtures/policies/multiple_policies.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../../src/deepwork/schemas/policy.schema.json - name: "Update install guide on config changes" trigger: "app/config/**/*" safety: "docs/install_guide.md" diff --git a/tests/fixtures/policies/policy_with_instructions_file.yml b/tests/fixtures/policies/policy_with_instructions_file.yml index 267bfc6..64f0001 100644 --- a/tests/fixtures/policies/policy_with_instructions_file.yml +++ b/tests/fixtures/policies/policy_with_instructions_file.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../../src/deepwork/schemas/policy.schema.json - name: "Security review" trigger: "src/auth/**/*" instructions_file: "instructions/security_review.md" diff --git a/tests/fixtures/policies/valid_policy.yml b/tests/fixtures/policies/valid_policy.yml index a2b0b6b..bcab0f1 100644 --- a/tests/fixtures/policies/valid_policy.yml +++ b/tests/fixtures/policies/valid_policy.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=../../../src/deepwork/schemas/policy.schema.json - name: "Update install guide on config changes" trigger: "app/config/**/*" safety: "docs/install_guide.md" From e4d4363c0708b34611b06b74f0c690e4690b2772 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 15 Jan 2026 19:08:25 +0000 Subject: [PATCH 2/2] chore: update uv.lock --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index d780f0d..ccd8b04 100644 --- a/uv.lock +++ b/uv.lock @@ -126,7 +126,7 @@ toml = [ [[package]] name = "deepwork" -version = "0.1.0" +version = "0.1.1" source = { editable = "." } dependencies = [ { name = "click" },