# skills/tests/scenarios/easy/add-factorial.yaml

# Easy scenario: add a simple function to an existing codebase.
# Scenario identity and classification metadata.
id: add-factorial
title: Add factorial function
difficulty: easy
tags:
  - python
  - new-feature
  - single-file

# Starting state for the scenario.
# NOTE(review): `source` is presumably the name of a fixture project that
# provides src/math_utils.py and tests/ — confirm against the harness.
fixture:
  source: python-math-lib

# The task statement. `description` is a literal block scalar (`|`), so its
# line breaks are preserved exactly as written below.
task:
  description: |
    Add a function `factorial(n)` to `src/math_utils.py` that computes
    the factorial of a non-negative integer.

    Requirements:
    - factorial(0) should return 1
    - factorial(5) should return 120
    - Should raise ValueError for negative inputs

  # Same file the description asks to be modified.
  entry_point: src/math_utils.py

# How the scenario is executed.
# NOTE(review): `mode: both` presumably means the scripted actions below and a
# live (non-scripted) run are both exercised — confirm against the harness.
execution:
  mode: both
  timeout: 5m

  # Deterministic reference solution, applied as an ordered list of actions:
  # start the worker, edit the file, commit the change, mark the worker done.
  scripted:
    actions:
      - type: worker
        command: start
      # Replaces the `# Math utilities` header line with the header plus the
      # new function. The implementation is iterative so that large inputs
      # (n around 1000 and above) do not hit Python's recursion limit, and the
      # def is preceded by two blank lines as PEP 8 expects for top-level code.
      - type: edit
        path: src/math_utils.py
        old: |
          # Math utilities
        new: |
          # Math utilities


          def factorial(n: int) -> int:
              """Compute factorial of non-negative integer."""
              if n < 0:
                  raise ValueError("factorial not defined for negative numbers")
              result = 1
              for factor in range(2, n + 1):
                  result *= factor
              return result
      - type: shell
        run: git add -A && git commit -m "Add factorial function"
      - type: worker
        command: done

# Post-run verification of the result.
verify:
  properties:
    # Textual check that a definition starting with `def factorial` exists.
    - type: file_contains
      path: src/math_utils.py
      pattern: "def factorial"

    # Checks that a Python function named `factorial` is defined in the file.
    - type: function_defined
      path: src/math_utils.py
      name: factorial
      language: python

    # Runs the test suite under tests/.
    - type: tests_pass
      command: pytest tests/ -v

    # Spot-checks the three requirements from the task description.
    # The negative-input check uses try/except/else and raises SystemExit
    # directly instead of calling the `exit()` helper, which is injected by
    # the `site` module and is not meant for use in programs. A string
    # argument to SystemExit is printed to stderr and yields exit status 1.
    - type: custom
      command: |
        python -c "
        from src.math_utils import factorial
        assert factorial(0) == 1
        assert factorial(5) == 120
        try:
            factorial(-1)
        except ValueError:
            pass
        else:
            raise SystemExit('factorial(-1) did not raise ValueError')
        "

  # LLM-graded rubric: each criterion carries a weight, and the overall
  # result is compared against `threshold`.
  # NOTE(review): the weights below sum to 2.3 while the threshold is 0.7, so
  # the score is presumably normalized before comparison — confirm against
  # the judge implementation.
  llm_judge:
    enabled: true
    model: haiku
    rubric:
      - criterion: Function correctly computes factorial for typical inputs
        weight: 1.0
      - criterion: Handles edge case n=0 correctly
        weight: 0.5
      - criterion: Handles negative input with appropriate error
        weight: 0.5
      - criterion: Code is idiomatic Python with type hints
        weight: 0.3
    threshold: 0.7

  # Human review is not required for this scenario.
  human:
    required: false

# Benchmark settings.
# NOTE(review): `runs` is presumably the number of repeated executions used
# for benchmarking — confirm against the harness.
benchmark:
  enabled: true
  runs: 5