From 0e452bb22109abd027334504a57482c51573b63b Mon Sep 17 00:00:00 2001 From: SandyYuan Date: Fri, 16 Jan 2026 06:50:30 +0000 Subject: [PATCH 1/7] Add IFEval reward function for instruction-following evaluation - 112 total constraints (54 IFEval/IFTrain + 58 IFBench OOD) - Self-contained module with no external repo dependencies - Partial credit scoring (fraction of constraints satisfied) - Automatic tag stripping for reasoning models --- eval_protocol/rewards/ifeval/README.md | 33 + eval_protocol/rewards/ifeval/__init__.py | 18 + .../rewards/ifeval/ifbench_instructions.py | 2257 ++++++++++++++ .../rewards/ifeval/ifbench_registry.py | 83 + eval_protocol/rewards/ifeval/ifbench_util.py | 1649 +++++++++++ .../rewards/ifeval/ifeval_instructions.py | 2614 +++++++++++++++++ .../rewards/ifeval/ifeval_registry.py | 315 ++ eval_protocol/rewards/ifeval/ifeval_util.py | 1665 +++++++++++ eval_protocol/rewards/ifeval/reward.py | 101 + 9 files changed, 8735 insertions(+) create mode 100644 eval_protocol/rewards/ifeval/README.md create mode 100644 eval_protocol/rewards/ifeval/__init__.py create mode 100644 eval_protocol/rewards/ifeval/ifbench_instructions.py create mode 100644 eval_protocol/rewards/ifeval/ifbench_registry.py create mode 100644 eval_protocol/rewards/ifeval/ifbench_util.py create mode 100644 eval_protocol/rewards/ifeval/ifeval_instructions.py create mode 100644 eval_protocol/rewards/ifeval/ifeval_registry.py create mode 100644 eval_protocol/rewards/ifeval/ifeval_util.py create mode 100644 eval_protocol/rewards/ifeval/reward.py diff --git a/eval_protocol/rewards/ifeval/README.md b/eval_protocol/rewards/ifeval/README.md new file mode 100644 index 00000000..8c382dd1 --- /dev/null +++ b/eval_protocol/rewards/ifeval/README.md @@ -0,0 +1,33 @@ +# IFEval Reward Function + +Evaluates how well model responses follow instruction constraints. Returns a partial credit score (0.0 to 1.0). 
+ +## Quick Start + +```python +import sys +sys.path.insert(0, '/path/to/eval_protocol/rewards/ifeval') +from reward import ifeval_partial_credit_reward + +response = "Hello world! This is my response." +ground_truth = { + "instruction_id": ["keywords:existence"], + "kwargs": [{"keywords": ["hello", "world"]}] +} + +score = ifeval_partial_credit_reward(response, ground_truth) +# Score: 1.0 (all constraints satisfied) +``` + +## Dependencies + +```bash +pip install spacy nltk langdetect emoji syllapy immutabledict +python -m spacy download en_core_web_sm +``` + +## Notes + +- Automatically strips reasoning tags (e.g. `<think>...</think>`) before evaluation +- Ground truth can be a dict, list, or JSON string +- 112 total constraints (54 IFEval/IFTrain + 58 IFBench OOD) diff --git a/eval_protocol/rewards/ifeval/__init__.py b/eval_protocol/rewards/ifeval/__init__.py new file mode 100644 index 00000000..7b067d0c --- /dev/null +++ b/eval_protocol/rewards/ifeval/__init__.py @@ -0,0 +1,18 @@ +"""IFEval reward function for evaluating instruction-following capabilities. + +Usage: + # Option 1: Import spacy first to avoid cupy conflicts in some Docker environments + import spacy + from eval_protocol.rewards.ifeval import ifeval_partial_credit_reward + + # Option 2: Direct import (add ifeval dir to path) + import sys + sys.path.insert(0, '/path/to/eval_protocol/rewards/ifeval') + from reward import ifeval_partial_credit_reward + + score = ifeval_partial_credit_reward(response, ground_truth) +""" + +from .reward import ifeval_partial_credit_reward + +__all__ = ["ifeval_partial_credit_reward"] diff --git a/eval_protocol/rewards/ifeval/ifbench_instructions.py b/eval_protocol/rewards/ifeval/ifbench_instructions.py new file mode 100644 index 00000000..98dfbd92 --- /dev/null +++ b/eval_protocol/rewards/ifeval/ifbench_instructions.py @@ -0,0 +1,2257 @@ +# Copyright 2025 Allen Institute for AI. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Library of instructions.""" + +import logging +import random +import re +import string +from typing import Dict, Optional, Sequence, Union +import nltk +import spacy +from spacy.cli import download +import emoji +import syllapy +import unicodedata +from collections import Counter +import csv +import io + +try: + from . import ifbench_util as instructions_util +except ImportError: + import ifbench_util as instructions_util + +download('en_core_web_sm') + +logger = logging.getLogger(__name__) + +_InstructionArgsDtype = Optional[Dict[str, Union[int, str, Sequence[str]]]] + +# The number of keywords. +_NUM_KEYWORDS = 2 + +# The number of words in the response. +_NUM_WORDS_LOWER_LIMIT = 100 +_NUM_WORDS_UPPER_LIMIT = 500 + +# The number of numbers. +_NUM_NUMBERS = 6 + +# Period length for periodic words. +_NUM_WORD_CYCLE = 30 + +# Maximum number of times a word can be repeated. +_MAX_REPEATS = 5 + +# Which sentence must contain a keyword. +_NUM_KEYWORD_SENTENCE = 20 + +# Minimum number of pronouns. +_NUM_PRONOUNS = 25 + +# The size of increment for lengths. +_NUM_INCREMENT = 5 + +# The number of coordinating conjunctions. 
+_NUM_CONJUNCTIONS = 6 + + +class Instruction: + """An instruction template.""" + + def __init__(self, instruction_id): + self.id = instruction_id + + def build_description(self, **kwargs): + raise NotImplementedError("`build_description` not implemented.") + + def get_instruction_args(self): + raise NotImplementedError("`get_instruction_args` not implemented.") + + def get_instruction_args_keys(self): + raise NotImplementedError("`get_instruction_args_keys` not implemented.") + + def check_following(self, value): + raise NotImplementedError("`check_following` not implemented.") + + +# Everything as follows is part of OOD IFEval + +class WordCountRangeChecker(Instruction): + """Word Count Range: The response must contain between X and Y words.""" + + def build_description(self, *, min_words=None, max_words=None): + """Build the instruction description. + + Args: + min_words: An integer specifying the minimum number of words contained in the response. + max_words: An integer specifying the maximum number of words contained in the response. + + Returns: + A string representing the instruction description. + """ + self._min_words = min_words + self._max_words = max_words + + if self._min_words is None or self._min_words < 0: + self._min_words = random.randint( + _NUM_WORDS_LOWER_LIMIT, _NUM_WORDS_UPPER_LIMIT + ) + + # Make the range small + if self._max_words is None or self._max_words < 0: + self._max_words = self._min_words + random.randint(int(self._min_words * 0.05), int(self._min_words * 0.1)) + + self._description_pattern = "The response must contain between {min_words} and {max_words} words." 
+ + return self._description_pattern.format( + min_words=self._min_words, max_words=self._max_words + ) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"min_words": self._min_words, "max_words": self._max_words} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["min_words", "max_words"] + + def check_following(self, value): + """Checks if the response contains the expected number of words.""" + num_words = instructions_util.count_words(value) + return self._min_words <= num_words <= self._max_words + + +class UniqueWordCountChecker(Instruction): + """Unique Word Count: The response must contain at least X unique words.""" + + def build_description(self, *, N=None): + """Build the instruction description. + + Args: + N: An integer specifying the minimum number of unique words contained in the response. + + Returns: + A string representing the instruction description. + """ + self._num_unique_words = N + + if self._num_unique_words is None or self._num_unique_words < 0: + self._num_unique_words = random.randint( + _NUM_WORDS_LOWER_LIMIT, _NUM_WORDS_UPPER_LIMIT + ) + + self._description_pattern = "Use at least {N} unique words in the response." 
+ + return self._description_pattern.format(N=self._num_unique_words) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"N": self._num_unique_words} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["N"] + + def check_following(self, value): + """Checks if the response contains the expected number of unique words.""" + words = value.lower().split() + unique_words = set() + for word in words: + unique_words.add(word.strip(''.join(string.punctuation) + ' ')) + # Convert to set to get unique words + return len(unique_words) >= self._num_unique_words + + +class StopWordPercentageChecker(Instruction): + """Ensure that stop words constitute no more than {percentage}% of the total words in your response.""" + + def build_description(self, *, percentage=None): + """Build the instruction description. + + Args: + percentage: An integer specifying the percentage of stop words that are allowed in the response. + + Returns: + A string representing the instruction description. + """ + self._percentage = percentage + + if self._percentage is None or self._percentage < 0: + self._percentage = random.randint(1, 100) + + self._description_pattern = "Ensure that stop words constitute no more than {percentage}% of the total words in your response." 
+ + return self._description_pattern.format(percentage=self._percentage) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"percentage": self._percentage} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["percentage"] + + def check_following(self, value): + """Checks if the response contains the expected percentage of stop words.""" + num_words = instructions_util.count_words(value) + num_stopwords = instructions_util.count_stopwords(value) + stopword_percentage = (num_stopwords / num_words) * 100 + return stopword_percentage <= self._percentage + + +class SentTypeRatioChecker(Instruction): + """Maintain a 2:1 ratio of declarative to interrogative sentences.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Maintain a 2:1 ratio of declarative to interrogative sentences." + nltk.download('punkt_tab') + return self._description_pattern + + def get_instruction_args(self): + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response contains the expected ratio of declarative to interrogative sentences.""" + # Split the text into sentences + sentences = instructions_util.split_into_sentences(value) + # Count the number of declarative and interrogative sentences + declarative_count = sum(1 for sentence in sentences if sentence.endswith('.')) + interrogative_count = sum(1 for sentence in sentences if sentence.endswith('?')) + # Check if the ratio is 2:1 + return declarative_count == 2 * interrogative_count + + +class SentBalanceChecker(Instruction): + """Ensure that the ratio of sentence types (declarative, interrogative, exclamatory) is balanced.""" + + def build_description(self): + """Build the instruction description.""" + nltk.download('punkt_tab') + self._description_pattern = 
"Ensure that the ratio of sentence types (declarative, interrogative, exclamatory) is balanced." + return self._description_pattern + + def get_instruction_args(self): + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response contains a balanced ratio of sentence types.""" + # Split the text into sentences + sentences = instructions_util.split_into_sentences(value) + # Count the number of each sentence type + declarative_count = sum(1 for sentence in sentences if sentence.endswith('.')) + interrogative_count = sum(1 for sentence in sentences if sentence.endswith('?')) + exclamatory_count = sum(1 for sentence in sentences if sentence.endswith('!')) + # Check if the ratio of sentence types is balanced + return declarative_count == interrogative_count == exclamatory_count + + +class ConjunctionCountChecker(Instruction): + """Use at least {small_n} different coordinating conjunctions in the response.""" + + def build_description(self, *, small_n=None): + """Build the instruction description. + + Args: + small_n: An integer specifying the number of different coordinating conjunctions contained in the response. + + Returns: + A string representing the instruction description. + """ + self._num_conjunctions = small_n + + if self._num_conjunctions is None or self._num_conjunctions < 0: + self._num_conjunctions = random.randint(2, _NUM_CONJUNCTIONS) + + self._description_pattern = "Use at least {small_n} different coordinating conjunctions in the response." 
+ + return self._description_pattern.format(small_n=self._num_conjunctions) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"small_n": self._num_conjunctions} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["small_n"] + + def check_following(self, value): + """Checks if the response contains the expected number of different coordinating conjunctions.""" + # Split the text into words + words = value.split() + # Count the number of coordinating conjunctions + conjunctions = [word for word in words if + word.strip(''.join(string.punctuation) + ' ').lower() in ['and', 'but', 'for', 'nor', 'or', + 'so', 'yet']] + unique_conjunctions = set(conjunctions) + return len(unique_conjunctions) >= self._num_conjunctions + + +class PersonNameCountChecker(Instruction): + """Mention at least {N} different person names in the response.""" + + def build_description(self, *, N=None): + """Build the instruction description. + + Args: + N: An integer specifying the minimum number of unique person names contained in the response. + + Returns: + A string representing the instruction description. + """ + self._num_person_names = N + + if self._num_person_names is None or self._num_person_names < 0: + self._num_person_names = random.randint(1, 50) + + self.nlp = spacy.load("en_core_web_sm") + + self._description_pattern = "Mention at least {N} different person names in the response, from this list of person names: Emma, Liam, Sophia, Jackson, Olivia, Noah, Ava, Lucas, Isabella, Mason, Mia, Ethan, Charlotte, Alexander, Amelia, Benjamin, Harper, Leo, Zoe, Daniel, Chloe, Samuel, Lily, Matthew, Grace, Owen, Abigail, Gabriel, Ella, Jacob, Scarlett, Nathan, Victoria, Elijah, Layla, Nicholas, Audrey, David, Hannah, Christopher, Penelope, Thomas, Nora, Andrew, Aria, Joseph, Claire, Ryan, Stella, Jonathan ." 
+ return self._description_pattern.format(N=self._num_person_names) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"N": self._num_person_names} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["N"] + + def check_following(self, value): + """Checks if the response contains at least the expected number of unique person names.""" + person_name_list = ["Emma", "Liam", "Sophia", "Jackson", "Olivia", "Noah", "Ava", "Lucas", "Isabella", "Mason", + "Mia", "Ethan", "Charlotte", + "Alexander", + "Amelia", + "Benjamin", + "Harper", + "Leo", + "Zoe", + "Daniel", + "Chloe", + "Samuel", + "Lily", + "Matthew", + "Grace", + "Owen", + "Abigail", + "Gabriel", + "Ella", + "Jacob", + "Scarlett", + "Nathan", + "Victoria", + "Elijah", + "Layla", + "Nicholas", + "Audrey", + "David", + "Hannah", + "Christopher", + "Penelope", + "Thomas", + "Nora", + "Andrew", + "Aria", + "Joseph", + "Claire", + "Ryan", + "Stella", + "Jonathan" + ] + # Collect the listed names that occur in the response (plain substring match) + person_names = [] + for name in person_name_list: + if name in value: + person_names.append(name) + unique_person_names = set(person_names) + + return len(unique_person_names) >= self._num_person_names + + +class NGramOverlapChecker(Instruction): + """Maintain a trigram overlap of {percentage}% (±2%) with the provided reference text.""" + + def build_description(self, *, reference_text=None, percentage=None): + """Build the instruction description. + + Args: + reference_text: A string representing the reference text. + percentage: An integer specifying the percent trigram overlap + to maintain in the response. + + Returns: + A string representing the instruction description. 
+ """ + self._reference_text = reference_text + self._percentage = percentage + if self._percentage is None or self._percentage < 0: + self._percentage = random.randint(1, 100) + + self._description_pattern = "Maintain a trigram overlap of {percentage}% (±2%) with the provided reference text." + return self._description_pattern.format(percentage=self._percentage) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"reference_text": self._reference_text, "percentage": self._percentage} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["reference_text", "percentage"] + + def check_following(self, value): + """Checks if the response maintains a trigram overlap with the reference text within 2% of {percent}.""" + n = 3 + ngrams = set(nltk.ngrams(value, n)) + ref_ngrams = set(nltk.ngrams(self._reference_text, n)) + overlap = len(ngrams.intersection(ref_ngrams)) / len(ngrams) + return self._percentage - 2 <= overlap * 100 <= self._percentage + 2 + + +class NumbersCountChecker(Instruction): + """Include exactly {N} numbers in the response.""" + + def build_description(self, *, N=None): + """Build the instruction description. + + Args: + N: An integer specifying the exact number of numbers + that is required to appear in the response. + + Returns: + A string representing the instruction description. + """ + self._count_numbers = N + if self._count_numbers is None or self._count_numbers < 0: + self._count_numbers = random.randint(1, _NUM_NUMBERS) + + self._description_pattern = "Include exactly {N} numbers in the response." 
+ return self._description_pattern.format(N=self._count_numbers) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"N": self._count_numbers} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["N"] + + def check_following(self, value): + """Checks if the response includes exactly {N} numbers.""" + # Strip punctuation to handle decimals and commas in numbers correctly + value = value.translate(str.maketrans('', '', string.punctuation)) + numbers = re.findall(r'\d+', value) + return len(numbers) == self._count_numbers + + +class AlphabetLoopChecker(Instruction): + """Each word must start with the next letter of the alphabet, looping back to 'A' after 'Z'.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Each word must start with the next letter of the alphabet, looping back to 'A' after 'Z'." + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if each word of the response starts with the next letter of the alphabet.""" + value = value.translate(str.maketrans('', '', string.punctuation)) + words = value.strip(''.join(string.punctuation) + ' ').split() + alphabet = string.ascii_lowercase + correct_letter = words[0][0].lower() + if correct_letter not in alphabet: # a non-letter first character (e.g. a digit) fails + return False + for word in words[1:]: + word = word.strip(''.join(string.punctuation) + ' ').lower() + if not word: + continue + correct_letter = alphabet[(alphabet.index(correct_letter) + 1) % 26] + if word[0] != correct_letter: + return False + return True + + +class SingleVowelParagraphChecker(Instruction): + """Write a paragraph using words that contain only three types of 
vowels.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Write a paragraph using words that contain only three types of vowels." + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if no more than three types of vowels are used in the response and the response is only 1 paragraph.""" + paragraphs = value.strip().split('\n') + if len(paragraphs) != 1: + return False + paragraph = paragraphs[0].lower() + + vowels = set('aeiou') + paragraph_vowels = set([char for char in paragraph if char in vowels]) + return len(paragraph_vowels) <= 3 + + +class ConsonantClusterChecker(Instruction): + """Ensure each word in your response has at least one consonant cluster (two or more consonants together).""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Ensure each word in your response has at least one consonant cluster (two or more consonants together)." 
+ return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if each word in the response includes at least one consonant cluster.""" + words = value.lower().strip().split() + consonants = set('bcdfghjklmnpqrstvwxyz') + for word in words: + cluster = False + for i in range(len(word) - 1): + if word[i] in consonants and word[i + 1] in consonants: + cluster = True + break + if not cluster: + return False + return True + + +class IncrementingAlliterationChecker(Instruction): + """Each sentence must have a longer sequence of consecutive alliterative words than the previous one.""" + + def build_description(self): + """Build the instruction description.""" + nltk.download('punkt_tab') + self._description_pattern = "Each sentence must have a longer sequence of consecutive alliterative words than the previous one." 
+ return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if each sentence in the response has more alliterative words (determined by common first letter) than the previous sentence.""" + sentences = instructions_util.split_into_sentences(value) + prev_alliteration = -1 + for sentence in sentences: + words = sentence.lower().split() + alliteration = 0 + prev_alliterative = False + new_words = [] + for word in words: + clean = word.lstrip(''.join(string.punctuation) + ' ') + if clean: + new_words.append(clean) + for i in range(len(new_words) - 1): + if new_words[i][0] == new_words[i + 1][0]: + if prev_alliterative: + alliteration += 1 + else: + alliteration += 2 + prev_alliterative = True + else: + prev_alliterative = False + if alliteration <= prev_alliteration: + return False + prev_alliteration = alliteration + return True + + +class PalindromeChecker(Instruction): + """Include at least 10 single-word palindromes, each at least 5 characters long.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Include at least 10 single-word palindromes, each at least 5 characters long." 
+ return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response includes at least 10 single-word palindromes of length at least 5.""" + value = value.translate(str.maketrans('', '', string.punctuation)) + words = value.lower().split() + palindromes = [word for word in words if word == word[::-1] and len(word) >= 5] + return len(palindromes) >= 10 + + +class PunctuationCoverChecker(Instruction): + """Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!).""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!)." + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response includes every standard punctuation mark at least once, including the interrobang (?!).""" + punctuation = {".", ",", "!", "?", ";", ":"} + if not ('!?' in value or '?!' 
in value or '‽' in value): + return False + new_value = value.replace('?!', '', 1) + if len(new_value) == len(value): + new_value = value.replace('!?', '', 1) + for char in new_value: + if char in punctuation: + punctuation.remove(char) + return not punctuation + + +class NestedParenthesesChecker(Instruction): + """Nest parentheses (and [brackets {and braces}]) at least 5 levels deep.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Nest parentheses (and [brackets {and braces}]) at least 5 levels deep." + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response includes a correctly closed set of at least 5 nested brackets.""" + levels = [] + min_levels = 5 + max_depth = 0 + depth_stack = [] # Track depth per matched group + + for char in value: + if char in "([{": + levels.append(char) + if len(levels) > max_depth: + max_depth = len(levels) + elif char in ")]}": + if levels and ( + (levels[-1] == '(' and char == ')') or + (levels[-1] == '[' and char == ']') or + (levels[-1] == '{' and char == '}') + ): + levels.pop() + # Check if we just closed a group that reached 5+ depth + if max_depth >= min_levels and len(levels) < max_depth: + return True + else: + # Mismatch — reset + levels = [] + max_depth = 0 + + return False + + +class NestedQuotesChecker(Instruction): + """Include quotes within quotes within quotes, at least 3 levels deep, alternating between double quotes and single quotes.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Include quotes within quotes within quotes, at least 3 levels deep, alternating between double quotes and single quotes." 
+ return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response includes nested quotes to at least 3 levels + alternating between " and ' starting with either character.""" + levels = [] + min_levels = 3 + reached_depth = 0 + current_depth = 0 + for char in value: + if len(levels) != 0 and char == levels[-1]: + levels.pop() + current_depth -= 1 + if reached_depth - current_depth >= min_levels: + return True + elif char == '"' or char == "'": + levels.append(char) + current_depth += 1 + if current_depth > reached_depth: + reached_depth = current_depth + return False + + +class PrimeLengthsChecker(Instruction): + """Use only words with lengths that are prime numbers.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Use only words with lengths that are prime numbers." + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response only includes words with prime length.""" + value = value.translate(str.maketrans('', '', string.punctuation)) + words = value.split() + primes = set([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]) + for word in words: + if len(word) not in primes: + return False + return True + + +class OptionsResponseChecker(Instruction): + """Answer with one of the following options: {options}. Do not give any explanation.""" + + def build_description(self, *, options=None): + """Build the instruction description. 
+ + Args: + options: A string specifying the permitted options for + the response. + + Returns: + A string representing the instruction description. + """ + # Options string may be: yes/no/maybe, I know or I don't know, a), b), c), d) + # Can be separated by "/", "or", "," + options_bank = ["yes/no/maybe", "I know or I don't know", "a), b), c), d)"] + if options is None: + options = random.choice(options_bank) + + # Be more strict about format for multiple choice letters than for text options + self._strict = False + if re.match(r"\W*[aA]\W*[bB]\W*[cC]\W*", options) is not None: + self._strict = True + if "/" in options: + separator = "/" + elif "or" in options: + separator = "or" + else: + separator = "," + self._options = [option.strip() for option in options.split(separator)] + self._options_text = options # in text, shouldn't be formatted as a list + self._description_pattern = "Answer with one of the following options: {options}. Do not give any explanation." + return self._description_pattern.format(options=self._options_text) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"options": self._options_text} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["options"] + + def check_following(self, value): + """Checks if the response is exactly one of {options}.""" + if self._strict: + return value in self._options + value = value.strip(''.join(string.punctuation) + ' ').lower() + for option in self._options: + if option.strip(''.join(string.punctuation) + ' ').lower() == value: + return True + return False + + +class NewLineWordsChecker(Instruction): + """Write each word on a new line.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Write each word on a new line." 
+ return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response has each word on a new line.""" + value = value.translate(str.maketrans('', '', string.punctuation)) + lines = value.strip().split('\n') + while '' in lines: + lines.remove('') + return len(lines) == len(value.strip().split()) + + +class EmojiSentenceChecker(Instruction): + """Please use an emoji at the end of every sentence.""" + + def build_description(self): + """Build the instruction description.""" + nltk.download('punkt_tab') + self._description_pattern = "Please use an emoji at the end of every sentence." + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response includes an emoji at the end of every sentence.""" + + sentences = instructions_util.split_into_sentences(value) + for i, sentence in enumerate(sentences): + stripped = sentence.translate(str.maketrans('', '', string.punctuation)).strip() + last_char = stripped[-1] + # because blank spaces are treated oddly + second_last_char = stripped[-2] if len(stripped) > 1 else stripped[-1] + if not emoji.is_emoji(last_char) and not emoji.is_emoji(second_last_char): + if i < len(sentences) - 1: + stripped = sentences[i + 1].translate(str.maketrans('', '', string.punctuation)).strip() + # fixed empty string + if not stripped: + return False + first_char = stripped[0] + if not emoji.is_emoji(first_char): + return False + else: + return False + return True + + +class CharacterCountUniqueWordsChecker(Instruction): + """Respond with three sentences, all 
class NthWordJapaneseChecker(Instruction):
    """Every {N}th word of your response must be in Japanese."""

    def build_description(self, *, N=None):
        """Build the instruction description.

        Args:
          N: An integer specifying the cycle length for
            Japanese words to appear in the response. When missing or
            smaller than 1, a random value in [1, _NUM_WORD_CYCLE] is used.

        Returns:
          A string representing the instruction description.
        """
        # Zero is rejected too: `check_following` uses the value as a modulus.
        if N is None or N < 1:
            N = random.randint(1, _NUM_WORD_CYCLE)
        self._japanese_position = N

        # The ordinal suffix is derived from the resolved position, so a
        # missing N no longer raises TypeError on `None % 10`.
        self._description_pattern = "Every {N}th word of your response must be in Japanese."
        if N % 100 not in (11, 12, 13):  # 11th, 12th, 13th keep "th"
            # NOTE(review): the st/nd/rd variants omit "word"; kept verbatim.
            if N % 10 == 1:
                self._description_pattern = "Every {N}st of your response must be in Japanese."
            elif N % 10 == 2:
                self._description_pattern = "Every {N}nd of your response must be in Japanese."
            elif N % 10 == 3:
                self._description_pattern = "Every {N}rd of your response must be in Japanese."
        return self._description_pattern.format(N=self._japanese_position)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"N": self._japanese_position}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["N"]

    def check_following(self, value):
        """Checks if every {N}th word of the response is in Japanese.

        Words are whitespace-separated tokens. A token counts as Japanese when
        it contains at least one Hiragana, Katakana, or Kanji character.
        Tokens that are empty after stripping punctuation, or purely numeric,
        are not checked.

        Args:
          value: A string representing the response.

        Returns:
          True if every checked token at a multiple of N is Japanese.
        """
        japanese_pattern = re.compile(r'[\u3040-\u30ff\u4e00-\u9fff]')
        strip_chars = string.punctuation + ' '
        for position, word in enumerate(value.split(), start=1):
            if position % self._japanese_position != 0:
                continue
            word = word.strip(strip_chars)
            if word and not word.isdigit() and not japanese_pattern.search(word):
                return False
        return True
+ + Args: + small_n: An integer specifying the maximum number of times + that a word can be repeated in the response. + + Returns: + A string representing the instruction description. + """ + self._max_repeats = small_n + if self._max_repeats is None or self._max_repeats < 0: + self._max_repeats = random.randint(1, _MAX_REPEATS) + + self._description_pattern = "The response should not repeat any word more than {small_n} times." + return self._description_pattern.format(small_n=self._max_repeats) + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return {"small_n": self._max_repeats} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["small_n"] + + def check_following(self, value): + """Checks if the response repeats any word more than {small_n} times.""" + words = value.lower().translate(str.maketrans('', '', string.punctuation)).split() + word_count = Counter(words) + for word, count in word_count.items(): + if count > self._max_repeats: + return False + return True + + +class IncludeKeywordChecker(Instruction): + """The response must include keyword {word} in the {N}-th sentence.""" + + def build_description(self, *, word=None, N=None): + """Build the instruction description. + + Args: + word: A string specifying the keyword that is + required to appear in the response. + N: An integer specifying which sentence of the + response is required to have the keyword. + + Returns: + A string representing the instruction description. + """ + nltk.download('punkt_tab') + + if not word: + self._keyword = instructions_util.generate_keywords( + num_keywords=1 + )[0] + else: + self._keyword = word + self._keyword_position = N + if self._keyword_position is None or self._keyword_position < 0: + self._keyword_position = random.randint(1, _NUM_KEYWORD_SENTENCE) + + self._description_pattern = "The response must include keyword \"{word}\" in the {N}-th sentence." 
class PronounCountChecker(Instruction):
    """The response should include at least {N} pronouns."""

    def build_description(self, *, N=None):
        """Build the instruction description.

        Args:
          N: An integer specifying the minimum number of pronouns
            that is required to appear in the response. A missing or
            negative value is replaced by a random one.

        Returns:
          A string representing the instruction description.
        """
        if N is None or N < 0:
            self._num_pronouns = random.randint(1, _NUM_PRONOUNS)
        else:
            self._num_pronouns = N

        self._description_pattern = "The response should include at least {N} pronouns."
        return self._description_pattern.format(N=self._num_pronouns)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"N": self._num_pronouns}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["N"]

    def check_following(self, value):
        """Checks if the response includes at least {N} pronouns.

        Matching is case-insensitive and ignores ASCII punctuation; repeated
        pronouns are counted each time they occur.
        """
        known_pronouns = {
            'i', 'me', 'my', 'mine', 'myself',
            'we', 'us', 'our', 'ours', 'ourselves',
            'you', 'your', 'yours', 'yourself', 'yourselves',
            'he', 'him', 'his', 'himself',
            'she', 'her', 'hers', 'herself',
            'it', 'its', 'itself',
            'they', 'them', 'their', 'theirs', 'themselves',
        }
        # Slashes become spaces so that sets like she/her/hers count as
        # three separate pronouns instead of one fused token.
        cleaned = value.replace('/', ' ').lower()
        cleaned = cleaned.translate(str.maketrans('', '', string.punctuation))
        hits = 0
        for token in cleaned.split():
            if token in known_pronouns:
                hits += 1
        return hits >= self._num_pronouns
class LastWordFirstNextChecker(Instruction):
    """The last word of each sentence must become the first word of the next sentence."""

    def build_description(self):
        """Build the instruction description.

        Downloads the NLTK `punkt_tab` resource before building the text.

        Returns:
          A string representing the instruction description.
        """
        nltk.download('punkt_tab')
        self._description_pattern = "The last word of each sentence must become the first word of the next sentence."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the last word of each sentence in the response is the first word of the next sentence.

        Comparison is case-insensitive and ignores punctuation attached to
        either word, so "... went home." followed by "Home, sweet home."
        matches.

        Args:
          value: A string representing the response.

        Returns:
          True if every adjacent sentence pair chains correctly. A sentence
          without any word makes the check fail instead of raising.
        """
        sentences = instructions_util.split_into_sentences(value)
        for current, following in zip(sentences, sentences[1:]):
            current_words = self._bare_words(current)
            following_words = self._bare_words(following)
            if not current_words or not following_words:
                return False
            if current_words[-1] != following_words[0]:
                return False
        return True

    @staticmethod
    def _bare_words(sentence):
        """Returns the lowercased words of `sentence` with edge punctuation removed."""
        stripped = (word.strip(string.punctuation) for word in sentence.lower().split())
        return [word for word in stripped if word]
class IncrementingWordCountChecker(Instruction):
    """Each sentence must contain exactly {small_n} more words than the previous one."""

    def build_description(self, *, small_n=None):
        """Build the instruction description.

        Downloads the NLTK `punkt_tab` resource as a side effect.

        Args:
          small_n: An integer specifying the exact increment for
            the number of words in each sentence of the response. A missing
            or negative value is replaced by a random one.

        Returns:
          A string representing the instruction description.
        """
        self._num_increment = small_n
        if self._num_increment is None or self._num_increment < 0:
            self._num_increment = random.randint(1, _NUM_INCREMENT)

        nltk.download('punkt_tab')

        self._description_pattern = "Each sentence must contain exactly {small_n} more words than the previous one."
        return self._description_pattern.format(small_n=self._num_increment)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"small_n": self._num_increment}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["small_n"]

    def check_following(self, value):
        """Checks if each sentence of the response uses exactly {small_n} more words than the previous sentence.

        Words are whitespace-separated tokens counted after ASCII punctuation
        has been removed.

        Args:
          value: A string representing the response.

        Returns:
          True if every sentence has exactly `small_n` more words than the
          one before it. A single sentence passes; a response in which the
          splitter finds no sentence fails instead of raising IndexError.
        """
        drop_punctuation = str.maketrans('', '', string.punctuation)
        word_counts = [
            len(sentence.translate(drop_punctuation).split())
            for sentence in instructions_util.split_into_sentences(value)
        ]
        if not word_counts:
            return False
        return all(
            current - previous == self._num_increment
            for previous, current in zip(word_counts, word_counts[1:])
        )
class IndentStairsChecker(Instruction):
    """Create stairs by incrementally indenting each new line."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Create stairs by incrementally indenting each new line."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response incrementally indents each new line.

        Blank lines are ignored. Indentation is the number of leading space
        characters; tabs are not counted.

        Args:
          value: A string representing the response.

        Returns:
          True if every non-blank line is indented strictly more than the
          previous non-blank line (trivially True for zero or one line).
        """
        # Build a filtered list rather than removing from the list being
        # iterated, which skips elements and leaves consecutive blanks behind.
        indents = [
            len(line) - len(line.lstrip(' '))
            for line in value.split('\n')
            if line.strip()
        ]
        return all(shallower < deeper for shallower, deeper in zip(indents, indents[1:]))
+ return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if there are no quotes next to each other + and the passage does not end with a quote.""" + value = value.replace('“', '"').replace('”', '"') + value = value.replace("'\"'", '') # remove references to the character '"' + value = ''.join(value.split()) # remove all whitespace + if '""' in value: + return False + if value.strip(string.digits + string.punctuation.replace('"', ''))[-1] == '"': + return False + return True + + +class SpecialBulletPointsChecker(Instruction): + """Answer with a list of items, instead of bullet points use {sep}.""" + + def build_description(self, *, sep=None): + """Build the instruction description. + + Args: + sep: A string specifying the bullet point marker for + the list in the response. + + Returns: + A string representing the instruction description. + """ + self._bullet_marker = sep + if sep is None: + self._bullet_marker = random.choice(['...', 'SEPARATOR', '!?!?', '-']) + self._description_pattern = "Answer with a list of items, instead of bullet points use {sep}." 
class ItalicsThesisChecker(Instruction):
    """Each section must begin with a thesis statement in italics, use HTML to indicate the italics."""

    # Accepted HTML italics markup as (opening tag, closing tag) pairs.
    _ITALIC_TAG_PAIRS = (('<i>', '</i>'), ('<em>', '</em>'))

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Each section must begin with a thesis statement in italics, use HTML to indicate the italics."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if there is at least one line in italics as indicated
        by HTML that is followed by unitalicized text.

        Only the first occurrence of each tag kind is examined.

        Args:
          value: A string representing the response.

        Returns:
          True if the first italic span of either tag kind has non-blank
          content and is followed by non-blank text, False otherwise.
        """
        for opening, closing in self._ITALIC_TAG_PAIRS:
            # The tag literals must be non-empty: str.find('') is always 0,
            # which would make every response fail.
            start = value.find(opening)
            if start == -1:
                continue
            # Offsets come from the actual tag lengths so both tag kinds
            # are sliced correctly.
            thesis_start = start + len(opening)
            thesis_end = value.find(closing, thesis_start)
            if thesis_end == -1:
                continue
            thesis = value[thesis_start:thesis_end]
            trailing = value[thesis_end + len(closing):]
            if thesis.strip() and trailing.strip():
                return True
        return False
class SomeBulletPointsChecker(Instruction):
    """Your answer must contain at least two sentences ending in a period followed by at least two bullet points denoted by *."""

    def build_description(self):
        """Build the instruction description."""
        self._description_pattern = "Your answer must contain at least two sentences ending in a period followed by at least two bullet points denoted by *."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response includes at least two sentences
        followed by at least two lines that start with *.

        Blank lines are ignored. Once the first bullet line is seen, every
        remaining non-blank line must also be a bullet line.

        Args:
          value: A string representing the response.

        Returns:
          True if at least two sentences precede the bullets and there are
          at least two bullet lines, False otherwise.
        """
        # A dedicated flag tracks the prose phase. Sharing one variable with
        # the sentence list ended the phase whenever a line had no sentences.
        in_prose = True
        sentence_count = 0
        bullet_count = 0
        for line in value.split('\n'):
            stripped = line.strip()
            if not stripped:
                continue
            if stripped.startswith('*'):
                in_prose = False
                if sentence_count < 2:
                    return False
                bullet_count += 1
            elif in_prose:
                sentence_count += len(instructions_util.split_into_sentences(stripped))
            else:
                # Prose after the bullet list has started.
                return False
        return bullet_count >= 2
class ReverseNewlineChecker(Instruction):
    """List the countries of Africa in reverse alphabetical order, each on a new line."""

    def build_description(self, **kwargs):
        """Build the instruction description (keyword arguments are ignored)."""
        self._description_pattern = "List the countries of Africa in reverse alphabetical order, each on a new line."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response is a reverse-alphabetical list starting at Zimbabwe.

        Lines are trimmed of surrounding punctuation and spaces, and lines
        that end up empty are dropped. The check then requires that:
          1. Some line contains 'Zimbabwe'; everything before the first such
             line is ignored.
          2. At least 52 lines remain from that line onward.
          3. Those lines, with accents reduced to plain ASCII
             (e.g. 'São Tomé' -> 'Sao Tome'), are in non-increasing order.

        Returns:
          True if all three conditions hold, False otherwise.
        """
        trim_chars = string.punctuation + ' '
        entries = []
        for raw_line in value.split('\n'):
            cleaned = raw_line.strip(trim_chars)
            if cleaned:
                entries.append(cleaned)

        first = None
        for position, entry in enumerate(entries):
            if 'Zimbabwe' in entry:
                first = position
                break
        if first is None:
            return False

        candidates = entries[first:]
        if len(candidates) < 52:
            return False

        # NFKD splits accented letters into base letter + combining mark;
        # the ASCII round-trip then discards the marks.
        ascii_names = [
            unicodedata.normalize('NFKD', name).encode('ASCII', 'ignore').decode('ASCII')
            for name in candidates
        ]
        # A list equals its descending sort exactly when it never increases.
        return all(earlier >= later for earlier, later in zip(ascii_names, ascii_names[1:]))
Respond to this query, but make your sentence in reverse order of what it should be, per word.""" + + def build_description(self, **kwargs): + nltk.download('punkt_tab') + self._description_pattern = "What animal is the national symbol of the US? Respond to this query, but make your sentence in reverse order of what it should be, per word." + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the reverse of the sentence is a valid English sentence.""" + value = value.lower().strip().translate(str.maketrans('', '', string.punctuation)) + value = ' '.join(value.split()[::-1]) + if 'bald eagle' not in value: + return False + return value in instructions_util.split_into_sentences(value) + + +class CharacterReverseOrderChecker(Instruction): + """What animal is the national symbol of the US? Respond to this query, but make your sentence in reverse order of what it should be, per letter.""" + + def build_description(self, **kwargs): + self._description_pattern = "What animal is the national symbol of the US? Respond to this query, but make your sentence in reverse order of what it should be, per letter." 
class SentenceAlphabetChecker(Instruction):
    """Tell me a 26-sentence story where each sentence's first word starts with the letters of the alphabet in order."""

    def build_description(self, **kwargs):
        """Build the instruction description (keyword arguments are ignored).

        Downloads the NLTK `punkt_tab` resource before building the text.
        """
        nltk.download('punkt_tab')
        self._description_pattern = "Tell me a 26-sentence story where each sentence's first word starts with the letters of the alphabet in order."
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response has 26 sentences whose initials run from a to z.

        The first character of each sentence's first whitespace-separated
        token is compared case-insensitively with the expected letter.
        """
        alphabet = string.ascii_lowercase
        sentences = instructions_util.split_into_sentences(value)
        if len(sentences) != len(alphabet):
            return False
        return all(
            sentence.split()[0].lower()[0] == letter
            for sentence, letter in zip(sentences, alphabet)
        )
+ return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response lists the relevant capitals of Europe in correct order.""" + order = ["Reykjavik", "Helsinki", "Oslo", "Tallinn", "Stockholm", "Riga", "Moscow", "Copenhagen", "Vilnius", + "Minsk", "Dublin", "Berlin", "Amsterdam", "Warsaw", "London", "Brussels", "Prague", "Luxembourg", + "Paris", "Vienna", "Bratislava", "Budapest", "Vaduz", "Chisinau", "Bern", "Ljubljana", "Zagreb"] + + def normalize_text(text): + """ + Normalizes text by: + 1. Converting to NFKD form (separates combined characters) + 2. Removes diacritical marks + 3. Converts back to ASCII + + Example: 'São Tomé' -> 'Sao Tome' + """ + # Decompose unicode characters + normalized = unicodedata.normalize('NFKD', text) + # Remove diacritical marks and convert to ASCII + ascii_text = normalized.encode('ASCII', 'ignore').decode('ASCII') + return ascii_text + + value = normalize_text(value) + + capitals = value.split(',') + capitals = [cap for cap in capitals if cap.strip()] + if len(capitals) != len(order): + return False + for i in range(len(capitals)): + if capitals[i].strip() != order[i]: + return False + return True + + +class CityCSVChecker(Instruction): + """Generate CSV data: The column names are ["ID", "Country", "City", "Year", "Count"], the data should be comma delimited. Please generate 7 rows.""" + + def build_description(self, **kwargs): + """Build the instruction description.""" + self._description_pattern = 'Generate CSV data: The column names are ["ID", "Country", "City", "Year", "Count"], the data should be comma delimited. Please generate 7 rows.' 
class SpecialCharacterCSVChecker(Instruction):
    """Generate CSV data: The column names are ["ProductID", "Category", "Brand", "Price", "Stock"], the data should be comma delimited. Please generate 14 rows. Add one field which contains a special character and enclose it in double quotes."""

    def build_description(self, **kwargs):
        """Build the instruction description."""
        self._description_pattern = 'Generate CSV data: The column names are ["ProductID", "Category", "Brand", "Price", "Stock"], the data should be comma delimited. Please generate 14 rows. Add one field which contains a special character and enclose it in double quotes.'
        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response is valid csv data with column names
        ["ProductID", "Category", "Brand", "Price", "Stock"] and 14 rows.
        Also checks if one field contains a special character enclosed in double quotes.

        Args:
          value: A string representing the response.

        Returns:
          True if the header matches, there are exactly 15 csv records
          (header plus 14 rows), every data row has 5 fields, and at least
          one field is a double-quoted value containing a character that is
          neither a word character nor whitespace.
        """
        header = value.split('\n')[0].strip()
        if not re.match(
                r'^(ProductID|"ProductID"),[ \t]*(Category|"Category"),[ \t]*(Brand|"Brand"),[ \t]*(Price|"Price"),[ \t]*(Stock|"Stock")$',
                header):
            return False

        # Tripling each quote makes the csv parser keep the original quotes
        # inside the field, so quoting is still visible after parsing.
        value = value.replace('"', '"""')
        data = list(csv.reader(io.StringIO(value)))
        if len(data) != 15:
            return False

        found_special = False
        for row in data[1:]:
            # Validate every row; do not stop at the first special field,
            # or later malformed rows would go unchecked.
            if len(row) != 5:
                return False
            if not found_special and any(re.match(r'".*[^\d\w\s].*"', field) for field in row):
                found_special = True
        return found_special
+ Also checks if each field is enclosed in double quotes.""" + header = value.split('\n')[0].strip() + if not re.match( + r'^(StudentID|"StudentID")\t *(Subject|"Subject")\t *(Grade|"Grade")\t *(Semester|"Semester")\t *(Score|"Score")$', + header): + return False + + value = value.replace('"', '"""') + string_io = io.StringIO(value) + reader = csv.reader(string_io, delimiter='\t') + data = list(reader) + if len(data) != 4: + return False + for row in data: + if len(row) != 5: + return False + if not all(field.strip()[0] == '"' and field.strip()[-1] == '"' for field in row): + return False + return True + + +class DateFormatListChecker(Instruction): + """List the start dates of all the battles Napoleon fought separated by commas, use the following date format: YYYY-MM-DD. Do not provide an explanation.""" + + def build_description(self, **kwargs): + """Build the instruction description.""" + self._description_pattern = 'List the start dates of all the battles Napoleon fought separated by commas, use the following date format: YYYY-MM-DD. Do not provide an explanation.' 
def check_following(self, value):
    """Check that the response is a comma-separated list of valid dates.

    Every comma-separated entry must be written as YYYY-MM-DD, must have a
    year between 1769 and 1821 (inclusive), and must be a date that actually
    exists on the calendar.

    Args:
        value: A string representing the response.

    Returns:
        True if every entry is a well-formed, existing date in the accepted
        year range; otherwise, False.
    """
    # Function-scope import so the check does not depend on the module's
    # top-level import list.
    import datetime

    for entry in value.strip().split(','):
        entry = entry.strip()
        if not re.match(r'^\d{4}-\d{2}-\d{2}$', entry):
            return False
        year, month, day = (int(part) for part in entry.split('-'))
        if year < 1769 or year > 1821:
            return False
        # datetime.date rejects month 00, day 00, day 31 in 30-day months and
        # February 29 outside leap years, none of which the manual
        # upper-bound checks caught.
        try:
            datetime.date(year, month, day)
        except ValueError:
            return False
    return True
def check_following(self, value):
    """Check that each keyword occurs exactly the required number of times.

    Occurrences are counted case-insensitively as substrings of the response:
    keyword1 once, keyword2 twice, keyword3 three times, keyword4 five times
    and keyword5 seven times.

    Args:
        value: A string representing the response.

    Returns:
        True if all five counts match exactly; otherwise, False.
    """
    expectations = (
        (self._keyword1, 1),
        (self._keyword2, 2),
        (self._keyword3, 3),
        (self._keyword4, 5),
        (self._keyword5, 7),
    )
    lowered = value.lower()
    return all(
        lowered.count(keyword.lower()) == times
        for keyword, times in expectations
    )
def check_following(self, value):
    """Check that the keyword is the m-th word of the n-th sentence.

    Args:
        value: A string representing the response.

    Returns:
        True if the response has at least n sentences, the n-th sentence has
        at least m tokens, and its m-th token equals the keyword exactly
        (case-sensitive); otherwise, False.
    """
    sentences = instructions_util.split_into_sentences(value)
    if self._n > len(sentences):
        return False
    # n and m are 1-based positions, hence the "- 1" when indexing.
    tokens = instructions_util.nltk.word_tokenize(sentences[self._n - 1])
    if self._m > len(tokens):
        return False
    return tokens[self._m - 1] == self._keyword
def check_following(self, value):
    """Check that the second and second-to-last words are both the keyword.

    Args:
        value: A string representing the response.

    Returns:
        True if the response tokenizes into at least two words and both the
        second token and the second-to-last token equal the keyword exactly;
        otherwise, False.
    """
    tokens = instructions_util.nltk.word_tokenize(value)
    if len(tokens) < 2:
        return False
    return tokens[1] == self._keyword and tokens[-2] == self._keyword
def check_following(self, value):
    """Check that the response repeats the request with a changed first word.

    Both texts are compared word by word after splitting on whitespace, so
    differences in spacing or trailing newlines do not matter.

    Args:
        value: A string representing the response.

    Returns:
        True if the response is non-empty, its first word differs from the
        first word of the request, and all remaining words match the request
        exactly; otherwise, False.
    """
    original_words = self._prompt_to_repeat.split()
    response_words = value.split()
    # An empty response repeats nothing; it must not pass just because a
    # one-word request has no remaining words to compare.
    if not response_words:
        return False
    # Compare the first words themselves rather than the raw strings, so a
    # verbatim copy that only differs in whitespace is still rejected.
    if response_words[:1] == original_words[:1]:
        return False
    return response_words[1:] == original_words[1:]
def check_following(self, value):
    """Check that the response is exactly the requested span of the prompt.

    The prompt and the response are lower-cased and split on whitespace; the
    response must equal the prompt words from index n_start up to and
    including index n_end (0-based word indices).

    Args:
        value: A string representing the response.

    Returns:
        True if the response words equal the inclusive span of prompt words;
        otherwise, False.
    """
    prompt_words = self._prompt_to_repeat.strip().lower().split()
    # The instruction asks for the span "between (and including)" both
    # indices, so the slice has to run one past n_end to keep that word.
    expected = prompt_words[self._n_start:self._n_end + 1]
    return value.strip().lower().split() == expected
def check_following(self, value):
    """Check that the response contains every label of the template.

    Args:
        value: A string representing the response.

    Returns:
        True if 'My Answer:', 'My Conclusion:' and 'Future Outlook:' all
        occur somewhere in the response (matched case-sensitively, in any
        order); otherwise, False.
    """
    required_labels = ('My Answer:', 'My Conclusion:', 'Future Outlook:')
    return all(label in value for label in required_labels)
def check_following(self, value):
    """Check that the response is free of whitespace characters.

    Args:
        value: A string representing the response.

    Returns:
        True if no character of the response is whitespace according to
        `str.isspace` (an empty response therefore passes); otherwise, False.
    """
    for character in value:
        if character.isspace():
            return False
    return True
# Maps each IFBench instruction id ("<group>:<name>") to the checker class
# from the instructions module that verifies it.
INSTRUCTION_DICT = {
    "count:word_count_range": instructions.WordCountRangeChecker,
    "count:unique_word_count": instructions.UniqueWordCountChecker,
    "ratio:stop_words": instructions.StopWordPercentageChecker,
    "ratio:sentence_type": instructions.SentTypeRatioChecker,
    "ratio:sentence_balance": instructions.SentBalanceChecker,
    "count:conjunctions": instructions.ConjunctionCountChecker,
    "count:person_names": instructions.PersonNameCountChecker,
    "ratio:overlap": instructions.NGramOverlapChecker,
    "count:numbers": instructions.NumbersCountChecker,
    "words:alphabet": instructions.AlphabetLoopChecker,
    "words:vowel": instructions.SingleVowelParagraphChecker,
    "words:consonants": instructions.ConsonantClusterChecker,
    "sentence:alliteration_increment": instructions.IncrementingAlliterationChecker,
    "words:palindrome": instructions.PalindromeChecker,
    "count:punctuation": instructions.PunctuationCoverChecker,
    "format:parentheses": instructions.NestedParenthesesChecker,
    "format:quotes": instructions.NestedQuotesChecker,
    "words:prime_lengths": instructions.PrimeLengthsChecker,
    "format:options": instructions.OptionsResponseChecker,
    "format:newline": instructions.NewLineWordsChecker,
    "format:emoji": instructions.EmojiSentenceChecker,
    "ratio:sentence_words": instructions.CharacterCountUniqueWordsChecker,
    "count:words_japanese": instructions.NthWordJapaneseChecker,
    "words:start_verb": instructions.StartWithVerbChecker,
    "words:repeats": instructions.LimitedWordRepeatChecker,
    "sentence:keyword": instructions.IncludeKeywordChecker,
    "count:pronouns": instructions.PronounCountChecker,
    "words:odd_even_syllables": instructions.AlternateParitySyllablesChecker,
    "words:last_first": instructions.LastWordFirstNextChecker,
    "words:paragraph_last_first": instructions.ParagraphLastFirstWordMatchChecker,
    "sentence:increment": instructions.IncrementingWordCountChecker,
    "words:no_consecutive": instructions.NoConsecutiveFirstLetterChecker,
    "format:line_indent": instructions.IndentStairsChecker,
    "format:quote_unquote": instructions.QuoteExplanationChecker,
    "format:list": instructions.SpecialBulletPointsChecker,
    "format:thesis": instructions.ItalicsThesisChecker,
    "format:sub-bullets": instructions.SubBulletPointsChecker,
    "format:no_bullets_bullets": instructions.SomeBulletPointsChecker,
    "custom:multiples": instructions.PrintMultiplesChecker,
    "custom:mcq_count_length": instructions.MultipleChoiceQuestionsChecker,
    "custom:reverse_newline": instructions.ReverseNewlineChecker,
    "custom:word_reverse": instructions.WordReverseOrderChecker,
    "custom:character_reverse": instructions.CharacterReverseOrderChecker,
    "custom:sentence_alphabet": instructions.SentenceAlphabetChecker,
    "custom:european_capitals_sort": instructions.EuropeanCapitalsSortChecker,
    "custom:csv_city": instructions.CityCSVChecker,
    "custom:csv_special_character": instructions.SpecialCharacterCSVChecker,
    "custom:csv_quotes": instructions.QuotesCSVChecker,
    "custom:date_format_list": instructions.DateFormatListChecker,
    "count:keywords_multiple": instructions.KeywordsMultipleChecker,
    "words:keywords_specific_position": instructions.KeywordSpecificPositionChecker,
    "words:words_position": instructions.WordsPositionChecker,
    "repeat:repeat_change": instructions.RepeatChangeChecker,
    "repeat:repeat_simple": instructions.RepeatSimpleChecker,
    "repeat:repeat_span": instructions.RepeatSpanChecker,
    "format:title_case": instructions.TitleCaseChecker,
    "format:output_template": instructions.OutputTemplateChecker,
    "format:no_whitespace": instructions.NoWhitespaceChecker,
}
b/eval_protocol/rewards/ifeval/ifbench_util.py @@ -0,0 +1,1649 @@ +# Copyright 2025 Allen Institute for AI. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility library of instructions.""" + +import functools +import random +import re + +import immutabledict +import nltk +import ast + +WORD_LIST = [ + "western", + "sentence", + "signal", + "dump", + "spot", + "opposite", + "bottom", + "potato", + "administration", + "working", + "welcome", + "morning", + "good", + "agency", + "primary", + "wish", + "responsibility", + "press", + "problem", + "president", + "steal", + "brush", + "read", + "type", + "beat", + "trainer", + "growth", + "lock", + "bone", + "case", + "equal", + "comfortable", + "region", + "replacement", + "performance", + "mate", + "walk", + "medicine", + "film", + "thing", + "rock", + "tap", + "total", + "competition", + "ease", + "south", + "establishment", + "gather", + "parking", + "world", + "plenty", + "breath", + "claim", + "alcohol", + "trade", + "dear", + "highlight", + "street", + "matter", + "decision", + "mess", + "agreement", + "studio", + "coach", + "assist", + "brain", + "wing", + "style", + "private", + "top", + "brown", + "leg", + "buy", + "procedure", + "method", + "speed", + "high", + "company", + "valuable", + "pie", + "analyst", + "session", + "pattern", + "district", + "pleasure", + "dinner", + "swimming", + "joke", + "order", + "plate", + "department", + "motor", + "cell", + "spend", + "cabinet", + "difference", + "power", + 
"examination", + "engine", + "horse", + "dimension", + "pay", + "toe", + "curve", + "literature", + "bother", + "fire", + "possibility", + "debate", + "activity", + "passage", + "hello", + "cycle", + "background", + "quiet", + "author", + "effect", + "actor", + "page", + "bicycle", + "error", + "throat", + "attack", + "character", + "phone", + "tea", + "increase", + "outcome", + "file", + "specific", + "inspector", + "internal", + "potential", + "staff", + "building", + "employer", + "shoe", + "hand", + "direction", + "garden", + "purchase", + "interview", + "study", + "recognition", + "member", + "spiritual", + "oven", + "sandwich", + "weird", + "passenger", + "particular", + "response", + "reaction", + "size", + "variation", + "a", + "cancel", + "candy", + "exit", + "guest", + "condition", + "fly", + "price", + "weakness", + "convert", + "hotel", + "great", + "mouth", + "mind", + "song", + "sugar", + "suspect", + "telephone", + "ear", + "roof", + "paint", + "refrigerator", + "organization", + "jury", + "reward", + "engineering", + "day", + "possession", + "crew", + "bar", + "road", + "description", + "celebration", + "score", + "mark", + "letter", + "shower", + "suggestion", + "sir", + "luck", + "national", + "progress", + "hall", + "stroke", + "theory", + "offer", + "story", + "tax", + "definition", + "history", + "ride", + "medium", + "opening", + "glass", + "elevator", + "stomach", + "question", + "ability", + "leading", + "village", + "computer", + "city", + "grand", + "confidence", + "candle", + "priest", + "recommendation", + "point", + "necessary", + "body", + "desk", + "secret", + "horror", + "noise", + "culture", + "warning", + "water", + "round", + "diet", + "flower", + "bus", + "tough", + "permission", + "week", + "prompt", + "connection", + "abuse", + "height", + "save", + "corner", + "border", + "stress", + "drive", + "stop", + "rip", + "meal", + "listen", + "confusion", + "girlfriend", + "living", + "relation", + "significance", + "plan", + 
"creative", + "atmosphere", + "blame", + "invite", + "housing", + "paper", + "drink", + "roll", + "silver", + "drunk", + "age", + "damage", + "smoke", + "environment", + "pack", + "savings", + "influence", + "tourist", + "rain", + "post", + "sign", + "grandmother", + "run", + "profit", + "push", + "clerk", + "final", + "wine", + "swim", + "pause", + "stuff", + "singer", + "funeral", + "average", + "source", + "scene", + "tradition", + "personal", + "snow", + "nobody", + "distance", + "sort", + "sensitive", + "animal", + "major", + "negotiation", + "click", + "mood", + "period", + "arrival", + "expression", + "holiday", + "repeat", + "dust", + "closet", + "gold", + "bad", + "sail", + "combination", + "clothes", + "emphasis", + "duty", + "black", + "step", + "school", + "jump", + "document", + "professional", + "lip", + "chemical", + "front", + "wake", + "while", + "inside", + "watch", + "row", + "subject", + "penalty", + "balance", + "possible", + "adult", + "aside", + "sample", + "appeal", + "wedding", + "depth", + "king", + "award", + "wife", + "blow", + "site", + "camp", + "music", + "safe", + "gift", + "fault", + "guess", + "act", + "shame", + "drama", + "capital", + "exam", + "stupid", + "record", + "sound", + "swing", + "novel", + "minimum", + "ratio", + "machine", + "shape", + "lead", + "operation", + "salary", + "cloud", + "affair", + "hit", + "chapter", + "stage", + "quantity", + "access", + "army", + "chain", + "traffic", + "kick", + "analysis", + "airport", + "time", + "vacation", + "philosophy", + "ball", + "chest", + "thanks", + "place", + "mountain", + "advertising", + "red", + "past", + "rent", + "return", + "tour", + "house", + "construction", + "net", + "native", + "war", + "figure", + "fee", + "spray", + "user", + "dirt", + "shot", + "task", + "stick", + "friend", + "software", + "promotion", + "interaction", + "surround", + "block", + "purpose", + "practice", + "conflict", + "routine", + "requirement", + "bonus", + "hole", + "state", + "junior", + 
"sweet", + "catch", + "tear", + "fold", + "wall", + "editor", + "life", + "position", + "pound", + "respect", + "bathroom", + "coat", + "script", + "job", + "teach", + "birth", + "view", + "resolve", + "theme", + "employee", + "doubt", + "market", + "education", + "serve", + "recover", + "tone", + "harm", + "miss", + "union", + "understanding", + "cow", + "river", + "association", + "concept", + "training", + "recipe", + "relationship", + "reserve", + "depression", + "proof", + "hair", + "revenue", + "independent", + "lift", + "assignment", + "temporary", + "amount", + "loss", + "edge", + "track", + "check", + "rope", + "estimate", + "pollution", + "stable", + "message", + "delivery", + "perspective", + "mirror", + "assistant", + "representative", + "witness", + "nature", + "judge", + "fruit", + "tip", + "devil", + "town", + "emergency", + "upper", + "drop", + "stay", + "human", + "neck", + "speaker", + "network", + "sing", + "resist", + "league", + "trip", + "signature", + "lawyer", + "importance", + "gas", + "choice", + "engineer", + "success", + "part", + "external", + "worker", + "simple", + "quarter", + "student", + "heart", + "pass", + "spite", + "shift", + "rough", + "lady", + "grass", + "community", + "garage", + "youth", + "standard", + "skirt", + "promise", + "blind", + "television", + "disease", + "commission", + "positive", + "energy", + "calm", + "presence", + "tune", + "basis", + "preference", + "head", + "common", + "cut", + "somewhere", + "presentation", + "current", + "thought", + "revolution", + "effort", + "master", + "implement", + "republic", + "floor", + "principle", + "stranger", + "shoulder", + "grade", + "button", + "tennis", + "police", + "collection", + "account", + "register", + "glove", + "divide", + "professor", + "chair", + "priority", + "combine", + "peace", + "extension", + "maybe", + "evening", + "frame", + "sister", + "wave", + "code", + "application", + "mouse", + "match", + "counter", + "bottle", + "half", + "cheek", + 
"resolution", + "back", + "knowledge", + "make", + "discussion", + "screw", + "length", + "accident", + "battle", + "dress", + "knee", + "log", + "package", + "it", + "turn", + "hearing", + "newspaper", + "layer", + "wealth", + "profile", + "imagination", + "answer", + "weekend", + "teacher", + "appearance", + "meet", + "bike", + "rise", + "belt", + "crash", + "bowl", + "equivalent", + "support", + "image", + "poem", + "risk", + "excitement", + "remote", + "secretary", + "public", + "produce", + "plane", + "display", + "money", + "sand", + "situation", + "punch", + "customer", + "title", + "shake", + "mortgage", + "option", + "number", + "pop", + "window", + "extent", + "nothing", + "experience", + "opinion", + "departure", + "dance", + "indication", + "boy", + "material", + "band", + "leader", + "sun", + "beautiful", + "muscle", + "farmer", + "variety", + "fat", + "handle", + "director", + "opportunity", + "calendar", + "outside", + "pace", + "bath", + "fish", + "consequence", + "put", + "owner", + "go", + "doctor", + "information", + "share", + "hurt", + "protection", + "career", + "finance", + "force", + "golf", + "garbage", + "aspect", + "kid", + "food", + "boot", + "milk", + "respond", + "objective", + "reality", + "raw", + "ring", + "mall", + "one", + "impact", + "area", + "news", + "international", + "series", + "impress", + "mother", + "shelter", + "strike", + "loan", + "month", + "seat", + "anything", + "entertainment", + "familiar", + "clue", + "year", + "glad", + "supermarket", + "natural", + "god", + "cost", + "conversation", + "tie", + "ruin", + "comfort", + "earth", + "storm", + "percentage", + "assistance", + "budget", + "strength", + "beginning", + "sleep", + "other", + "young", + "unit", + "fill", + "store", + "desire", + "hide", + "value", + "cup", + "maintenance", + "nurse", + "function", + "tower", + "role", + "class", + "camera", + "database", + "panic", + "nation", + "basket", + "ice", + "art", + "spirit", + "chart", + "exchange", + 
"feedback", + "statement", + "reputation", + "search", + "hunt", + "exercise", + "nasty", + "notice", + "male", + "yard", + "annual", + "collar", + "date", + "platform", + "plant", + "fortune", + "passion", + "friendship", + "spread", + "cancer", + "ticket", + "attitude", + "island", + "active", + "object", + "service", + "buyer", + "bite", + "card", + "face", + "steak", + "proposal", + "patient", + "heat", + "rule", + "resident", + "broad", + "politics", + "west", + "knife", + "expert", + "girl", + "design", + "salt", + "baseball", + "grab", + "inspection", + "cousin", + "couple", + "magazine", + "cook", + "dependent", + "security", + "chicken", + "version", + "currency", + "ladder", + "scheme", + "kitchen", + "employment", + "local", + "attention", + "manager", + "fact", + "cover", + "sad", + "guard", + "relative", + "county", + "rate", + "lunch", + "program", + "initiative", + "gear", + "bridge", + "breast", + "talk", + "dish", + "guarantee", + "beer", + "vehicle", + "reception", + "woman", + "substance", + "copy", + "lecture", + "advantage", + "park", + "cold", + "death", + "mix", + "hold", + "scale", + "tomorrow", + "blood", + "request", + "green", + "cookie", + "church", + "strip", + "forever", + "beyond", + "debt", + "tackle", + "wash", + "following", + "feel", + "maximum", + "sector", + "sea", + "property", + "economics", + "menu", + "bench", + "try", + "language", + "start", + "call", + "solid", + "address", + "income", + "foot", + "senior", + "honey", + "few", + "mixture", + "cash", + "grocery", + "link", + "map", + "form", + "factor", + "pot", + "model", + "writer", + "farm", + "winter", + "skill", + "anywhere", + "birthday", + "policy", + "release", + "husband", + "lab", + "hurry", + "mail", + "equipment", + "sink", + "pair", + "driver", + "consideration", + "leather", + "skin", + "blue", + "boat", + "sale", + "brick", + "two", + "feed", + "square", + "dot", + "rush", + "dream", + "location", + "afternoon", + "manufacturer", + "control", + "occasion", + 
"trouble", + "introduction", + "advice", + "bet", + "eat", + "kill", + "category", + "manner", + "office", + "estate", + "pride", + "awareness", + "slip", + "crack", + "client", + "nail", + "shoot", + "membership", + "soft", + "anybody", + "web", + "official", + "individual", + "pizza", + "interest", + "bag", + "spell", + "profession", + "queen", + "deal", + "resource", + "ship", + "guy", + "chocolate", + "joint", + "formal", + "upstairs", + "car", + "resort", + "abroad", + "dealer", + "associate", + "finger", + "surgery", + "comment", + "team", + "detail", + "crazy", + "path", + "tale", + "initial", + "arm", + "radio", + "demand", + "single", + "draw", + "yellow", + "contest", + "piece", + "quote", + "pull", + "commercial", + "shirt", + "contribution", + "cream", + "channel", + "suit", + "discipline", + "instruction", + "concert", + "speech", + "low", + "effective", + "hang", + "scratch", + "industry", + "breakfast", + "lay", + "join", + "metal", + "bedroom", + "minute", + "product", + "rest", + "temperature", + "many", + "give", + "argument", + "print", + "purple", + "laugh", + "health", + "credit", + "investment", + "sell", + "setting", + "lesson", + "egg", + "middle", + "marriage", + "level", + "evidence", + "phrase", + "love", + "self", + "benefit", + "guidance", + "affect", + "you", + "dad", + "anxiety", + "special", + "boyfriend", + "test", + "blank", + "payment", + "soup", + "obligation", + "reply", + "smile", + "deep", + "complaint", + "addition", + "review", + "box", + "towel", + "minor", + "fun", + "soil", + "issue", + "cigarette", + "internet", + "gain", + "tell", + "entry", + "spare", + "incident", + "family", + "refuse", + "branch", + "can", + "pen", + "grandfather", + "constant", + "tank", + "uncle", + "climate", + "ground", + "volume", + "communication", + "kind", + "poet", + "child", + "screen", + "mine", + "quit", + "gene", + "lack", + "charity", + "memory", + "tooth", + "fear", + "mention", + "marketing", + "reveal", + "reason", + "court", + 
"season", + "freedom", + "land", + "sport", + "audience", + "classroom", + "law", + "hook", + "win", + "carry", + "eye", + "smell", + "distribution", + "research", + "country", + "dare", + "hope", + "whereas", + "stretch", + "library", + "if", + "delay", + "college", + "plastic", + "book", + "present", + "use", + "worry", + "champion", + "goal", + "economy", + "march", + "election", + "reflection", + "midnight", + "slide", + "inflation", + "action", + "challenge", + "guitar", + "coast", + "apple", + "campaign", + "field", + "jacket", + "sense", + "way", + "visual", + "remove", + "weather", + "trash", + "cable", + "regret", + "buddy", + "beach", + "historian", + "courage", + "sympathy", + "truck", + "tension", + "permit", + "nose", + "bed", + "son", + "person", + "base", + "meat", + "usual", + "air", + "meeting", + "worth", + "game", + "independence", + "physical", + "brief", + "play", + "raise", + "board", + "she", + "key", + "writing", + "pick", + "command", + "party", + "yesterday", + "spring", + "candidate", + "physics", + "university", + "concern", + "development", + "change", + "string", + "target", + "instance", + "room", + "bitter", + "bird", + "football", + "normal", + "split", + "impression", + "wood", + "long", + "meaning", + "stock", + "cap", + "leadership", + "media", + "ambition", + "fishing", + "essay", + "salad", + "repair", + "today", + "designer", + "night", + "bank", + "drawing", + "inevitable", + "phase", + "vast", + "chip", + "anger", + "switch", + "cry", + "twist", + "personality", + "attempt", + "storage", + "being", + "preparation", + "bat", + "selection", + "white", + "technology", + "contract", + "side", + "section", + "station", + "till", + "structure", + "tongue", + "taste", + "truth", + "difficulty", + "group", + "limit", + "main", + "move", + "feeling", + "light", + "example", + "mission", + "might", + "wait", + "wheel", + "shop", + "host", + "classic", + "alternative", + "cause", + "agent", + "consist", + "table", + "airline", + 
"text", + "pool", + "craft", + "range", + "fuel", + "tool", + "partner", + "load", + "entrance", + "deposit", + "hate", + "article", + "video", + "summer", + "feature", + "extreme", + "mobile", + "hospital", + "flight", + "fall", + "pension", + "piano", + "fail", + "result", + "rub", + "gap", + "system", + "report", + "suck", + "ordinary", + "wind", + "nerve", + "ask", + "shine", + "note", + "line", + "mom", + "perception", + "brother", + "reference", + "bend", + "charge", + "treat", + "trick", + "term", + "homework", + "bake", + "bid", + "status", + "project", + "strategy", + "orange", + "let", + "enthusiasm", + "parent", + "concentrate", + "device", + "travel", + "poetry", + "business", + "society", + "kiss", + "end", + "vegetable", + "employ", + "schedule", + "hour", + "brave", + "focus", + "process", + "movie", + "illegal", + "general", + "coffee", + "ad", + "highway", + "chemistry", + "psychology", + "hire", + "bell", + "conference", + "relief", + "show", + "neat", + "funny", + "weight", + "quality", + "club", + "daughter", + "zone", + "touch", + "tonight", + "shock", + "burn", + "excuse", + "name", + "survey", + "landscape", + "advance", + "satisfaction", + "bread", + "disaster", + "item", + "hat", + "prior", + "shopping", + "visit", + "east", + "photo", + "home", + "idea", + "father", + "comparison", + "cat", + "pipe", + "winner", + "count", + "lake", + "fight", + "prize", + "foundation", + "dog", + "keep", + "ideal", + "fan", + "struggle", + "peak", + "safety", + "solution", + "hell", + "conclusion", + "population", + "strain", + "alarm", + "measurement", + "second", + "train", + "race", + "due", + "insurance", + "boss", + "tree", + "monitor", + "sick", + "course", + "drag", + "appointment", + "slice", + "still", + "care", + "patience", + "rich", + "escape", + "emotion", + "royal", + "female", + "childhood", + "government", + "picture", + "will", + "sock", + "big", + "gate", + "oil", + "cross", + "pin", + "improvement", + "championship", + "silly", + 
"help", + "sky", + "pitch", + "man", + "diamond", + "most", + "transition", + "work", + "science", + "committee", + "moment", + "fix", + "teaching", + "dig", + "specialist", + "complex", + "guide", + "people", + "dead", + "voice", + "original", + "break", + "topic", + "data", + "degree", + "reading", + "recording", + "bunch", + "reach", + "judgment", + "lie", + "regular", + "set", + "painting", + "mode", + "list", + "player", + "bear", + "north", + "wonder", + "carpet", + "heavy", + "officer", + "negative", + "clock", + "unique", + "baby", + "pain", + "assumption", + "disk", + "iron", + "bill", + "drawer", + "look", + "double", + "mistake", + "finish", + "future", + "brilliant", + "contact", + "math", + "rice", + "leave", + "restaurant", + "discount", + "sex", + "virus", + "bit", + "trust", + "event", + "wear", + "juice", + "failure", + "bug", + "context", + "mud", + "whole", + "wrap", + "intention", + "draft", + "pressure", + "cake", + "dark", + "explanation", + "space", + "angle", + "word", + "efficiency", + "management", + "habit", + "star", + "chance", + "finding", + "transportation", + "stand", + "criticism", + "flow", + "door", + "injury", + "insect", + "surprise", + "apartment", +] # pylint: disable=line-too-long + +def download_nltk_resources(): + """Download 'punkt' if not already installed""" + try: + nltk.data.find("tokenizers/punkt") + except LookupError: + nltk.download("punkt") + + +download_nltk_resources() + + +_ALPHABETS = "([A-Za-z])" +_PREFIXES = "(Mr|St|Mrs|Ms|Dr)[.]" +_SUFFIXES = "(Inc|Ltd|Jr|Sr|Co)" +_STARTERS = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)" +_ACRONYMS = "([A-Z][.][A-Z][.](?:[A-Z][.])?)" +_WEBSITES = "[.](com|net|org|io|gov|edu|me)" +_DIGITS = "([0-9])" +_MULTIPLE_DOTS = r"\.{2,}" + + +def split_into_sentences(text): + """Split the text into sentences. + + Args: + text: A string that consists of more than or equal to one sentences. 
# Regex fragments used by `split_into_sentences` to recognise periods that do
# NOT terminate a sentence (titles, company suffixes, acronyms, web domains,
# decimals, ellipses).
_ALPHABETS = "([A-Za-z])"
_PREFIXES = "(Mr|St|Mrs|Ms|Dr)[.]"
_SUFFIXES = "(Inc|Ltd|Jr|Sr|Co)"
_STARTERS = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
_ACRONYMS = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
_WEBSITES = "[.](com|net|org|io|gov|edu|me)"
_DIGITS = "([0-9])"
_MULTIPLE_DOTS = r"\.{2,}"

# Sentinels spliced into the text while it is being analysed. They are
# private-use code points (written as escapes) so they cannot collide with
# ordinary response text, contain no dots or spaces that the patterns above
# could match, and survive any markup stripping applied to this file.
_PERIOD_MARK = "\ue000"  # stands in for a "." that must not end a sentence
_STOP_MARK = "\ue001"  # marks a confirmed sentence boundary


def split_into_sentences(text):
    """Split the text into sentences.

    The function works in three phases:
      1. every period that is part of an abbreviation, acronym, domain name,
         decimal number or ellipsis is swapped for `_PERIOD_MARK`;
      2. every remaining `.`, `?` and `!` is followed by `_STOP_MARK`;
      3. the protected periods are restored and the text is cut at each
         `_STOP_MARK`.

    Without distinct, non-empty sentinels phase 3 degenerates into
    `text.split("")`, which raises `ValueError`.

    Args:
      text: A string that consists of more than or equal to one sentences.

    Returns:
      A list of strings where each string is a sentence, stripped of
      surrounding whitespace. An empty or whitespace-only `text` yields `[]`.
    """
    prd = _PERIOD_MARK
    stop = _STOP_MARK

    # Pad so that patterns anchored on a surrounding space also fire at the
    # very start and end of the text.
    text = " " + text + " "
    text = text.replace("\n", " ")

    # Phase 1: protect periods that are not sentence terminators.
    text = re.sub(_PREFIXES, "\\1" + prd, text)
    text = re.sub(_WEBSITES, prd + "\\1", text)
    text = re.sub(_DIGITS + "[.]" + _DIGITS, "\\1" + prd + "\\2", text)
    # An ellipsis keeps all of its dots but still closes the sentence.
    text = re.sub(
        _MULTIPLE_DOTS,
        lambda match: prd * len(match.group(0)) + stop,
        text,
    )
    if "Ph.D" in text:
        text = text.replace("Ph.D.", "Ph" + prd + "D" + prd)
    text = re.sub(r"\s" + _ALPHABETS + "[.] ", " \\1" + prd + " ", text)
    # An acronym followed by a typical sentence opener does end the sentence.
    text = re.sub(_ACRONYMS + " " + _STARTERS, "\\1" + stop + " \\2", text)
    text = re.sub(
        _ALPHABETS + "[.]" + _ALPHABETS + "[.]" + _ALPHABETS + "[.]",
        "\\1" + prd + "\\2" + prd + "\\3" + prd,
        text,
    )
    text = re.sub(_ALPHABETS + "[.]" + _ALPHABETS + "[.]", "\\1" + prd + "\\2" + prd, text)
    text = re.sub(" " + _SUFFIXES + "[.] " + _STARTERS, " \\1" + stop + " \\2", text)
    text = re.sub(" " + _SUFFIXES + "[.]", " \\1" + prd, text)
    text = re.sub(" " + _ALPHABETS + "[.]", " \\1" + prd, text)

    # Move terminators outside closing quotes so the quote stays attached to
    # the sentence it closes.
    if "”" in text:
        text = text.replace(".”", "”.")
    if '"' in text:
        text = text.replace('."', '".')
    if "!" in text:
        text = text.replace('!"', '"!')
    if "?" in text:
        text = text.replace('?"', '"?')

    # Phase 2: every terminator still present is a real sentence boundary.
    text = text.replace(".", "." + stop)
    text = text.replace("?", "?" + stop)
    text = text.replace("!", "!" + stop)

    # Phase 3: restore protected periods and cut at the boundaries.
    text = text.replace(prd, ".")
    sentences = [s.strip() for s in text.split(stop)]
    # The text after the final boundary is normally just padding.
    if sentences and not sentences[-1]:
        sentences = sentences[:-1]
    return sentences
def count_words(text):
    """Counts the number of words.

    Args:
      text: The string to analyse.

    Returns:
      The number of `\\w+` tokens found by NLTK's `RegexpTokenizer`.
    """
    tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
    return len(tokenizer.tokenize(text))


@functools.lru_cache(maxsize=None)
def _get_sentence_tokenizer():
    """Load the English punkt sentence tokenizer once and reuse it."""
    return nltk.data.load("nltk:tokenizers/punkt/english.pickle")


@functools.lru_cache(maxsize=None)
def _english_stopwords():
    """Return NLTK's English stopwords as a frozenset, fetched at most once.

    The corpus is only downloaded when it is missing locally, mirroring how
    the punkt resource is handled elsewhere in this module. Caching the
    result avoids a downloader round trip on every `count_stopwords` call.
    """
    try:
        nltk.data.find("corpora/stopwords")
    except LookupError:
        nltk.download("stopwords")
    # A frozenset gives O(1) membership tests instead of scanning a list.
    return frozenset(nltk.corpus.stopwords.words("english"))


def count_stopwords(text):
    """Counts the number of stopwords.

    Args:
      text: The string to analyse.

    Returns:
      How many `\\w+` tokens of `text`, compared in lowercase, are English
      stopwords according to NLTK.
    """
    stopwords = _english_stopwords()
    tokens = nltk.tokenize.RegexpTokenizer(r"\w+").tokenize(text)
    return sum(1 for token in tokens if token.lower() in stopwords)


def generate_keywords(num_keywords):
    """Randomly generates a few keywords.

    Args:
      num_keywords: How many distinct entries to draw from `WORD_LIST`.

    Returns:
      A list of `num_keywords` words sampled without replacement.
    """
    return random.sample(WORD_LIST, k=num_keywords)
+ +"""Library of instructions.""" + +import collections +import json +import random +import re +import string +from collections.abc import Sequence + +import langdetect +from absl import logging + +try: + from . import ifeval_util as instructions_util +except ImportError: + import ifeval_util as instructions_util + +_InstructionArgsDtype = dict[str, int | str | Sequence[str]] | None + +_LANGUAGES = instructions_util.LANGUAGE_CODES + +# The relational operation for comparison. +_COMPARISON_RELATION = ("less than", "at least") + +# The maximum number of sentences. +_MAX_NUM_SENTENCES = 20 + +# The number of placeholders. +_NUM_PLACEHOLDERS = 4 + +# The number of bullet lists. +_NUM_BULLETS = 5 + +# The options of constrained response. +_CONSTRAINED_RESPONSE_OPTIONS = ("My answer is yes.", "My answer is no.", "My answer is maybe.") + +# The options of starter keywords. +_STARTER_OPTIONS = ( + "I would say", + "My answer is", + "I believe", + "In my opinion", + "I think", + "I reckon", + "I feel", + "From my perspective", + "As I see it", + "According to me", + "As far as I'm concerned", + "To my understanding", + "In my view", + "My take on it is", + "As per my perception", +) + +# The options of ending keywords. +# TODO(jeffreyzhou) add more ending options +_ENDING_OPTIONS = ("Any other questions?", "Is there anything else I can help with?") + +# The number of highlighted sections. +_NUM_HIGHLIGHTED_SECTIONS = 4 + +# The section spliter. +_SECTION_SPLITER = ("Section", "SECTION") + +# The number of sections. +_NUM_SECTIONS = 5 + +# The number of paragraphs. +_NUM_PARAGRAPHS = 5 + +# The postscript marker. +_POSTSCRIPT_MARKER = ("P.S.", "P.P.S") + +# The number of keywords. +_NUM_KEYWORDS = 2 + +# The occurrences of a single keyword. +_KEYWORD_FREQUENCY = 3 + +# The occurrences of a single letter. +_LETTER_FREQUENCY = 10 + +# The occurrences of words with all capital letters. +_ALL_CAPITAL_WORD_FREQUENCY = 20 + +# The number of words in the response. 
# Inclusive bounds for a randomly drawn word-count threshold.
_NUM_WORDS_LOWER_LIMIT = 100
_NUM_WORDS_UPPER_LIMIT = 500

# Stock proverb-style phrases.
_PHRASES = [
    "Dance like nobody is watching you",
    "The early bird catches the worm",
    "Time flies when having fun",
    "Every cloud has a silver lining",
    "Actions speak louder than words",
    "Don't judge a book by cover",
    "Live each day to the fullest",
    "All that glitters is not gold",
    "Laughter is the best medicine",
    "The pen is mightier than sword",
]


class Instruction:
    """Base template for an instruction checker.

    The four hooks below define the interface; in this base class each of
    them only raises `NotImplementedError`, so concrete checkers override
    them.
    """

    def __init__(self, instruction_id):
        """Store the instruction identifier on the instance as `id`."""
        self.id = instruction_id

    def build_description(self, **kwargs):
        """Configure the instruction from `kwargs` and return its text."""
        raise NotImplementedError("`build_description` not implemented.")

    def get_instruction_args(self):
        """Return the keyword arguments that `build_description` ended up using."""
        raise NotImplementedError("`get_instruction_args` not implemented.")

    def get_instruction_args_keys(self):
        """Return the names of the keyword arguments `build_description` accepts."""
        raise NotImplementedError("`get_instruction_args_keys` not implemented.")

    def check_following(self, value):
        """Decide whether the response `value` follows the instruction."""
        raise NotImplementedError("`check_following` not implemented.")
class ResponseLanguageChecker(Instruction):
    """Check the language of the entire response."""

    def build_description(self, *, language=None):
        """Build the instruction description.

        Args:
          language: A string holding the expected language code of the
            response, e.g. `en` for English, `zh` for Chinese, `fr` for
            French. It must be a key of `_LANGUAGES`; when omitted, one of
            those keys is picked at random.

        Returns:
          A string representing the instruction description.
        """
        if language is None:
            language = random.choice(list(_LANGUAGES.keys()))
        self._language = language
        # TODO(tianjianlu): opens the description generation to more choices.
        self._description_pattern = (
            "Your ENTIRE response should be in {language} language, no other "
            "language is allowed."
        )
        # The prompt shows the human-readable name; the code is kept for checking.
        language_name = _LANGUAGES[self._language]
        return self._description_pattern.format(language=language_name)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"language": self._language}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["language"]

    def check_following(self, value):
        """Check if the language of the entire response follows the instruction.

        Args:
          value: A string representing the response.

        Returns:
          True if `langdetect.detect(value)` equals the configured language
          code; otherwise False. When detection itself fails the response is
          given the benefit of the doubt and True is returned.
        """
        assert isinstance(value, str)

        try:
            detected = langdetect.detect(value)
        except langdetect.LangDetectException as e:
            # Count as instruction is followed.
            logging.error("Unable to detect language for text %s due to %s", value, e)  # refex: disable=pytotw.037
            return True
        return detected == self._language
class NumberOfSentences(Instruction):
    """Check the number of sentences."""

    def build_description(self, *, num_sentences=None, relation=None):
        """Build the instruction description.

        Args:
          num_sentences: An integer threshold for the number of sentences.
            `None` or a negative value is replaced by a random integer in
            `[1, _MAX_NUM_SENTENCES]`.
          relation: One of the strings in `_COMPARISON_RELATION`
            (`less than`, `at least`):
            'less than' means actual count < threshold,
            'at least' means actual count >= threshold.
            `None` picks one of the two at random.

        Returns:
          A string representing the instruction description.

        Raises:
          ValueError: If `relation` is given but not supported.
        """
        threshold = num_sentences
        if threshold is None or threshold < 0:
            threshold = random.randint(1, _MAX_NUM_SENTENCES)
        # The number of sentences as a threshold for comparison.
        self._num_sentences_threshold = threshold

        if relation is None:
            self._comparison_relation = random.choice(_COMPARISON_RELATION)
        elif relation in _COMPARISON_RELATION:
            self._comparison_relation = relation
        else:
            raise ValueError(
                f"The supported relation for comparison must be in {_COMPARISON_RELATION}, but {relation} is given."
            )

        self._description_pattern = "Your response should contain {relation} {num_sentences} sentences."
        return self._description_pattern.format(
            relation=self._comparison_relation,
            num_sentences=self._num_sentences_threshold,
        )

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {
            "num_sentences": self._num_sentences_threshold,
            "relation": self._comparison_relation,
        }

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["num_sentences", "relation"]

    def check_following(self, value):
        """Check if the number of sentences follows the instruction.

        Args:
          value: A string representing the response.

        Returns:
          True if the sentence count reported by
          `instructions_util.count_sentences` satisfies the configured
          relation, False if it does not, and None if the stored relation is
          neither of the two supported ones.
        """
        sentence_count = instructions_util.count_sentences(value)
        less_than = _COMPARISON_RELATION[0]
        at_least = _COMPARISON_RELATION[1]
        if self._comparison_relation == less_than:
            return sentence_count < self._num_sentences_threshold
        if self._comparison_relation == at_least:
            return sentence_count >= self._num_sentences_threshold  # pytype: disable=bad-return-type
        return None
class PlaceholderChecker(Instruction):
    """Check the placeholders in template writing."""

    def build_description(self, *, num_placeholders=None):
        """Build the instruction description.

        Args:
          num_placeholders: An integer denoting the minimum number of
            placeholders required in the response. `None` or a negative
            value is replaced by a random integer in `[1, _NUM_PLACEHOLDERS]`.

        Returns:
          A string representing the instruction description.
        """
        if num_placeholders is None or num_placeholders < 0:
            num_placeholders = random.randint(1, _NUM_PLACEHOLDERS)
        self._num_placeholders = num_placeholders
        self._description_pattern = (
            "The response must contain at least {num_placeholders} placeholders "
            "represented by square brackets, such as [address]."
        )
        return self._description_pattern.format(num_placeholders=self._num_placeholders)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"num_placeholders": self._num_placeholders}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["num_placeholders"]

    def check_following(self, value):
        """Check if the number of placeholders follows the instruction.

        Args:
          value: A string representing the response.

        Returns:
          True if the number of bracketed spans (`[` ... `]`, matched
          non-greedily) in the response is greater than or equal to
          `num_placeholders`; otherwise, False.
        """
        found = re.findall(r"\[.*?\]", value)
        return len(found) >= self._num_placeholders


class BulletListChecker(Instruction):
    """Checks the bullet list in the prompt."""

    def build_description(self, *, num_bullets=None):
        """Build the instruction description.

        Args:
          num_bullets: An integer specifying the exact number of bullet
            points required in the response. `None` or a negative value is
            replaced by a random integer in `[1, _NUM_BULLETS]`.

        Returns:
          A string representing the instruction description.
        """
        if num_bullets is None or num_bullets < 0:
            num_bullets = random.randint(1, _NUM_BULLETS)
        self._num_bullets = num_bullets
        self._description_pattern = (
            "Your answer must contain exactly {num_bullets} bullet points. "
            "Use the markdown bullet points such as:\n"
            "* This is point 1. \n"
            "* This is point 2"
        )
        return self._description_pattern.format(num_bullets=self._num_bullets)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"num_bullets": self._num_bullets}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["num_bullets"]

    def check_following(self, value):
        """Check if the number of bullet points meets the requirement.

        Args:
          value: A string representing the response.

        Returns:
          True if the number of lines that start (after optional whitespace)
          with a single asterisk or with a dash equals `num_bullets` exactly;
          otherwise False.
        """
        # A line opening with "**" is excluded by the star pattern.
        star_lines = re.findall(r"^\s*\*[^\*].*$", value, flags=re.MULTILINE)
        dash_lines = re.findall(r"^\s*-.*$", value, flags=re.MULTILINE)
        return len(star_lines) + len(dash_lines) == self._num_bullets
class ConstrainedResponseChecker(Instruction):
    """Checks the constrained response."""

    def build_description(self):
        """Build the instruction description.

        Returns:
          A string listing the allowed answers from
          `_CONSTRAINED_RESPONSE_OPTIONS`.
        """
        # A sequence of string(s) representing the options of the expected response.
        self._constrained_responses = _CONSTRAINED_RESPONSE_OPTIONS
        self._description_pattern = "Answer with one of the following options: {response_options}"
        return self._description_pattern.format(response_options=self._constrained_responses)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return None

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return []

    def check_following(self, value):
        """Checks if the response matches the constrained options.

        Args:
          value: A string representing the response.

        Returns:
          True if the response contains at least one of the allowed options
          as a substring; otherwise False.
        """
        value = value.strip()
        return any(option in value for option in self._constrained_responses)


class ConstrainedStartChecker(Instruction):
    """Checks the response start."""

    def build_description(self, *, starter=None):
        """Build the instruction description.

        Args:
          starter: A string representing the keyword or phrase that the
            response should start with. Surrounding whitespace is stripped;
            `None` picks a random entry of `_STARTER_OPTIONS`.

        Returns:
          A string representing the instruction description.
        """
        self._starter = starter.strip() if isinstance(starter, str) else starter
        if self._starter is None:
            self._starter = random.choice(_STARTER_OPTIONS)
        self._description_pattern = (
            "During the conversation, when it is your turn, "
            "please always start with {starter}"
        )
        return self._description_pattern.format(starter=self._starter)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"starter": self._starter}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["starter"]

    def check_following(self, value):
        """Checks if the response starts with the constrained keyword or phrase.

        The starter is matched as literal text. It is escaped before being
        placed in the pattern, so a starter containing regex metacharacters
        (for example `(a)` or `What?`) neither raises `re.error` nor matches
        text it does not literally spell.

        Args:
          value: A string representing the response.

        Returns:
          True if some line of the response begins, after optional
          whitespace, with the configured starter; otherwise, False. The
          search is multi-line, so a match on any line is accepted.
        """
        response_pattern = r"^\s*" + re.escape(self._starter)
        return bool(re.search(response_pattern, value, flags=re.MULTILINE))
class HighlightSectionChecker(Instruction):
    """Checks the highlighted section."""

    def build_description(self, *, num_highlights=None):
        """Build the instruction description.

        Args:
          num_highlights: An integer specifying the minimum number of
            highlighted sections. `None` or a negative value is replaced by
            a random integer in `[1, _NUM_HIGHLIGHTED_SECTIONS]`.

        Returns:
          A string representing the instruction description.
        """
        if num_highlights is None or num_highlights < 0:
            num_highlights = random.randint(1, _NUM_HIGHLIGHTED_SECTIONS)
        self._num_highlights = num_highlights

        self._description_pattern = (
            "Highlight at least {num_highlights} sections in your answer with "
            "markdown, i.e. *highlighted section*."
        )
        return self._description_pattern.format(num_highlights=self._num_highlights)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"num_highlights": self._num_highlights}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["num_highlights"]

    def check_following(self, value):
        """Checks if the number of highlighted sections meets the requirement.

        Args:
          value: A string representing the response. The response is expected
            to contain highlighted sections in the format of *highlighted*.

        Returns:
          True if the number of non-empty `*...*` spans plus the number of
          non-empty `**...**` spans (neither may cross a newline) reaches
          the configured minimum; otherwise False.
        """
        single_spans = re.findall(r"\*[^\n\*]*\*", value)
        double_spans = re.findall(r"\*\*[^\n\*]*\*\*", value)

        # Spans whose content is empty or whitespace-only do not count.
        single_count = sum(1 for span in single_spans if span.strip("*").strip())
        double_count = sum(
            1 for span in double_spans if span.removeprefix("**").removesuffix("**").strip()
        )
        return single_count + double_count >= self._num_highlights
class SectionChecker(Instruction):
    """Checks the sections."""

    def build_description(self, *, section_spliter=None, num_sections=None):
        """Build the instruction description.

        Args:
          section_spliter: A string with the keyword that marks a new
            section, e.g. `Section` or `SECTION`. Surrounding whitespace is
            stripped; `None` picks a random entry of `_SECTION_SPLITER`.
          num_sections: An integer specifying the number of sections. `None`
            or a negative value is replaced by a random integer in
            `[1, _NUM_SECTIONS]`.

        Returns:
          A string representing the instruction description.
        """
        self._section_spliter = section_spliter.strip() if isinstance(section_spliter, str) else section_spliter
        if self._section_spliter is None:
            self._section_spliter = random.choice(_SECTION_SPLITER)

        self._num_sections = num_sections
        if self._num_sections is None or self._num_sections < 0:
            self._num_sections = random.randint(1, _NUM_SECTIONS)

        self._description_pattern = (
            "Your response must have {num_sections} sections. Mark the beginning "
            "of each section with {section_spliter} X, such as:\n"
            "{section_spliter} 1\n"
            "[content of section 1]\n"
            "{section_spliter} 2\n"
            "[content of section 2]"
        )

        return self._description_pattern.format(num_sections=self._num_sections, section_spliter=self._section_spliter)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"section_spliter": self._section_spliter, "num_sections": self._num_sections}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["section_spliter", "num_sections"]

    def check_following(self, value):
        """Checks the response contains enough sections.

        A section header is the splitter keyword followed by a number, e.g.
        `Section 1`. The keyword is matched as literal text. It is escaped
        before being placed in the pattern, so a splitter containing regex
        metacharacters cannot raise `re.error` or match unintended text.

        Args:
          value: A string representing the response.

        Returns:
          True if the number of section headers found in the response is
          greater than or equal to `num_sections`; otherwise, False.
        """
        section_header_pattern = r"\s?" + re.escape(self._section_spliter) + r"\s?\d+\s?"
        # Splitting on N headers yields N + 1 pieces.
        pieces = re.split(section_header_pattern, value)
        num_sections = len(pieces) - 1
        return num_sections >= self._num_sections
class ParagraphChecker(Instruction):
    """Checks the paragraphs."""

    def build_description(self, *, num_paragraphs=None):
        """Build the instruction description.

        Args:
          num_paragraphs: An integer specifying the number of paragraphs.
            `None` or a negative value is replaced by a random integer in
            `[1, _NUM_PARAGRAPHS]`.

        Returns:
          A string representing the instruction description.
        """
        if num_paragraphs is None or num_paragraphs < 0:
            num_paragraphs = random.randint(1, _NUM_PARAGRAPHS)
        self._num_paragraphs = num_paragraphs

        self._description_pattern = (
            "There should be {num_paragraphs} paragraphs. "
            "Paragraphs are separated with the markdown divider: ***"
        )
        return self._description_pattern.format(num_paragraphs=self._num_paragraphs)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"num_paragraphs": self._num_paragraphs}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["num_paragraphs"]

    def check_following(self, value):
        """Checks the response contains required number of paragraphs.

        Args:
          value: A string representing the response. The response may contain
            paragraphs that are separated by the markdown divider: `***`.

        Returns:
          True if the actual number of paragraphs is the same as required;
          otherwise, False. An empty chunk before the first divider or after
          the last one is ignored; an empty chunk between two dividers makes
          the check fail.
        """
        chunks = re.split(r"\s?\*\*\*\s?", value)
        last_position = len(chunks) - 1
        remaining = len(chunks)

        for position, chunk in enumerate(chunks):
            if chunk.strip():
                continue
            if position not in (0, last_position):
                # Two consecutive dividers: an empty paragraph in the middle.
                return False
            remaining -= 1

        return remaining == self._num_paragraphs
class PostscriptChecker(Instruction):
    """Checks the postscript."""

    def build_description(self, *, postscript_marker=None):
        """Build the instruction description.

        Args:
          postscript_marker: A string containing the keyword that marks the
            start of the postscript section. Surrounding whitespace is
            stripped; `None` picks a random entry of `_POSTSCRIPT_MARKER`.

        Returns:
          A string representing the instruction description.
        """
        self._postscript_marker = (
            postscript_marker.strip() if isinstance(postscript_marker, str) else postscript_marker
        )
        if self._postscript_marker is None:
            self._postscript_marker = random.choice(_POSTSCRIPT_MARKER)

        self._description_pattern = (
            "At the end of your response, please explicitly add a postscript "
            "starting with {postscript}"
        )

        return self._description_pattern.format(postscript=self._postscript_marker)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"postscript_marker": self._postscript_marker}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["postscript_marker"]

    def check_following(self, value):
        """Checks if the response follows the postscript format.

        The comparison is case-insensitive (the response is lowercased). The
        two built-in markers get hand-written patterns that tolerate a space
        after each period. Any other marker is matched as literal text. It
        is escaped first, so regex metacharacters in a custom marker cannot
        raise `re.error` or act as wildcards.

        Args:
          value: A string representing the response. The response is expected
            to contain a postscript section.

        Returns:
          True if the marker occurs somewhere in the response; otherwise
          False.
        """
        value = value.lower()
        if self._postscript_marker == "P.P.S":
            postscript_pattern = r"\s*p\.\s?p\.\s?s.*$"
        elif self._postscript_marker == "P.S.":
            postscript_pattern = r"\s*p\.\s?s\..*$"
        else:
            postscript_pattern = r"\s*" + re.escape(self._postscript_marker.lower()) + r".*$"
        postscript = re.findall(postscript_pattern, value, flags=re.MULTILINE)
        return bool(postscript)
class RephraseChecker(Instruction):
    """Checks the rephrase."""

    def build_description(self, *, original_message):
        """Build the instruction description.

        Args:
          original_message: A string representing the original message. The
            rephrased response should only change its words/sentences in
            between its two asterisks, for example, *change me*. Both
            original and rephrased messages should contain the changes in
            the form of *change me*.

        Returns:
          A string representing the instruction description.

        Raises:
          ValueError: If `original_message` contains no `*...*` span.
        """
        if not self.is_change(original_message):
            raise ValueError(f"Message {original_message} does not contain changes in the form of *change me*.")

        self._reference_without_change = original_message
        # The three fragments need explicit separating spaces; without them
        # the text reads "onlychange" and "asteriskssuch".
        self._description = (
            "Rephrasing: Your rephrased response should only "
            "change the words/sentences in between two asterisks "
            "such as *change me*."
        )
        return self._description

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"original_message": self._reference_without_change}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["original_message"]

    def check_following(self, value):
        """Checks if the rephrasing follows the instruction.

        Args:
          value: A string representing the response, which is expected to
            rephrase the original message.

        Returns:
          True if `value` and the original message are identical once their
          `*...*` spans have been removed; otherwise, False.

        Raises:
          ValueError: If `value` contains no `*...*` span at all.
        """
        if not self.is_change(value):
            raise ValueError(f"value {value} does not contain changes in the form of *change me*.")

        response_without_changes = self.strip_changes(value)
        reference_without_changes = self.strip_changes(self._reference_without_change)

        return response_without_changes == reference_without_changes

    def is_change(self, response):
        """Check if there is change in the response in the form of *change me*.

        Returns:
          The `re.search` result: a match object (truthy) when a `*...*`
          span exists on a single line, else None.
        """
        return re.search(r"\*.*\*", response)

    def strip_changes(self, response):
        """Strips off the changes.

        The pattern is greedy, so on each line everything from the first
        asterisk to the last one is removed.
        """
        return re.sub(r"\*.*\*", "", response)


class KeywordChecker(Instruction):
    """Check the existence of certain keywords."""

    def build_description(self, *, keywords=None):
        """Build the instruction description.

        Args:
          keywords: A sequence of strings representing the keywords that are
            expected in the response. When empty or omitted, `_NUM_KEYWORDS`
            keywords are generated by `instructions_util.generate_keywords`.

        Returns:
          A string representing the instruction description.
        """
        if not keywords:
            self._keywords = instructions_util.generate_keywords(num_keywords=_NUM_KEYWORDS)
        else:
            self._keywords = keywords
        self._keywords = sorted(self._keywords)

        self._description_pattern = "Include keywords {keywords} in the response."

        return self._description_pattern.format(keywords=self._keywords)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"keywords": self._keywords}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["keywords"]

    def check_following(self, value):
        """Check if the response contains the expected keywords.

        Each keyword is searched case-insensitively as literal text. It is
        escaped first, so a keyword such as `(e.g.` cannot raise `re.error`
        and a `.` in a keyword only matches a real period.

        Args:
          value: A string representing the response.

        Returns:
          True if every keyword occurs in the response; otherwise False.
        """
        return all(
            re.search(re.escape(keyword), value, flags=re.IGNORECASE)
            for keyword in self._keywords
        )
+ + Args: + keyword: A string representing a keyword that is expected in the response. + frequency: An integer specifying the number of times `keyword` is expected + to appear in the response. + relation: A string in (`less than`, `at least`), defining the relational + operator for comparison. + Two relational comparisons are supported for now: + if 'less than', the actual number of occurrences < frequency; + if 'at least', the actual number of occurrences >= frequency. + + Returns: + A string representing the instruction description. + """ + if not keyword: + self._keyword = instructions_util.generate_keywords(num_keywords=1)[0] + else: + self._keyword = keyword.strip() + + self._frequency = frequency + if self._frequency is None or self._frequency < 0: + self._frequency = random.randint(1, _KEYWORD_FREQUENCY) + + if relation is None: + self._comparison_relation = random.choice(_COMPARISON_RELATION) + elif relation not in _COMPARISON_RELATION: + raise ValueError( + f"The supported relation for comparison must be in {_COMPARISON_RELATION}, but {relation} is given." + ) + else: + self._comparison_relation = relation + + self._description_pattern = ( + "In your response, the word {keyword} should appear {relation} " + "{frequency} times." 
+ ) + + return self._description_pattern.format( + keyword=self._keyword, relation=self._comparison_relation, frequency=self._frequency + ) + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return {"keyword": self._keyword, "frequency": self._frequency, "relation": self._comparison_relation} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["keyword", "frequency", "relation"] + + def check_following(self, value): + """Checks if the response contain the keyword with required frequency.""" + actual_occurrences = len(re.findall(self._keyword, value, flags=re.IGNORECASE)) + + if self._comparison_relation == _COMPARISON_RELATION[0]: + return actual_occurrences < self._frequency + elif self._comparison_relation == _COMPARISON_RELATION[1]: + return actual_occurrences >= self._frequency # pytype: disable=bad-return-type + + +class NumberOfWords(Instruction): + """Checks the number of words.""" + + def build_description(self, *, num_words=None, relation=None): + """Build the instruction description. + + Args: + num_words: An integer specifying the number of words contained in the + response. + relation: A string in (`less than`, `at least`), defining the relational + operator for comparison. + Two relational comparisons are supported for now: + if 'less than', the actual number of words < num_words; + if 'at least', the actual number of words >= num_words. + + Returns: + A string representing the instruction description. + """ + + self._num_words = num_words + if self._num_words is None or self._num_words < 0: + self._num_words = random.randint(_NUM_WORDS_LOWER_LIMIT, _NUM_WORDS_UPPER_LIMIT) + + if relation is None: + self._comparison_relation = random.choice(_COMPARISON_RELATION) + elif relation not in _COMPARISON_RELATION: + raise ValueError( + f"The supported relation for comparison must be in {_COMPARISON_RELATION}, but {relation} is given." 
+ ) + else: + self._comparison_relation = relation + + self._description_pattern = "Answer with {relation} {num_words} words." + + return self._description_pattern.format(relation=self._comparison_relation, num_words=self._num_words) + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return {"num_words": self._num_words, "relation": self._comparison_relation} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["num_words", "relation"] + + def check_following(self, value): + """Checks if the response contains the expected number of words.""" + num_words = instructions_util.count_words(value) + + if self._comparison_relation == _COMPARISON_RELATION[0]: + return num_words < self._num_words + elif self._comparison_relation == _COMPARISON_RELATION[1]: + return num_words >= self._num_words # pytype: disable=bad-return-type + + +class JsonFormat(Instruction): + """Check the Json format.""" + + def build_description(self): + self._description_pattern = ( + "Entire output should be wrapped in JSON format. You can use markdown ticks such as ```." + ) + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + value = ( + value.strip() + .removeprefix("```json") + .removeprefix("```Json") + .removeprefix("```JSON") + .removeprefix("```") + .removesuffix("```") + .strip() + ) + try: + json.loads(value) + except ValueError: + return False + return True + + +class ParagraphFirstWordCheck(Instruction): + """Check the paragraph and the first word of the nth paragraph.""" + + def build_description(self, num_paragraphs=None, nth_paragraph=None, first_word=None): + r"""Build the instruction description. 
+ + Args: + num_paragraphs: An integer indicating the number of paragraphs expected + in the response. A paragraph is a subset of the string that is + expected to be separated by '\n\n'. + nth_paragraph: An integer indicating the paragraph number that we look at. + Note that n starts from 1. + first_word: A string that represent the first word of the bth paragraph. + + Returns: + A string representing the instruction description. + """ + self._num_paragraphs = num_paragraphs + if self._num_paragraphs is None or self._num_paragraphs < 0: + self._num_paragraphs = random.randint(1, _NUM_PARAGRAPHS) + + self._nth_paragraph = nth_paragraph + if self._nth_paragraph is None or self._nth_paragraph <= 0 or self._nth_paragraph > self._num_paragraphs: + self._nth_paragraph = random.randint(1, self._num_paragraphs + 1) + + self._first_word = first_word + if self._first_word is None: + self._first_word = instructions_util.generate_keywords(num_keywords=1)[0] + self._first_word = self._first_word.lower() + + self._description_pattern = ( + "There should be {num_paragraphs} paragraphs. " + + "Paragraphs and only paragraphs are separated with each other by two " + + "new lines as if it was '\\n\\n' in python. " + + "Paragraph {nth_paragraph} must start with word {first_word}." + ) + + return self._description_pattern.format( + num_paragraphs=self._num_paragraphs, nth_paragraph=self._nth_paragraph, first_word=self._first_word + ) + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return { + "num_paragraphs": self._num_paragraphs, + "nth_paragraph": self._nth_paragraph, + "first_word": self._first_word, + } + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["num_paragraphs", "nth_paragraph", "first_word"] + + def check_following(self, value): + """Checks for required number of paragraphs and correct first word. + + Args: + value: a string representing the response. 
The response may contain + paragraphs that are separated by two new lines and the first word of + the nth paragraph will have to match a specified word. + + Returns: + True if the number of paragraphs is the same as required and the first + word of the specified paragraph is the same as required. Otherwise, false. + """ + + paragraphs = re.split(r"\n\n", value) + num_paragraphs = len(paragraphs) + + for paragraph in paragraphs: + if not paragraph.strip(): + num_paragraphs -= 1 + + # check that index doesn't go out of bounds + if self._nth_paragraph <= num_paragraphs: + paragraph = paragraphs[self._nth_paragraph - 1].strip() + if not paragraph: + return False + else: + return False + + first_word = "" + punctuation = {".", ",", "?", "!", "'", '"'} + + # get first word and remove punctuation + word = paragraph.split()[0].strip() + # TODO(jeffrey): make more complex? + word = word.lstrip("'") + word = word.lstrip('"') + + for letter in word: + if letter in punctuation: + break + first_word += letter.lower() + + return num_paragraphs == self._num_paragraphs and first_word == self._first_word + + +# TODO(jeffrey) add relation - at least/at most? +class KeySentenceChecker(Instruction): + """Check the existence of certain key sentences.""" + + def build_description(self, key_sentences=None, num_sentences=None): + """Build the instruction description. + + Args: + key_sentences: A sequences of strings representing the key sentences that + are expected in the response. + num_sentences: The number of key sentences that are expected to be seen in + the response. + + Returns: + A string representing the instruction description. + """ + + if not key_sentences: + # TODO(jeffrey) make a generate sentences function? 
wonderwords package + self._key_sentences = set(["For now, this is fine."]) + else: + self._key_sentences = key_sentences + + if not num_sentences: + self._num_sentences = random.randint(1, len(self._key_sentences)) + else: + self._num_sentences = num_sentences + + self._description_pattern = "Include {num_sentences} of the following sentences {key_sentences}" + + return self._description_pattern.format(num_sentences=self._num_sentences, key_sentences=self._key_sentences) + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return {"num_sentences": self._num_sentences, "key_sentences": list(self._key_sentences)} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["num_sentences", "key_sentences"] + + def check_following(self, value): + """Checks if the response contains the expected key sentences.""" + count = 0 + sentences = instructions_util.split_into_sentences(value) + for sentence in self._key_sentences: + if sentence in sentences: + count += 1 + + return count == self._num_sentences + + +class ForbiddenWords(Instruction): + """Checks that specified words are not used in response.""" + + def build_description(self, forbidden_words=None): + """Build the instruction description. + + Args: + forbidden_words: A sequences of strings respresenting words that are not + allowed in the response. + + Returns: + A string representing the instruction description. + """ + + if not forbidden_words: + self._forbidden_words = instructions_util.generate_keywords(num_keywords=_NUM_KEYWORDS) + else: + self._forbidden_words = list(set(forbidden_words)) + self._forbidden_words = sorted(self._forbidden_words) + self._description_pattern = "Do not include keywords {forbidden_words} in the response." 
+ + return self._description_pattern.format(forbidden_words=self._forbidden_words) + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return {"forbidden_words": self._forbidden_words} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["forbidden_words"] + + def check_following(self, value): + """Check if the response does not contain the expected keywords.""" + return all(not re.search(r"\b" + word + r"\b", value, flags=re.IGNORECASE) for word in self._forbidden_words) + + +class RephraseParagraph(Instruction): + """Checks that the paragraph is rephrased.""" + + def build_description(self, *, original_paragraph, low, high): + """Builds the instruction description. + + Args: + original_paragraph: A string presenting the original paragraph. The + rephrases response should have betweeb low-high words in common. + low: An integer presenting the lower bound of similar words. + high: An integer representing the upper bound of similar words. + + Returns: + A string representing the instruction description. + """ + # TODO(jeffrey) make more encompassing + self._original_paragraph = original_paragraph + self._low = low + self._high = high + + self._description = ( + "Rephrase the following paragraph: " + + "{original_paragraph}\nYour response should have " + + "between {low} and {high} of the same words. " + + "Words are the same if and only if all of the " + + "letters, ignoring cases, are the same. For " + + "example, 'run' is the same as 'Run' but different " + + "to 'ran'." 
+ ) + + return self._description.format(original_paragraph=original_paragraph, low=self._low, high=self._high) + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return {"original_paragraph": self._original_paragraph, "low": self._low, "high": self._high} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["original_paragraph", "low", "high"] + + def check_following(self, value): + val_words = re.findall(r"\w+", value.lower()) + original_words = re.findall(r"\w+", self._original_paragraph.lower()) + similar_words = 0 + + dict_val = collections.Counter(val_words) + dict_original = collections.Counter(original_words) + + for word in dict_original: + similar_words += min(dict_original[word], dict_val[word]) + + return similar_words >= self._low and similar_words <= self._high + + +class TwoResponsesChecker(Instruction): + """Check that two responses were given.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = ( + "Give two different responses. Responses and only responses should" + " be separated by 6 asterisk symbols: ******." + ) + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response has two different answers. + + Args: + value: A string representing the response. + + Returns: + True if two responses are detected and false otherwise. 
+ """ + valid_responses = list() + responses = value.split("******") + for index, response in enumerate(responses): + if not response.strip(): + if index != 0 and index != len(responses) - 1: + return False + else: + valid_responses.append(response) + return len(valid_responses) == 2 and valid_responses[0].strip() != valid_responses[1].strip() + + +class RepeatPromptThenAnswer(Instruction): + """Checks that Prompt is first repeated then answered.""" + + def build_description(self, *, prompt_to_repeat=None): + """Build the instruction description. + + Args: + prompt_to_repeat: The prompt that is meant to be repeated. + + Returns: + A string representing the instruction description. + """ + if not prompt_to_repeat: + raise ValueError("prompt_to_repeat must be set.") + else: + self._prompt_to_repeat = prompt_to_repeat + self._description_pattern = ( + "First repeat the request word for word without change," + " then give your answer (1. do not say any words or characters" + " before repeating the request; 2. the request you need to repeat" + " does not include this sentence)" + ) + return self._description_pattern + + def get_instruction_args(self): + return {"prompt_to_repeat": self._prompt_to_repeat} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["prompt_to_repeat"] + + def check_following(self, value): + return bool(value.strip().lower().startswith(self._prompt_to_repeat.strip().lower())) + + +class EndChecker(Instruction): + """Checks that the prompt ends with a given phrase.""" + + def build_description(self, *, end_phrase=None): + """Build the instruction description. + + Args: + end_phrase: A string representing the phrase the response should end with. + + Returns: + A string representing the instruction description. 
+ """ + self._end_phrase = end_phrase.strip() if isinstance(end_phrase, str) else end_phrase + if self._end_phrase is None: + self._end_phrase = random.choice(_ENDING_OPTIONS) + self._description_pattern = ( + "Finish your response with this exact phrase {ender}. No other words should follow this phrase." + ) + return self._description_pattern.format(ender=self._end_phrase) + + def get_instruction_args(self): + return {"end_phrase": self._end_phrase} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["end_phrase"] + + def check_following(self, value): + """Checks if the response ends with the expected phrase.""" + value = value.strip().strip('"').lower() + self._end_phrase = self._end_phrase.strip().lower() + return value.endswith(self._end_phrase) + + +class TitleChecker(Instruction): + """Checks the response for a title.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = ( + "Your answer must contain a title, wrapped in double angular brackets, such as <>." + ) + return self._description_pattern + + def get_instruction_args(self): + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response contains a title.""" + pattern = r"<<[^\n]+>>" + re_pattern = re.compile(pattern) + titles = re.findall(re_pattern, value) + + return any(title.lstrip("<").rstrip(">").strip() for title in titles) + + +class LetterFrequencyChecker(Instruction): + """Checks letter frequency.""" + + def build_description(self, *, letter=None, let_frequency=None, let_relation=None): + """Build the instruction description. + + Args: + letter: A string representing a letter that is expected in the response. + let_frequency: An integer specifying the number of times `keyword` is + expected to appear in the response. 
+ let_relation: A string in (`less than`, `at least`), defining the + relational operator for comparison. Two relational comparisons are + supported for now; if 'less than', the actual number of + occurrences < frequency; if 'at least', the actual number of + occurrences >= frequency. + + Returns: + A string representing the instruction description. + """ + if not letter or len(letter) > 1 or ord(letter.lower()) < 97 or ord(letter.lower()) > 122: + self._letter = random.choice(list(string.ascii_letters)) + else: + self._letter = letter.strip() + self._letter = self._letter.lower() + + self._frequency = let_frequency + if self._frequency is None or self._frequency < 0: + self._frequency = random.randint(1, _LETTER_FREQUENCY) + + if let_relation is None: + self._comparison_relation = random.choice(_COMPARISON_RELATION) + elif let_relation not in _COMPARISON_RELATION: + raise ValueError( + "The supported relation for comparison must be in " + f"{_COMPARISON_RELATION}, but {let_relation} is given." + ) + else: + self._comparison_relation = let_relation + + self._description_pattern = ( + "In your response, the letter {letter} should appear {let_relation} {let_frequency} times." 
+ ) + + return self._description_pattern.format( + letter=self._letter, let_frequency=self._frequency, let_relation=self._comparison_relation + ) + + def get_instruction_args(self): + """Returns the keyword args of build description.""" + return {"letter": self._letter, "let_frequency": self._frequency, "let_relation": self._comparison_relation} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["letter", "let_frequency", "let_relation"] + + def check_following(self, value): + """Checks that the response contains the letter at the right frequency.""" + value = value.lower() + letters = collections.Counter(value) + + if self._comparison_relation == _COMPARISON_RELATION[0]: + return letters[self._letter] < self._frequency + else: + return letters[self._letter] >= self._frequency + + +class CapitalLettersEnglishChecker(Instruction): + """Checks that the response is in english and is in all capital letters.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "Your entire response should be in English, and in all capital letters." + return self._description_pattern + + def get_instruction_args(self): + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks that the response is in English and in all capital letters.""" + assert isinstance(value, str) + + try: + return value.isupper() and langdetect.detect(value) == "en" + except langdetect.LangDetectException as e: + # Count as instruction is followed. 
+ logging.error("Unable to detect language for text %s due to %s", value, e) # refex: disable=pytotw.037 + return True + + +class LowercaseLettersEnglishChecker(Instruction): + """Checks that the response is in english and is in all lowercase letters.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = ( + "Your entire response should be in English, and in all lowercase letters. No capital letters are allowed." + ) + return self._description_pattern + + def get_instruction_args(self): + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks that the response is in English and in all lowercase letters.""" + assert isinstance(value, str) + + try: + return value.islower() and langdetect.detect(value) == "en" + except langdetect.LangDetectException as e: + # Count as instruction is followed. + logging.error("Unable to detect language for text %s due to %s", value, e) # refex: disable=pytotw.037 + return True + + +class CommaChecker(Instruction): + """Checks the response for no commas.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "In your entire response, refrain from the use of any commas." + return self._description_pattern + + def get_instruction_args(self): + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks that the response does not contain commas.""" + return not re.search(r"\,", value) + + +class CapitalWordFrequencyChecker(Instruction): + """Checks frequency of words with all capital letters.""" + + def build_description(self, capital_frequency=None, capital_relation=None): + """Build the instruction description. 
+ + Args: + capital_frequency: An integer that represents the number of words that + should be in all capital letters. + capital_relation: A string that is 'at least' or 'at most' that refers to + the frequency. + + Returns: + A string representing the instruction description. + """ + self._frequency = capital_frequency + if self._frequency is None: + self._frequency = random.randint(1, _ALL_CAPITAL_WORD_FREQUENCY) + + self._comparison_relation = capital_relation + if capital_relation is None: + self._comparison_relation = random.choice(_COMPARISON_RELATION) + elif capital_relation not in _COMPARISON_RELATION: + raise ValueError( + "The supported relation for comparison must be in " + f"{_COMPARISON_RELATION}, but {capital_relation} is given." + ) + + self._description_pattern = ( + "In your response, words with all capital letters should appear {relation} {frequency} times." + ) + + return self._description_pattern.format(frequency=self._frequency, relation=self._comparison_relation) + + def get_instruction_args(self): + """Returns the keyword args of build description.""" + return {"capital_frequency": self._frequency, "capital_relation": self._comparison_relation} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["capital_frequency", "capital_relation"] + + def check_following(self, value): + """Checks the frequency of words with all capital letters.""" + # Hyphenated words will count as one word + words = instructions_util.nltk.word_tokenize(value) + capital_words = [word for word in words if word.isupper()] + + capital_words = len(capital_words) + + if self._comparison_relation == _COMPARISON_RELATION[0]: + return capital_words < self._frequency + else: + return capital_words >= self._frequency + + +class QuotationChecker(Instruction): + """Checks response is wrapped with double quotation marks.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = 
"Wrap your entire response with double quotation marks." + return self._description_pattern + + def get_instruction_args(self): + """Returns the keyword args of build description.""" + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if the response is wrapped with double quotation marks.""" + value = value.strip() + return len(value) > 1 and value[0] == '"' and value[-1] == '"' + + +class RepeatPhraseChecker(Instruction): + "Repeat the phrase {phrase} exactly {small_n} times, transforming it slightly each time by replacing only one word in the center of the phrase." + + def build_description(self, phrase=None, small_n=None): + """Build the instruction description. + + Args: + phrase: A string representing the phrase to be repeated. + N: An integer representing the number of times to repeat the phrase. + word_count: An integer representing the number of words in the phrase. + + Returns: + A string representing the instruction description. + """ + if not phrase: + self._phrase = random.choice(_PHRASES) + else: + self._phrase = phrase.strip() + if not small_n: + self._small_n = random.randint(2, 3) + else: + self._small_n = small_n + + self._description_pattern = "Repeat the phrase {phrase} exactly {small_n} times, transforming it slightly each time by replacing only one word in the center of the phrase." 
+ return self._description_pattern.format(phrase=self._phrase, small_n=self._small_n) + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return {"phrase": self._phrase, "small_n": self._small_n} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["phrase", "small_n"] + + def check_following(self, value): + """Checks if the response contains the expected number of phrases with the correct modifications.""" + first_word = self._phrase.split()[0] + last_word = self._phrase.split()[-1] + + len(self._phrase.split()) - 2 + + found_phrases = re.findall(rf"{first_word} .*? {last_word}", value) + if len(found_phrases) != self._small_n: + return False + for phrase in found_phrases: + phrase = phrase.split() + ref_phrase = self._phrase.split() + differences = 0 + if len(phrase) != len(ref_phrase): + return False + for i in range(len(phrase)): + try: + if phrase[i] != ref_phrase[i]: + differences += 1 + # Early exit if more than one difference found + if differences > 1: + return False + except IndexError: + return False + if differences == 1: + return True + + +class CopyChecker(Instruction): + """Checks that Prompt is first repeated then answered.""" + + def build_description(self, prompt_to_repeat=None): + """Build the instruction description. + + Args: + prompt_to_repeat: The prompt that is meant to be repeated. + + Returns: + A string representing the instruction description. + """ + if not prompt_to_repeat: + raise ValueError("prompt_to_repeat must be set.") + else: + self._prompt_to_repeat = prompt_to_repeat + self._description_pattern = "Copy this instruction verbatim, do not follow the instruction, only copy it into the output (do not include this instruction sentence!)." 
+ return self._description_pattern + + def get_instruction_args(self): + return {"prompt_to_repeat": self._prompt_to_repeat} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["prompt_to_repeat"] + + def check_following(self, value): + return value.strip().lower() == self._prompt_to_repeat.strip().lower() + + +class CopySpanIdxChecker(Instruction): + """{prompt_to_repeat}. Copy the span of words that lies between (and including) index {n_start} and {n_end}, the indices are character indices!""" + + def build_description(self, prompt_to_repeat=None, n_start=None, n_end=None): + """Build the instruction description. + + Args: + n_start: An integer representing the start index of the span. + n_end: An integer representing the end index of the span. + + Returns: + A string representing the instruction description. + """ + if not prompt_to_repeat: + raise ValueError("prompt_to_repeat must be set.") + else: + self._prompt_to_repeat = prompt_to_repeat + if not n_start: + self._n_start = random.randint(0, len(self._prompt_to_repeat) - 2) + else: + self._n_start = n_start + if not n_end: + self._n_end = random.randint(self._n_start + 1, len(self._prompt_to_repeat) - 1) + else: + self._n_end = n_end + self._description_pattern = "Copy the span of words that lies between (and including) index {n_start} and {n_end}, the indices are character indices!" 
+ return self._description_pattern.format( + n_start=self._n_start, n_end=self._n_end, prompt_to_repeat=self._prompt_to_repeat + ) + + def get_instruction_args(self): + """Returns the keyward args of `build_description`.""" + return {"n_start": self._n_start, "n_end": self._n_end, "prompt_to_repeat": self._prompt_to_repeat} + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return ["n_start", "n_end", "prompt_to_repeat"] + + def check_following(self, value): + """Checks if the response contains the expected number of phrases with the correct modifications.""" + return value.strip().lower() == self._prompt_to_repeat[self._n_start : self._n_end].strip().lower() + + +class SentenceHyphenChecker(Instruction): + """All sentences must be connected using hyphens, with no spaces between them.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "All sentences must be connected using hyphens, with no spaces between them." + return self._description_pattern + + def get_instruction_args(self): + return None + + def get_instruction_args_keys(self): + """Returns the args keys of `build_description`.""" + return [] + + def check_following(self, value): + """Checks if all sentences are connected using hyphens, with no spaces between them.""" + sentences_gold = re.sub("-", " ", value) + sentences_gold = instructions_util.split_into_sentences(sentences_gold) + sentences = value.split("-") + # Check if there are any spaces between sentences + for sentence, gold in zip(sentences, sentences_gold): + if sentence.strip() != sentence or sentence != gold: + return False + return True + + +class AdjacentLetterChecker(Instruction): + """No two adjacent words can start with consecutive letters of the alphabet.""" + + def build_description(self): + """Build the instruction description.""" + self._description_pattern = "No two adjacent words can start with consecutive letters of the alphabet." 
class KeywordFrequencyOnceChecker(Instruction):
    """Checks that a keyword occurs exactly once in the response."""

    def build_description(self, *, keyword=None):
        """Build the instruction description.

        Args:
            keyword: A string representing the keyword that must occur exactly
                once in the response. When falsy, a keyword is taken from
                `instructions_util.generate_keywords`.

        Returns:
            A string representing the instruction description.
        """
        if not keyword:
            self._keyword = instructions_util.generate_keywords(num_keywords=1)[0]
        else:
            self._keyword = keyword.strip()

        # Kept as an attribute for parity with the other frequency checkers.
        self._frequency = 1

        self._description_pattern = "Include keyword {keyword} in your response."

        return self._description_pattern.format(keyword=self._keyword)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"keyword": self._keyword}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["keyword"]

    def check_following(self, value):
        """Checks that the keyword occurs exactly once (case-insensitively).

        Args:
            value: A string representing the response.

        Returns:
            True if the keyword is found exactly one time; otherwise, False.
        """
        # The keyword is literal text, not a pattern: escape it so characters
        # such as "+", "." or "(" neither raise re.error nor match loosely.
        pattern = re.escape(self._keyword)
        actual_occurrences = len(re.findall(pattern, value, flags=re.IGNORECASE))

        return actual_occurrences == 1
class ExcludeWordHarderChecker(Instruction):
    """Checks that a single specified keyword is not used in the response."""

    def build_description(self, keyword=None, instruction=None):
        """Build the instruction description.

        Args:
            keyword: A string representing the word that is not allowed in the
                response.
            instruction: A string (the prompt text) from which a random
                whitespace-separated word is picked when `keyword` is falsy.

        Returns:
            A string representing the instruction description.

        Raises:
            ValueError: If `keyword` is falsy and `instruction` provides no
                word to pick from.
        """
        if keyword:
            self._keyword = keyword.strip()
        else:
            candidates = instruction.split() if instruction else []
            if not candidates:
                # Previously this path failed with an opaque AttributeError /
                # IndexError; fail with an explicit message instead.
                raise ValueError("Either keyword or a non-empty instruction must be set.")
            self._keyword = random.choice(candidates)

        self._description_pattern = "Do not include keyword {keyword} in the response."

        return self._description_pattern.format(keyword=self._keyword)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"keyword": self._keyword}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["keyword"]

    def check_following(self, value):
        """Check that the response does not contain the forbidden keyword.

        The keyword is matched as a whole word, case-insensitively, so it is
        also detected at the very start or end of the response and next to
        punctuation or line breaks (a plain `" kw "` substring test misses all
        of those).

        Args:
            value: A string representing the response.

        Returns:
            True if the keyword does not occur; otherwise, False.
        """
        if not self._keyword:
            # An empty keyword cannot be "included"; nothing to forbid.
            return True
        # Lookarounds instead of \b so keywords that start or end with a
        # non-word character (e.g. "response.") are still delimited correctly.
        pattern = r"(?<!\w)" + re.escape(self._keyword) + r"(?!\w)"
        return re.search(pattern, value, flags=re.IGNORECASE) is None
class ParagraphBasicChecker2(Instruction):
    """Checks that the response has exactly 2 blank-line-separated paragraphs."""

    def build_description(self):
        """Build the instruction description.

        Returns:
            A string representing the instruction description.
        """
        self._description_pattern = "There should be 2 paragraphs. Paragraphs and only paragraphs are separated with each other by two line breaks. "

        return self._description_pattern

    def get_instruction_args(self):
        """Returns the keyword args of `build_description` (none)."""
        return {}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description` (none)."""
        return []

    def check_following(self, value):
        """Checks the response contains exactly two paragraphs.

        Args:
            value: A string representing the response. Paragraphs are the
                pieces obtained by splitting on two consecutive line breaks.

        Returns:
            True if there are exactly 2 paragraphs, ignoring one blank piece
            at the very start and/or end; False if a blank piece occurs in the
            middle or the count differs.
        """
        chunks = value.split("\n\n")
        last = len(chunks) - 1
        count = len(chunks)

        for position, chunk in enumerate(chunks):
            if chunk.strip():
                continue
            # Blank piece: tolerated only at either edge of the response.
            if position not in (0, last):
                return False
            count -= 1

        return count == 2
class FirstWordAnswerChecker(Instruction):
    """The first word of your response should be the word {first_word}."""

    def build_description(self, first_word=None):
        """Build the instruction description.

        Args:
            first_word: A string representing the required first word of the
                response. When falsy, a keyword is taken from
                `instructions_util.generate_keywords`.

        Returns:
            A string representing the instruction description.
        """
        if first_word:
            self._first_word = first_word.strip()
        else:
            self._first_word = instructions_util.generate_keywords(num_keywords=1)[0]

        self._description_pattern = "The first word of your response should be the word {first_word}."

        return self._description_pattern.format(first_word=self._first_word)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"first_word": self._first_word}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["first_word"]

    def check_following(self, value):
        """Checks if the first word of the response is the expected word.

        Args:
            value: A string representing the response.

        Returns:
            True if the first whitespace-separated token equals the expected
            word (case-insensitively); False otherwise, including when the
            response is empty or whitespace-only.
        """
        tokens = value.split()
        if not tokens:
            return False
        return tokens[0].lower() == self._first_word.lower()
class LastWordAnswerChecker(Instruction):
    """The last word of your response should be the word {last_word}."""

    def build_description(self, last_word=None):
        """Build the instruction description.

        Args:
            last_word: A string representing the required last word of the
                response. When falsy, a keyword is taken from
                `instructions_util.generate_keywords`.

        Returns:
            A string representing the instruction description.
        """
        if not last_word:
            self._last_word = instructions_util.generate_keywords(num_keywords=1)[0]
        else:
            self._last_word = last_word.strip()

        self._description_pattern = "The last word of your response should be the word {last_word}."

        return self._description_pattern.format(last_word=self._last_word)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"last_word": self._last_word}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["last_word"]

    def check_following(self, value):
        """Checks if the last word of the response is the expected word.

        Args:
            value: A string representing the response.

        Returns:
            True if the last whitespace-separated token, with punctuation
            removed, equals the expected word (case-insensitively); False
            otherwise, including when the response is empty or
            whitespace-only.
        """
        words = value.split()
        if not words:
            # No words at all: indexing [-1] would raise IndexError, and an
            # empty response cannot satisfy the constraint anyway.
            return False
        # remove any punctuation from the last word
        last_word = re.sub(r"[^\w\s]", "", words[-1])
        return last_word.lower() == self._last_word.lower()
class CopyingMultipleChecker(Instruction):
    "Repeat the request without change {N} times, separated by 6 asterisk symbols (do not say anything before repeating the request; the request you need to repeat does not include this sentence) and do not answer the actual request!"

    def build_description(self, prompt_to_repeat=None, N=None):
        """Build the instruction description.

        Args:
            prompt_to_repeat: The prompt that is meant to be repeated.
            N: An integer representing the number of repetitions; when falsy,
                2 or 3 is chosen at random.

        Returns:
            A string representing the instruction description.

        Raises:
            ValueError: If `prompt_to_repeat` is falsy.
        """
        if not prompt_to_repeat:
            raise ValueError("prompt_to_repeat must be set.")
        self._prompt_to_repeat = prompt_to_repeat
        self._N = N or random.randint(2, 3)
        self._description_pattern = "Repeat the request without change {N} times, separated by 6 asterisk symbols (do not say anything before repeating the request; the request you need to repeat does not include this sentence) and do not answer the actual request!"
        return self._description_pattern.format(N=self._N)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"prompt_to_repeat": self._prompt_to_repeat, "N": self._N}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["prompt_to_repeat", "N"]

    def check_following(self, value):
        """Checks that the response is the prompt repeated N times.

        Args:
            value: A string representing the response.

        Returns:
            True if splitting on "******" yields exactly N pieces and each
            piece equals the prompt, ignoring case and surrounding
            whitespace; otherwise, False.
        """
        copies = value.split("******")
        if len(copies) != self._N:
            return False
        for copy in copies:
            if copy.strip().lower() != self._prompt_to_repeat.strip().lower():
                return False
        return True
class LetterCountingChecker(Instruction):
    "Answer with {relation} {N} letters."

    def build_description(self, N=None, relation=None):
        """Build the instruction description.

        Args:
            N: An integer threshold for the number of letters; when falsy,
                2 or 3 is chosen at random.
            relation: The comparison to apply, "less than" or "at least";
                when falsy, one is picked from `_COMPARISON_RELATION`.

        Returns:
            A string representing the instruction description.
        """
        self._N = N if N else random.randint(2, 3)
        self._relation = relation if relation else random.choice(_COMPARISON_RELATION)
        self._description_pattern = "Answer with {relation} {N} letters."
        return self._description_pattern.format(N=self._N, relation=self._relation)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"N": self._N, "relation": self._relation}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["N", "relation"]

    def check_following(self, value):
        """Checks the number of ASCII letters in the response against N.

        Args:
            value: A string representing the response.

        Returns:
            For "less than": True if the letter count is < N.
            For "at least": True if the letter count is >= N.
            None for any other relation.
        """
        letter_count = sum(1 for _ in re.finditer(r"[a-zA-Z]", value))
        if self._relation == "less than":
            return letter_count < self._N
        if self._relation == "at least":
            return letter_count >= self._N
        return None
class CountIncrementWordChecker(Instruction):
    "Include keyword {keyword1} once in your response, keyword {keyword2} twice in your response."

    def build_description(self, keyword1=None, keyword2=None):
        """Build the instruction description.

        Args:
            keyword1: A string representing the keyword expected exactly once
                in the response.
            keyword2: A string representing the keyword expected exactly twice
                in the response.
            Either one, when falsy, is taken from
            `instructions_util.generate_keywords`.

        Returns:
            A string representing the instruction description.
        """
        if not keyword1:
            # generate_keywords(num_keywords=1) is indexed with [0] by the other
            # checkers in this file; storing the whole result here would later
            # break re.findall, which needs a string pattern.
            self._keyword1 = instructions_util.generate_keywords(num_keywords=1)[0]
        else:
            self._keyword1 = keyword1.strip()
        if not keyword2:
            self._keyword2 = instructions_util.generate_keywords(num_keywords=1)[0]
        else:
            self._keyword2 = keyword2.strip()

        self._description_pattern = (
            "Include keyword {keyword1} once in your response, keyword {keyword2} twice in your response."
        )

        return self._description_pattern.format(keyword1=self._keyword1, keyword2=self._keyword2)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"keyword1": self._keyword1, "keyword2": self._keyword2}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["keyword1", "keyword2"]

    def check_following(self, value):
        """Checks if the response contains the expected number of keywords.

        Args:
            value: A string representing the response.

        Returns:
            True if `keyword1` occurs exactly once and `keyword2` exactly
            twice (both case-insensitively); otherwise, False.
        """
        # Keywords are literal text: escape them so regex metacharacters are
        # matched verbatim instead of raising or matching loosely.
        actual_occurrences1 = len(re.findall(re.escape(self._keyword1), value, flags=re.IGNORECASE))
        actual_occurrences2 = len(re.findall(re.escape(self._keyword2), value, flags=re.IGNORECASE))

        return actual_occurrences1 == 1 and actual_occurrences2 == 2
class KeywordSpecificPositionChecker(Instruction):
    "Include keyword {keyword1} in the {n}-th sentence, as the {m}-th word of that sentence."

    def build_description(self, keyword=None, n=None, m=None):
        """Build the instruction description.

        Args:
            keyword: The keyword expected in the response, either a string or
                a sequence whose first element is used. When falsy, a keyword
                is taken from `instructions_util.generate_keywords`.
            n: An integer representing the 1-based sentence number; when
                falsy, a random value in [1, 20] is used.
            m: An integer representing the 1-based word number; when falsy, a
                random value in [1, 30] is used.

        Returns:
            A string representing the instruction description.
        """
        if not keyword:
            self._keyword = instructions_util.generate_keywords(num_keywords=1)[0]
        elif isinstance(keyword, str):
            self._keyword = keyword.strip()
        else:
            self._keyword = keyword[0].strip()
        self._n = n or random.randint(1, 20)
        self._m = m or random.randint(1, 30)

        self._description_pattern = (
            "Include keyword {keyword} in the {n}-th sentence, as the {m}-th word of that sentence."
        )

        return self._description_pattern.format(keyword=self._keyword, n=self._n, m=self._m)

    def get_instruction_args(self):
        """Returns the keyword args of `build_description`."""
        return {"keyword": self._keyword, "n": self._n, "m": self._m}

    def get_instruction_args_keys(self):
        """Returns the args keys of `build_description`."""
        return ["keyword", "n", "m"]

    def check_following(self, value):
        """Checks that the keyword is the m-th token of the n-th sentence.

        Args:
            value: A string representing the response.

        Returns:
            True if the response has at least n sentences, the n-th sentence
            has at least m tokens, and the m-th token equals the keyword
            exactly (case-sensitive); otherwise, False.
        """
        sentences = instructions_util.split_into_sentences(value)
        if self._n > len(sentences):
            return False
        tokens = instructions_util.nltk.word_tokenize(sentences[self._n - 1])
        if self._m > len(tokens):
            return False
        return tokens[self._m - 1] == self._keyword
# Prefer the package-relative import; fall back to a plain import when this
# module is loaded with its directory placed directly on sys.path.
try:
    from . import ifeval_instructions as instructions
except ImportError:
    import ifeval_instructions as instructions

# Category prefixes. An instruction id is "<prefix><name>", e.g.
# "keywords:existence".
_PARAGRAPH = "paragraphs:"

_KEYWORD = "keywords:"

_LETTER = "letters:"

_LANGUAGE = "language:"

_LENGTH = "length_constraints:"

_CONTENT = "detectable_content:"

_FORMAT = "detectable_format:"

_MULTITURN = "multi-turn:"

_COMBINATION = "combination:"

_STARTEND = "startend:"

_CHANGE_CASES = "change_case:"

_PUNCTUATION = "punctuation:"

_NEW = "new:"

_COPY = "copy:"

_BASIC = "basic:"

_FIRSTWORD = "first_word:"

_LASTWORD = "last_word:"

_COUNT = "count:"


# Maps each instruction id to the checker class in `ifeval_instructions` that
# implements it.
FUNCTION_DICT = {
    # IFEval Constraints
    _KEYWORD + "existence": instructions.KeywordChecker,
    _KEYWORD + "frequency": instructions.KeywordFrequencyChecker,
    # TODO(jeffreyzhou): make a proper set of sentences to choose from
    # _KEYWORD + "key_sentences": instructions.KeySentenceChecker,
    _KEYWORD + "forbidden_words": instructions.ForbiddenWords,
    _KEYWORD + "letter_frequency": instructions.LetterFrequencyChecker,
    _LANGUAGE + "response_language": instructions.ResponseLanguageChecker,
    _LENGTH + "number_sentences": instructions.NumberOfSentences,
    _LENGTH + "number_paragraphs": instructions.ParagraphChecker,
    _LENGTH + "number_words": instructions.NumberOfWords,
    _LENGTH + "nth_paragraph_first_word": instructions.ParagraphFirstWordCheck,
    _CONTENT + "number_placeholders": instructions.PlaceholderChecker,
    _CONTENT + "postscript": instructions.PostscriptChecker,
    _FORMAT + "number_bullet_lists": instructions.BulletListChecker,
    # TODO(jeffreyzhou): Pre-create paragraph or use prompt to replace
    # _CONTENT + "rephrase_paragraph": instructions.RephraseParagraph,
    _FORMAT + "constrained_response": instructions.ConstrainedResponseChecker,
    _FORMAT + "number_highlighted_sections": (instructions.HighlightSectionChecker),
    _FORMAT + "multiple_sections": instructions.SectionChecker,
    # TODO(tianjianlu): Re-enable rephrasing with preprocessing the message.
    # _FORMAT + "rephrase": instructions.RephraseChecker,
    _FORMAT + "json_format": instructions.JsonFormat,
    _FORMAT + "title": instructions.TitleChecker,
    # TODO(tianjianlu): Re-enable with specific prompts.
    # _MULTITURN + "constrained_start": instructions.ConstrainedStartChecker,
    _COMBINATION + "two_responses": instructions.TwoResponsesChecker,
    _COMBINATION + "repeat_prompt": instructions.RepeatPromptThenAnswer,
    _STARTEND + "end_checker": instructions.EndChecker,
    _CHANGE_CASES + "capital_word_frequency": instructions.CapitalWordFrequencyChecker,
    _CHANGE_CASES + "english_capital": instructions.CapitalLettersEnglishChecker,
    _CHANGE_CASES + "english_lowercase": instructions.LowercaseLettersEnglishChecker,
    _PUNCTUATION + "no_comma": instructions.CommaChecker,
    _STARTEND + "quotation": instructions.QuotationChecker,
    # New Constraints!
    _COPY + "repeat_phrase": instructions.RepeatPhraseChecker,
    _COPY + "copy": instructions.CopyChecker,
    _NEW + "copy_span_idx": instructions.CopySpanIdxChecker,
    _FORMAT + "sentence_hyphens": instructions.SentenceHyphenChecker,
    _KEYWORD + "no_adjacent_consecutive": instructions.AdjacentLetterChecker,
    _FORMAT + "square_brackets": instructions.SquareBracketChecker,
    _KEYWORD + "word_once": instructions.KeywordFrequencyOnceChecker,
    _KEYWORD + "word_count_different_numbers": instructions.KeywordFrequencyCheckerDifferent,
    _KEYWORD + "exclude_word_harder": instructions.ExcludeWordHarderChecker,
    _PARAGRAPH + "paragraphs": instructions.ParagraphBasicChecker,
    _PARAGRAPH + "paragraphs2": instructions.ParagraphBasicChecker2,
    _FIRSTWORD + "first_word_sent": instructions.FirstWordSentChecker,
    _FIRSTWORD + "first_word_answer": instructions.FirstWordAnswerChecker,
    _LASTWORD + "last_word_sent": instructions.LastWordSentChecker,
    _LASTWORD + "last_word_answer": instructions.LastWordAnswerChecker,
    _FORMAT + "bigram_wrapping": instructions.BiGramWrappingChecker,
    _COPY + "copying_simple": instructions.CopyingSimpleChecker,
    _COPY + "copying_multiple": instructions.CopyingMultipleChecker,
    _PUNCTUATION + "punctuation_dot": instructions.PunctuationDotChecker,
    _PUNCTUATION + "punctuation_exclamation": instructions.PunctuationExclamationChecker,
    _COUNT + "lowercase_counting": instructions.LowercaseCountingChecker,
    _LETTER + "letter_counting": instructions.LetterCountingChecker,
    # NOTE(review): same checker class as "keywords:letter_frequency" above;
    # presumably an intentional alias - confirm against the dataset ids.
    _LETTER + "letter_counting2": instructions.LetterFrequencyChecker,
    _COUNT + "counting_composition": instructions.CountingCompositionChecker,
    _COUNT + "count_unique": instructions.CountUniqueChecker,
    _COUNT + "count_increment_word": instructions.CountIncrementWordChecker,
    _KEYWORD + "palindrome": instructions.PalindromeBasicChecker,
    _KEYWORD + "keyword_specific_position": instructions.KeywordSpecificPositionChecker,
    _KEYWORD + "start_end": instructions.StartEndChecker,
}
+ # _MULTITURN + "constrained_start": instructions.ConstrainedStartChecker, + _COMBINATION + "two_responses": instructions.TwoResponsesChecker, + _COMBINATION + "repeat_prompt": instructions.RepeatPromptThenAnswer, + _STARTEND + "end_checker": instructions.EndChecker, + _CHANGE_CASES + "capital_word_frequency": instructions.CapitalWordFrequencyChecker, + _CHANGE_CASES + "english_capital": instructions.CapitalLettersEnglishChecker, + _CHANGE_CASES + "english_lowercase": instructions.LowercaseLettersEnglishChecker, + _PUNCTUATION + "no_comma": instructions.CommaChecker, + _STARTEND + "quotation": instructions.QuotationChecker, + # New Constraints! + _COPY + "repeat_phrase": instructions.RepeatPhraseChecker, + _COPY + "copy": instructions.CopyChecker, + _NEW + "copy_span_idx": instructions.CopySpanIdxChecker, + _FORMAT + "sentence_hyphens": instructions.SentenceHyphenChecker, + _KEYWORD + "no_adjacent_consecutive": instructions.AdjacentLetterChecker, + _FORMAT + "square_brackets": instructions.SquareBracketChecker, + _KEYWORD + "word_once": instructions.KeywordFrequencyOnceChecker, + _KEYWORD + "word_count_different_numbers": instructions.KeywordFrequencyCheckerDifferent, + _KEYWORD + "exclude_word_harder": instructions.ExcludeWordHarderChecker, + _PARAGRAPH + "paragraphs": instructions.ParagraphBasicChecker, + _PARAGRAPH + "paragraphs2": instructions.ParagraphBasicChecker2, + _FIRSTWORD + "first_word_sent": instructions.FirstWordSentChecker, + _FIRSTWORD + "first_word_answer": instructions.FirstWordAnswerChecker, + _LASTWORD + "last_word_sent": instructions.LastWordSentChecker, + _LASTWORD + "last_word_answer": instructions.LastWordAnswerChecker, + _FORMAT + "bigram_wrapping": instructions.BiGramWrappingChecker, + _COPY + "copying_simple": instructions.CopyingSimpleChecker, + _COPY + "copying_multiple": instructions.CopyingMultipleChecker, + _PUNCTUATION + "punctuation_dot": instructions.PunctuationDotChecker, + _PUNCTUATION + "punctuation_exclamation": 
instructions.PunctuationExclamationChecker, + _COUNT + "lowercase_counting": instructions.LowercaseCountingChecker, + _LETTER + "letter_counting": instructions.LetterCountingChecker, + _LETTER + "letter_counting2": instructions.LetterFrequencyChecker, + _COUNT + "counting_composition": instructions.CountingCompositionChecker, + _COUNT + "count_unique": instructions.CountUniqueChecker, + _COUNT + "count_increment_word": instructions.CountIncrementWordChecker, + _KEYWORD + "palindrome": instructions.PalindromeBasicChecker, + _KEYWORD + "keyword_specific_position": instructions.KeywordSpecificPositionChecker, + _KEYWORD + "start_end": instructions.StartEndChecker, +} + +INSTRUCTION_CONFLICTS = { + _KEYWORD + "existence": {_KEYWORD + "existence"}, + _KEYWORD + "frequency": {_KEYWORD + "frequency"}, + # TODO(jeffreyzhou): make a proper set of sentences to choose from + # _KEYWORD + "key_sentences": instructions.KeySentenceChecker, + _KEYWORD + "forbidden_words": {_KEYWORD + "forbidden_words"}, + _KEYWORD + "letter_frequency": {_KEYWORD + "letter_frequency"}, + _LANGUAGE + "response_language": { + _LANGUAGE + "response_language", + _FORMAT + "multiple_sections", + _KEYWORD + "existence", + _KEYWORD + "frequency", + _KEYWORD + "forbidden_words", + _STARTEND + "end_checker", + _CHANGE_CASES + "english_capital", + _CHANGE_CASES + "english_lowercase", + }, + _LENGTH + "number_sentences": {_LENGTH + "number_sentences"}, + _LENGTH + "number_paragraphs": { + _LENGTH + "number_paragraphs", + _LENGTH + "nth_paragraph_first_word", + _LENGTH + "number_sentences", + _LENGTH + "nth_paragraph_first_word", + }, + _LENGTH + "number_words": {_LENGTH + "number_words"}, + _LENGTH + "nth_paragraph_first_word": {_LENGTH + "nth_paragraph_first_word", _LENGTH + "number_paragraphs"}, + _CONTENT + "number_placeholders": {_CONTENT + "number_placeholders"}, + _CONTENT + "postscript": {_CONTENT + "postscript"}, + _FORMAT + "number_bullet_lists": {_FORMAT + "number_bullet_lists"}, + # 
TODO(jeffreyzhou): Pre-create paragraph or use prompt to replace + # _CONTENT + "rephrase_paragraph": instructions.RephraseParagraph, + _FORMAT + "constrained_response": set(INSTRUCTION_DICT.keys()), + _FORMAT + "number_highlighted_sections": {_FORMAT + "number_highlighted_sections"}, + _FORMAT + "multiple_sections": { + _FORMAT + "multiple_sections", + _LANGUAGE + "response_language", + _FORMAT + "number_highlighted_sections", + }, + # TODO(tianjianlu): Re-enable rephrasing with preprocessing the message. + # _FORMAT + "rephrase": instructions.RephraseChecker, + _FORMAT + "json_format": set(INSTRUCTION_DICT.keys()).difference( + {_KEYWORD + "forbidden_words", _KEYWORD + "existence"} + ), + _FORMAT + "title": {_FORMAT + "title"}, + # TODO(tianjianlu): Re-enable with specific prompts. + # _MULTITURN + "constrained_start": instructions.ConstrainedStartChecker, + _COMBINATION + "two_responses": set(INSTRUCTION_DICT.keys()).difference( + { + _KEYWORD + "forbidden_words", + _KEYWORD + "existence", + _LANGUAGE + "response_language", + _FORMAT + "title", + _PUNCTUATION + "no_comma", + } + ), + _COMBINATION + "repeat_prompt": set(INSTRUCTION_DICT.keys()).difference( + {_KEYWORD + "existence", _FORMAT + "title", _PUNCTUATION + "no_comma"} + ), + _STARTEND + "end_checker": {_STARTEND + "end_checker"}, + _CHANGE_CASES + "capital_word_frequency": { + _CHANGE_CASES + "capital_word_frequency", + _CHANGE_CASES + "english_lowercase", + _CHANGE_CASES + "english_capital", + }, + _CHANGE_CASES + "english_capital": {_CHANGE_CASES + "english_capital"}, + _CHANGE_CASES + "english_lowercase": {_CHANGE_CASES + "english_lowercase", _CHANGE_CASES + "english_capital"}, + _PUNCTUATION + "no_comma": {_PUNCTUATION + "no_comma"}, + _STARTEND + "quotation": {_STARTEND + "quotation", _FORMAT + "title"}, + _COPY + "repeat_phrase": {_COPY + "repeat_phrase"}, + _COPY + "copy": set(INSTRUCTION_DICT.keys()), + _NEW + "copy_span_idx": set(INSTRUCTION_DICT.keys()), + _FORMAT + "sentence_hyphens": 
{_FORMAT + "sentence_hyphens"}, + _KEYWORD + "no_adjacent_consecutive": {_KEYWORD + "no_adjacent_consecutive"}, + _FORMAT + "square_brackets": {_FORMAT + "square_brackets"}, + _KEYWORD + "word_once": {_KEYWORD + "word_once"}, + _KEYWORD + "word_count_different_numbers": {_KEYWORD + "word_count_different_numbers"}, + _KEYWORD + "exclude_word_harder": {_KEYWORD + "exclude_word_harder"}, + _PARAGRAPH + "paragraphs": {_PARAGRAPH + "paragraphs", _PARAGRAPH + "paragraphs2"}, + _PARAGRAPH + "paragraphs2": {_PARAGRAPH + "paragraphs", _PARAGRAPH + "paragraphs2"}, + _FIRSTWORD + "first_word_sent": {_FIRSTWORD + "first_word_sent", _FIRSTWORD + "first_word_answer"}, + _FIRSTWORD + "first_word_answer": {_FIRSTWORD + "first_word_sent", _FIRSTWORD + "first_word_answer"}, + _LASTWORD + "last_word_sent": {_LASTWORD + "last_word_sent"}, + _LASTWORD + "last_word_answer": {_LASTWORD + "last_word_answer"}, + _FORMAT + "bigram_wrapping": {_FORMAT + "bigram_wrapping"}, + _COPY + "copying_simple": set(INSTRUCTION_DICT.keys()), + _COPY + "copying_multiple": set(INSTRUCTION_DICT.keys()), + _PUNCTUATION + "punctuation_dot": {_PUNCTUATION + "punctuation_dot"}, + _PUNCTUATION + "punctuation_exclamation": {_PUNCTUATION + "punctuation_exclamation"}, + _COUNT + "lowercase_counting": {_COUNT + "lowercase_counting"}, + _LETTER + "letter_counting": {_LETTER + "letter_counting"}, + _LETTER + "letter_counting2": {_LETTER + "letter_counting2"}, + _COUNT + "counting_composition": { + _COUNT + "counting_composition", + _COUNT + "count_unique", + _COUNT + "count_increment_word", + _PARAGRAPH + "paragraphs", + _PARAGRAPH + "paragraphs2", + _KEYWORD + "letter_frequency", + _KEYWORD + "frequency", + }, + _COUNT + "count_unique": {_COUNT + "count_unique"}, + _COUNT + "count_increment_word": {_COUNT + "count_increment_word"}, + _KEYWORD + "palindrome": {_KEYWORD + "palindrome"}, + _KEYWORD + "keyword_specific_position": {_KEYWORD + "keyword_specific_position"}, + _KEYWORD + "start_end": {_KEYWORD + 
"start_end"}, +} + + +def conflict_make(conflicts): + """Makes sure if A conflicts with B, B will conflict with A. + + Args: + conflicts: Dictionary of potential conflicts where key is instruction id + and value is set of instruction ids that it conflicts with. + + Returns: + Revised version of the dictionary. All instructions conflict with + themselves. If A conflicts with B, B will conflict with A. + """ + for key in conflicts: + for k in conflicts[key]: + conflicts[k].add(key) + conflicts[key].add(key) + return conflicts diff --git a/eval_protocol/rewards/ifeval/ifeval_util.py b/eval_protocol/rewards/ifeval/ifeval_util.py new file mode 100644 index 00000000..b37f5321 --- /dev/null +++ b/eval_protocol/rewards/ifeval/ifeval_util.py @@ -0,0 +1,1665 @@ +# Copyright 2024 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Utility library of instructions.""" + +import functools +import random +import re + +import immutabledict +import nltk + +WORD_LIST = [ + "western", + "sentence", + "signal", + "dump", + "spot", + "opposite", + "bottom", + "potato", + "administration", + "working", + "welcome", + "morning", + "good", + "agency", + "primary", + "wish", + "responsibility", + "press", + "problem", + "president", + "steal", + "brush", + "read", + "type", + "beat", + "trainer", + "growth", + "lock", + "bone", + "case", + "equal", + "comfortable", + "region", + "replacement", + "performance", + "mate", + "walk", + "medicine", + "film", + "thing", + "rock", + "tap", + "total", + "competition", + "ease", + "south", + "establishment", + "gather", + "parking", + "world", + "plenty", + "breath", + "claim", + "alcohol", + "trade", + "dear", + "highlight", + "street", + "matter", + "decision", + "mess", + "agreement", + "studio", + "coach", + "assist", + "brain", + "wing", + "style", + "private", + "top", + "brown", + "leg", + "buy", + "procedure", + "method", + "speed", + "high", + "company", + "valuable", + "pie", + "analyst", + "session", + "pattern", + "district", + "pleasure", + "dinner", + "swimming", + "joke", + "order", + "plate", + "department", + "motor", + "cell", + "spend", + "cabinet", + "difference", + "power", + "examination", + "engine", + "horse", + "dimension", + "pay", + "toe", + "curve", + "literature", + "bother", + "fire", + "possibility", + "debate", + "activity", + "passage", + "hello", + "cycle", + "background", + "quiet", + "author", + "effect", + "actor", + "page", + "bicycle", + "error", + "throat", + "attack", + "character", + "phone", + "tea", + "increase", + "outcome", + "file", + "specific", + "inspector", + "internal", + "potential", + "staff", + "building", + "employer", + "shoe", + "hand", + "direction", + "garden", + "purchase", + "interview", + "study", + "recognition", + "member", + "spiritual", + "oven", + "sandwich", + "weird", + "passenger", + 
"particular", + "response", + "reaction", + "size", + "variation", + "a", + "cancel", + "candy", + "exit", + "guest", + "condition", + "fly", + "price", + "weakness", + "convert", + "hotel", + "great", + "mouth", + "mind", + "song", + "sugar", + "suspect", + "telephone", + "ear", + "roof", + "paint", + "refrigerator", + "organization", + "jury", + "reward", + "engineering", + "day", + "possession", + "crew", + "bar", + "road", + "description", + "celebration", + "score", + "mark", + "letter", + "shower", + "suggestion", + "sir", + "luck", + "national", + "progress", + "hall", + "stroke", + "theory", + "offer", + "story", + "tax", + "definition", + "history", + "ride", + "medium", + "opening", + "glass", + "elevator", + "stomach", + "question", + "ability", + "leading", + "village", + "computer", + "city", + "grand", + "confidence", + "candle", + "priest", + "recommendation", + "point", + "necessary", + "body", + "desk", + "secret", + "horror", + "noise", + "culture", + "warning", + "water", + "round", + "diet", + "flower", + "bus", + "tough", + "permission", + "week", + "prompt", + "connection", + "abuse", + "height", + "save", + "corner", + "border", + "stress", + "drive", + "stop", + "rip", + "meal", + "listen", + "confusion", + "girlfriend", + "living", + "relation", + "significance", + "plan", + "creative", + "atmosphere", + "blame", + "invite", + "housing", + "paper", + "drink", + "roll", + "silver", + "drunk", + "age", + "damage", + "smoke", + "environment", + "pack", + "savings", + "influence", + "tourist", + "rain", + "post", + "sign", + "grandmother", + "run", + "profit", + "push", + "clerk", + "final", + "wine", + "swim", + "pause", + "stuff", + "singer", + "funeral", + "average", + "source", + "scene", + "tradition", + "personal", + "snow", + "nobody", + "distance", + "sort", + "sensitive", + "animal", + "major", + "negotiation", + "click", + "mood", + "period", + "arrival", + "expression", + "holiday", + "repeat", + "dust", + "closet", + "gold", + 
"bad", + "sail", + "combination", + "clothes", + "emphasis", + "duty", + "black", + "step", + "school", + "jump", + "document", + "professional", + "lip", + "chemical", + "front", + "wake", + "while", + "inside", + "watch", + "row", + "subject", + "penalty", + "balance", + "possible", + "adult", + "aside", + "sample", + "appeal", + "wedding", + "depth", + "king", + "award", + "wife", + "blow", + "site", + "camp", + "music", + "safe", + "gift", + "fault", + "guess", + "act", + "shame", + "drama", + "capital", + "exam", + "stupid", + "record", + "sound", + "swing", + "novel", + "minimum", + "ratio", + "machine", + "shape", + "lead", + "operation", + "salary", + "cloud", + "affair", + "hit", + "chapter", + "stage", + "quantity", + "access", + "army", + "chain", + "traffic", + "kick", + "analysis", + "airport", + "time", + "vacation", + "philosophy", + "ball", + "chest", + "thanks", + "place", + "mountain", + "advertising", + "red", + "past", + "rent", + "return", + "tour", + "house", + "construction", + "net", + "native", + "war", + "figure", + "fee", + "spray", + "user", + "dirt", + "shot", + "task", + "stick", + "friend", + "software", + "promotion", + "interaction", + "surround", + "block", + "purpose", + "practice", + "conflict", + "routine", + "requirement", + "bonus", + "hole", + "state", + "junior", + "sweet", + "catch", + "tear", + "fold", + "wall", + "editor", + "life", + "position", + "pound", + "respect", + "bathroom", + "coat", + "script", + "job", + "teach", + "birth", + "view", + "resolve", + "theme", + "employee", + "doubt", + "market", + "education", + "serve", + "recover", + "tone", + "harm", + "miss", + "union", + "understanding", + "cow", + "river", + "association", + "concept", + "training", + "recipe", + "relationship", + "reserve", + "depression", + "proof", + "hair", + "revenue", + "independent", + "lift", + "assignment", + "temporary", + "amount", + "loss", + "edge", + "track", + "check", + "rope", + "estimate", + "pollution", + "stable", + 
"message", + "delivery", + "perspective", + "mirror", + "assistant", + "representative", + "witness", + "nature", + "judge", + "fruit", + "tip", + "devil", + "town", + "emergency", + "upper", + "drop", + "stay", + "human", + "neck", + "speaker", + "network", + "sing", + "resist", + "league", + "trip", + "signature", + "lawyer", + "importance", + "gas", + "choice", + "engineer", + "success", + "part", + "external", + "worker", + "simple", + "quarter", + "student", + "heart", + "pass", + "spite", + "shift", + "rough", + "lady", + "grass", + "community", + "garage", + "youth", + "standard", + "skirt", + "promise", + "blind", + "television", + "disease", + "commission", + "positive", + "energy", + "calm", + "presence", + "tune", + "basis", + "preference", + "head", + "common", + "cut", + "somewhere", + "presentation", + "current", + "thought", + "revolution", + "effort", + "master", + "implement", + "republic", + "floor", + "principle", + "stranger", + "shoulder", + "grade", + "button", + "tennis", + "police", + "collection", + "account", + "register", + "glove", + "divide", + "professor", + "chair", + "priority", + "combine", + "peace", + "extension", + "maybe", + "evening", + "frame", + "sister", + "wave", + "code", + "application", + "mouse", + "match", + "counter", + "bottle", + "half", + "cheek", + "resolution", + "back", + "knowledge", + "make", + "discussion", + "screw", + "length", + "accident", + "battle", + "dress", + "knee", + "log", + "package", + "it", + "turn", + "hearing", + "newspaper", + "layer", + "wealth", + "profile", + "imagination", + "answer", + "weekend", + "teacher", + "appearance", + "meet", + "bike", + "rise", + "belt", + "crash", + "bowl", + "equivalent", + "support", + "image", + "poem", + "risk", + "excitement", + "remote", + "secretary", + "public", + "produce", + "plane", + "display", + "money", + "sand", + "situation", + "punch", + "customer", + "title", + "shake", + "mortgage", + "option", + "number", + "pop", + "window", + "extent", + 
"nothing", + "experience", + "opinion", + "departure", + "dance", + "indication", + "boy", + "material", + "band", + "leader", + "sun", + "beautiful", + "muscle", + "farmer", + "variety", + "fat", + "handle", + "director", + "opportunity", + "calendar", + "outside", + "pace", + "bath", + "fish", + "consequence", + "put", + "owner", + "go", + "doctor", + "information", + "share", + "hurt", + "protection", + "career", + "finance", + "force", + "golf", + "garbage", + "aspect", + "kid", + "food", + "boot", + "milk", + "respond", + "objective", + "reality", + "raw", + "ring", + "mall", + "one", + "impact", + "area", + "news", + "international", + "series", + "impress", + "mother", + "shelter", + "strike", + "loan", + "month", + "seat", + "anything", + "entertainment", + "familiar", + "clue", + "year", + "glad", + "supermarket", + "natural", + "god", + "cost", + "conversation", + "tie", + "ruin", + "comfort", + "earth", + "storm", + "percentage", + "assistance", + "budget", + "strength", + "beginning", + "sleep", + "other", + "young", + "unit", + "fill", + "store", + "desire", + "hide", + "value", + "cup", + "maintenance", + "nurse", + "function", + "tower", + "role", + "class", + "camera", + "database", + "panic", + "nation", + "basket", + "ice", + "art", + "spirit", + "chart", + "exchange", + "feedback", + "statement", + "reputation", + "search", + "hunt", + "exercise", + "nasty", + "notice", + "male", + "yard", + "annual", + "collar", + "date", + "platform", + "plant", + "fortune", + "passion", + "friendship", + "spread", + "cancer", + "ticket", + "attitude", + "island", + "active", + "object", + "service", + "buyer", + "bite", + "card", + "face", + "steak", + "proposal", + "patient", + "heat", + "rule", + "resident", + "broad", + "politics", + "west", + "knife", + "expert", + "girl", + "design", + "salt", + "baseball", + "grab", + "inspection", + "cousin", + "couple", + "magazine", + "cook", + "dependent", + "security", + "chicken", + "version", + "currency", + 
"ladder", + "scheme", + "kitchen", + "employment", + "local", + "attention", + "manager", + "fact", + "cover", + "sad", + "guard", + "relative", + "county", + "rate", + "lunch", + "program", + "initiative", + "gear", + "bridge", + "breast", + "talk", + "dish", + "guarantee", + "beer", + "vehicle", + "reception", + "woman", + "substance", + "copy", + "lecture", + "advantage", + "park", + "cold", + "death", + "mix", + "hold", + "scale", + "tomorrow", + "blood", + "request", + "green", + "cookie", + "church", + "strip", + "forever", + "beyond", + "debt", + "tackle", + "wash", + "following", + "feel", + "maximum", + "sector", + "sea", + "property", + "economics", + "menu", + "bench", + "try", + "language", + "start", + "call", + "solid", + "address", + "income", + "foot", + "senior", + "honey", + "few", + "mixture", + "cash", + "grocery", + "link", + "map", + "form", + "factor", + "pot", + "model", + "writer", + "farm", + "winter", + "skill", + "anywhere", + "birthday", + "policy", + "release", + "husband", + "lab", + "hurry", + "mail", + "equipment", + "sink", + "pair", + "driver", + "consideration", + "leather", + "skin", + "blue", + "boat", + "sale", + "brick", + "two", + "feed", + "square", + "dot", + "rush", + "dream", + "location", + "afternoon", + "manufacturer", + "control", + "occasion", + "trouble", + "introduction", + "advice", + "bet", + "eat", + "kill", + "category", + "manner", + "office", + "estate", + "pride", + "awareness", + "slip", + "crack", + "client", + "nail", + "shoot", + "membership", + "soft", + "anybody", + "web", + "official", + "individual", + "pizza", + "interest", + "bag", + "spell", + "profession", + "queen", + "deal", + "resource", + "ship", + "guy", + "chocolate", + "joint", + "formal", + "upstairs", + "car", + "resort", + "abroad", + "dealer", + "associate", + "finger", + "surgery", + "comment", + "team", + "detail", + "crazy", + "path", + "tale", + "initial", + "arm", + "radio", + "demand", + "single", + "draw", + "yellow", + 
"contest", + "piece", + "quote", + "pull", + "commercial", + "shirt", + "contribution", + "cream", + "channel", + "suit", + "discipline", + "instruction", + "concert", + "speech", + "low", + "effective", + "hang", + "scratch", + "industry", + "breakfast", + "lay", + "join", + "metal", + "bedroom", + "minute", + "product", + "rest", + "temperature", + "many", + "give", + "argument", + "print", + "purple", + "laugh", + "health", + "credit", + "investment", + "sell", + "setting", + "lesson", + "egg", + "middle", + "marriage", + "level", + "evidence", + "phrase", + "love", + "self", + "benefit", + "guidance", + "affect", + "you", + "dad", + "anxiety", + "special", + "boyfriend", + "test", + "blank", + "payment", + "soup", + "obligation", + "reply", + "smile", + "deep", + "complaint", + "addition", + "review", + "box", + "towel", + "minor", + "fun", + "soil", + "issue", + "cigarette", + "internet", + "gain", + "tell", + "entry", + "spare", + "incident", + "family", + "refuse", + "branch", + "can", + "pen", + "grandfather", + "constant", + "tank", + "uncle", + "climate", + "ground", + "volume", + "communication", + "kind", + "poet", + "child", + "screen", + "mine", + "quit", + "gene", + "lack", + "charity", + "memory", + "tooth", + "fear", + "mention", + "marketing", + "reveal", + "reason", + "court", + "season", + "freedom", + "land", + "sport", + "audience", + "classroom", + "law", + "hook", + "win", + "carry", + "eye", + "smell", + "distribution", + "research", + "country", + "dare", + "hope", + "whereas", + "stretch", + "library", + "if", + "delay", + "college", + "plastic", + "book", + "present", + "use", + "worry", + "champion", + "goal", + "economy", + "march", + "election", + "reflection", + "midnight", + "slide", + "inflation", + "action", + "challenge", + "guitar", + "coast", + "apple", + "campaign", + "field", + "jacket", + "sense", + "way", + "visual", + "remove", + "weather", + "trash", + "cable", + "regret", + "buddy", + "beach", + "historian", + "courage", 
+ "sympathy", + "truck", + "tension", + "permit", + "nose", + "bed", + "son", + "person", + "base", + "meat", + "usual", + "air", + "meeting", + "worth", + "game", + "independence", + "physical", + "brief", + "play", + "raise", + "board", + "she", + "key", + "writing", + "pick", + "command", + "party", + "yesterday", + "spring", + "candidate", + "physics", + "university", + "concern", + "development", + "change", + "string", + "target", + "instance", + "room", + "bitter", + "bird", + "football", + "normal", + "split", + "impression", + "wood", + "long", + "meaning", + "stock", + "cap", + "leadership", + "media", + "ambition", + "fishing", + "essay", + "salad", + "repair", + "today", + "designer", + "night", + "bank", + "drawing", + "inevitable", + "phase", + "vast", + "chip", + "anger", + "switch", + "cry", + "twist", + "personality", + "attempt", + "storage", + "being", + "preparation", + "bat", + "selection", + "white", + "technology", + "contract", + "side", + "section", + "station", + "till", + "structure", + "tongue", + "taste", + "truth", + "difficulty", + "group", + "limit", + "main", + "move", + "feeling", + "light", + "example", + "mission", + "might", + "wait", + "wheel", + "shop", + "host", + "classic", + "alternative", + "cause", + "agent", + "consist", + "table", + "airline", + "text", + "pool", + "craft", + "range", + "fuel", + "tool", + "partner", + "load", + "entrance", + "deposit", + "hate", + "article", + "video", + "summer", + "feature", + "extreme", + "mobile", + "hospital", + "flight", + "fall", + "pension", + "piano", + "fail", + "result", + "rub", + "gap", + "system", + "report", + "suck", + "ordinary", + "wind", + "nerve", + "ask", + "shine", + "note", + "line", + "mom", + "perception", + "brother", + "reference", + "bend", + "charge", + "treat", + "trick", + "term", + "homework", + "bake", + "bid", + "status", + "project", + "strategy", + "orange", + "let", + "enthusiasm", + "parent", + "concentrate", + "device", + "travel", + "poetry", + 
"business", + "society", + "kiss", + "end", + "vegetable", + "employ", + "schedule", + "hour", + "brave", + "focus", + "process", + "movie", + "illegal", + "general", + "coffee", + "ad", + "highway", + "chemistry", + "psychology", + "hire", + "bell", + "conference", + "relief", + "show", + "neat", + "funny", + "weight", + "quality", + "club", + "daughter", + "zone", + "touch", + "tonight", + "shock", + "burn", + "excuse", + "name", + "survey", + "landscape", + "advance", + "satisfaction", + "bread", + "disaster", + "item", + "hat", + "prior", + "shopping", + "visit", + "east", + "photo", + "home", + "idea", + "father", + "comparison", + "cat", + "pipe", + "winner", + "count", + "lake", + "fight", + "prize", + "foundation", + "dog", + "keep", + "ideal", + "fan", + "struggle", + "peak", + "safety", + "solution", + "hell", + "conclusion", + "population", + "strain", + "alarm", + "measurement", + "second", + "train", + "race", + "due", + "insurance", + "boss", + "tree", + "monitor", + "sick", + "course", + "drag", + "appointment", + "slice", + "still", + "care", + "patience", + "rich", + "escape", + "emotion", + "royal", + "female", + "childhood", + "government", + "picture", + "will", + "sock", + "big", + "gate", + "oil", + "cross", + "pin", + "improvement", + "championship", + "silly", + "help", + "sky", + "pitch", + "man", + "diamond", + "most", + "transition", + "work", + "science", + "committee", + "moment", + "fix", + "teaching", + "dig", + "specialist", + "complex", + "guide", + "people", + "dead", + "voice", + "original", + "break", + "topic", + "data", + "degree", + "reading", + "recording", + "bunch", + "reach", + "judgment", + "lie", + "regular", + "set", + "painting", + "mode", + "list", + "player", + "bear", + "north", + "wonder", + "carpet", + "heavy", + "officer", + "negative", + "clock", + "unique", + "baby", + "pain", + "assumption", + "disk", + "iron", + "bill", + "drawer", + "look", + "double", + "mistake", + "finish", + "future", + "brilliant", + 
"contact", + "math", + "rice", + "leave", + "restaurant", + "discount", + "sex", + "virus", + "bit", + "trust", + "event", + "wear", + "juice", + "failure", + "bug", + "context", + "mud", + "whole", + "wrap", + "intention", + "draft", + "pressure", + "cake", + "dark", + "explanation", + "space", + "angle", + "word", + "efficiency", + "management", + "habit", + "star", + "chance", + "finding", + "transportation", + "stand", + "criticism", + "flow", + "door", + "injury", + "insect", + "surprise", + "apartment", +] # pylint: disable=line-too-long + +# ISO 639-1 codes to language names. +LANGUAGE_CODES = immutabledict.immutabledict( + { + "en": "English", + "es": "Spanish", + "pt": "Portuguese", + "ar": "Arabic", + "hi": "Hindi", + "fr": "French", + "ru": "Russian", + "de": "German", + "ja": "Japanese", + "it": "Italian", + "bn": "Bengali", + "uk": "Ukrainian", + "th": "Thai", + "ur": "Urdu", + "ta": "Tamil", + "te": "Telugu", + "bg": "Bulgarian", + "ko": "Korean", + "pl": "Polish", + "he": "Hebrew", + "fa": "Persian", + "vi": "Vietnamese", + "ne": "Nepali", + "sw": "Swahili", + "kn": "Kannada", + "mr": "Marathi", + "gu": "Gujarati", + "pa": "Punjabi", + "ml": "Malayalam", + "fi": "Finnish", + } +) + +_ALPHABETS = "([A-Za-z])" +_PREFIXES = "(Mr|St|Mrs|Ms|Dr)[.]" +_SUFFIXES = "(Inc|Ltd|Jr|Sr|Co)" +_STARTERS = ( + r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)" +) +_ACRONYMS = "([A-Z][.][A-Z][.](?:[A-Z][.])?)" +_WEBSITES = "[.](com|net|org|io|gov|edu|me)" +_DIGITS = "([0-9])" +_MULTIPLE_DOTS = r"\.{2,}" + + +def split_into_sentences(text): + """Split the text into sentences. + + Args: + text: A string that consists of more than or equal to one sentences. + + Returns: + A list of strings where each string is a sentence. 
+ """ + text = " " + text + " " + text = text.replace("\n", " ") + text = re.sub(_PREFIXES, "\\1", text) + text = re.sub(_WEBSITES, "\\1", text) + text = re.sub(_DIGITS + "[.]" + _DIGITS, "\\1\\2", text) + text = re.sub(_MULTIPLE_DOTS, lambda match: "" * len(match.group(0)) + "", text) + if "Ph.D" in text: + text = text.replace("Ph.D.", "PhD") + text = re.sub(r"\s" + _ALPHABETS + "[.] ", " \\1 ", text) + text = re.sub(_ACRONYMS + " " + _STARTERS, "\\1 \\2", text) + text = re.sub(_ALPHABETS + "[.]" + _ALPHABETS + "[.]" + _ALPHABETS + "[.]", "\\1\\2\\3", text) + text = re.sub(_ALPHABETS + "[.]" + _ALPHABETS + "[.]", "\\1\\2", text) + text = re.sub(" " + _SUFFIXES + "[.] " + _STARTERS, " \\1 \\2", text) + text = re.sub(" " + _SUFFIXES + "[.]", " \\1", text) + text = re.sub(" " + _ALPHABETS + "[.]", " \\1", text) + if "”" in text: + text = text.replace(".”", "”.") + if '"' in text: + text = text.replace('."', '".') + if "!" in text: + text = text.replace('!"', '"!') + if "?" in text: + text = text.replace('?"', '"?') + text = text.replace(".", ".") + text = text.replace("?", "?") + text = text.replace("!", "!") + text = text.replace("", ".") + sentences = text.split("") + sentences = [s.strip() for s in sentences] + if sentences and not sentences[-1]: + sentences = sentences[:-1] + return sentences + + +def count_words(text): + """Counts the number of words.""" + tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+") + tokens = tokenizer.tokenize(text) + num_words = len(tokens) + return num_words + + +@functools.cache +def _get_sentence_tokenizer(): + return nltk.data.load("nltk:tokenizers/punkt/english.pickle") + + +def count_sentences(text): + """Count the number of sentences.""" + tokenizer = _get_sentence_tokenizer() + tokenized_sentences = tokenizer.tokenize(text) + return len(tokenized_sentences) + + +def generate_keywords(num_keywords): + """Randomly generates a few keywords.""" + return random.sample(WORD_LIST, k=num_keywords) diff --git 
a/eval_protocol/rewards/ifeval/reward.py b/eval_protocol/rewards/ifeval/reward.py new file mode 100644 index 00000000..a6ba8514 --- /dev/null +++ b/eval_protocol/rewards/ifeval/reward.py @@ -0,0 +1,101 @@ +"""IFEval partial credit reward function. + +Score = (number of constraints satisfied) / (total constraints) +""" + +import ast +import json +from typing import Any + +# Import both instruction registries +# Try relative import first (when used as package), fall back to direct import +try: + from .ifeval_registry import INSTRUCTION_DICT as IFEVAL_INSTRUCTION_DICT + from .ifbench_registry import INSTRUCTION_DICT as IFBENCH_INSTRUCTION_DICT +except ImportError: + from ifeval_registry import INSTRUCTION_DICT as IFEVAL_INSTRUCTION_DICT + from ifbench_registry import INSTRUCTION_DICT as IFBENCH_INSTRUCTION_DICT + +# Combine both registries: IFEval (54) + IFBench OOD (58) +INSTRUCTION_DICT = {} +INSTRUCTION_DICT.update(IFEVAL_INSTRUCTION_DICT) +INSTRUCTION_DICT.update(IFBENCH_INSTRUCTION_DICT) + + +def ifeval_partial_credit_reward( + response: str, + ground_truth: dict | str | list, + strip_thinking: bool = True, +) -> float: + """ + Calculate IFEval partial credit score for a response. + + Args: + response: The model's response text. + ground_truth: Constraint specification. Can be: + - A dict with 'instruction_id' and 'kwargs' keys + - A list containing such a dict + - A JSON string encoding of the above + strip_thinking: If True, strip ... tags from response. + + Returns: + Float score in [0, 1] representing fraction of constraints satisfied. 
+ + Example: + ground_truth = { + "instruction_id": ["keywords:existence", "length_constraints:number_words"], + "kwargs": [{"keywords": ["hello"]}, {"num_words": 100, "relation": "at least"}] + } + score = ifeval_partial_credit_reward(response, ground_truth) + """ + if not response: + return 0.0 + + # Strip thinking tags if present + if strip_thinking and "" in response: + response = response.split("")[-1].strip() + + # Parse ground_truth + if isinstance(ground_truth, str): + try: + constraint_dict = json.loads(ground_truth) + except json.JSONDecodeError: + constraint_dict = ast.literal_eval(ground_truth) + else: + constraint_dict = ground_truth + + # Handle list wrapper + if isinstance(constraint_dict, list): + constraint_dict = constraint_dict[0] + + # Get instruction IDs and kwargs + instruction_keys = constraint_dict["instruction_id"] + args_list = constraint_dict["kwargs"] + + # Check each constraint and assign partial credit + num_satisfied = 0 + num_total = len(instruction_keys) + + for instruction_key, args in zip(instruction_keys, args_list): + if args is None: + args = {} + args = {k: v for k, v in args.items() if v is not None} + + if instruction_key not in INSTRUCTION_DICT: + # Unknown constraint, skip but count as not satisfied + continue + + instruction_cls = INSTRUCTION_DICT[instruction_key] + instruction_instance = instruction_cls(instruction_key) + instruction_instance.build_description(**args) + + try: + if response.strip() and instruction_instance.check_following(response): + num_satisfied += 1 + except (IndexError, AttributeError, ZeroDivisionError, ValueError): + # Library has bugs with empty/malformed/short responses + # Treat as constraint not satisfied + pass + + # Partial credit: fraction of constraints satisfied + return num_satisfied / num_total if num_total > 0 else 0.0 From fe95eb56398bbabec409823ffd6917f403af4339 Mon Sep 17 00:00:00 2001 From: SandyYuan Date: Fri, 16 Jan 2026 18:07:33 +0000 Subject: [PATCH 2/7] added readme text --- 
eval_protocol/rewards/ifeval/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/eval_protocol/rewards/ifeval/README.md b/eval_protocol/rewards/ifeval/README.md index 8c382dd1..88417924 100644 --- a/eval_protocol/rewards/ifeval/README.md +++ b/eval_protocol/rewards/ifeval/README.md @@ -31,3 +31,19 @@ python -m spacy download en_core_web_sm - Automatically strips `...` tags before evaluation - Ground truth can be a dict, list, or JSON string - 112 total constraints (54 IFEval/IFTrain + 58 IFBench OOD) + +## File Sources + +**Copied from `open-instruct/open_instruct/IFEvalG/`:** +- `ifeval_instructions.py` (from `instructions.py`) +- `ifeval_registry.py` (from `instructions_registry.py`) +- `ifeval_util.py` (from `instructions_util.py`) + +**Copied from `IFBench/`:** +- `ifbench_instructions.py` (from `instructions.py`) +- `ifbench_registry.py` (from `instructions_registry.py`) +- `ifbench_util.py` (from `instructions_util.py`) + +**New code:** +- `reward.py` - main reward function +- `__init__.py` - package exports From 8d465bc24b5323078bb0c5cc2c6c527f4242fc39 Mon Sep 17 00:00:00 2001 From: SandyYuan Date: Fri, 16 Jan 2026 18:11:19 +0000 Subject: [PATCH 3/7] ifbench updates --- eval_protocol/rewards/ifeval/README.md | 7 +- eval_protocol/rewards/ifeval/__init__.py | 5 - .../rewards/ifeval/ifbench_instructions.py | 95 ++++++++++++------- eval_protocol/rewards/ifeval/ifbench_util.py | 73 ++++---------- 4 files changed, 81 insertions(+), 99 deletions(-) diff --git a/eval_protocol/rewards/ifeval/README.md b/eval_protocol/rewards/ifeval/README.md index 88417924..3041d0c6 100644 --- a/eval_protocol/rewards/ifeval/README.md +++ b/eval_protocol/rewards/ifeval/README.md @@ -22,10 +22,11 @@ score = ifeval_partial_credit_reward(response, ground_truth) ## Dependencies ```bash -pip install spacy nltk langdetect emoji syllapy immutabledict -python -m spacy download en_core_web_sm +pip install nltk langdetect emoji syllapy immutabledict absl-py ``` +NLTK 
resources are downloaded automatically on first use. + ## Notes - Automatically strips `...` tags before evaluation @@ -39,7 +40,7 @@ python -m spacy download en_core_web_sm - `ifeval_registry.py` (from `instructions_registry.py`) - `ifeval_util.py` (from `instructions_util.py`) -**Copied from `IFBench/`:** +**Copied from `IFBench/` (commit 8e6a9be, 2025-01):** - `ifbench_instructions.py` (from `instructions.py`) - `ifbench_registry.py` (from `instructions_registry.py`) - `ifbench_util.py` (from `instructions_util.py`) diff --git a/eval_protocol/rewards/ifeval/__init__.py b/eval_protocol/rewards/ifeval/__init__.py index 7b067d0c..95a56b19 100644 --- a/eval_protocol/rewards/ifeval/__init__.py +++ b/eval_protocol/rewards/ifeval/__init__.py @@ -1,11 +1,6 @@ """IFEval reward function for evaluating instruction-following capabilities. Usage: - # Option 1: Import spacy first to avoid cupy conflicts in some Docker environments - import spacy - from eval_protocol.rewards.ifeval import ifeval_partial_credit_reward - - # Option 2: Direct import (add ifeval dir to path) import sys sys.path.insert(0, '/path/to/eval_protocol/rewards/ifeval') from reward import ifeval_partial_credit_reward diff --git a/eval_protocol/rewards/ifeval/ifbench_instructions.py b/eval_protocol/rewards/ifeval/ifbench_instructions.py index 98dfbd92..08d77544 100644 --- a/eval_protocol/rewards/ifeval/ifbench_instructions.py +++ b/eval_protocol/rewards/ifeval/ifbench_instructions.py @@ -15,13 +15,20 @@ """Library of instructions.""" import logging +import os import random import re import string +from pathlib import Path from typing import Dict, Optional, Sequence, Union + +# Set NLTK data path to local directory before importing nltk +_nltk_data_dir = Path(__file__).parent / ".nltk_data" +_nltk_data_dir.mkdir(exist_ok=True) +os.environ.setdefault("NLTK_DATA", str(_nltk_data_dir)) + import nltk -import spacy -from spacy.cli import download +nltk.data.path.insert(0, str(_nltk_data_dir)) import emoji import 
syllapy import unicodedata @@ -29,12 +36,7 @@ import csv import io -try: - from . import ifbench_util as instructions_util -except ImportError: - import ifbench_util as instructions_util - -download('en_core_web_sm') +import ifbench_util as instructions_util logger = logging.getLogger(__name__) @@ -208,6 +210,8 @@ def get_instruction_args_keys(self): def check_following(self, value): """Checks if the response contains the expected percentage of stop words.""" num_words = instructions_util.count_words(value) + if num_words == 0: + return False num_stopwords = instructions_util.count_stopwords(value) stopword_percentage = (num_stopwords / num_words) * 100 return stopword_percentage <= self._percentage @@ -219,7 +223,7 @@ class SentTypeRatioChecker(Instruction): def build_description(self): """Build the instruction description.""" self._description_pattern = "Maintain a 2:1 ratio of declarative to interrogative sentences." - nltk.download('punkt_tab') + return self._description_pattern def get_instruction_args(self): @@ -245,7 +249,7 @@ class SentBalanceChecker(Instruction): def build_description(self): """Build the instruction description.""" - nltk.download('punkt_tab') + self._description_pattern = "Ensure that the ratio of sentence types (declarative, interrogative, exclamatory) is balanced." return self._description_pattern @@ -310,7 +314,7 @@ def check_following(self, value): class PersonNameCountChecker(Instruction): - """Mention at least {N} different person names in the response.""" + """Mention at least {N} different person names in the response, from this list of person names: Emma, Liam, Sophia...""" def build_description(self, *, N=None): """Build the instruction description. 
@@ -326,8 +330,6 @@ def build_description(self, *, N=None): if self._num_person_names is None or self._num_person_names < 0: self._num_person_names = random.randint(1, 50) - self.nlp = spacy.load("en_core_web_sm") - self._description_pattern = "Mention at least {N} different person names in the response, from this list of person names: Emma, Liam, Sophia, Jackson, Olivia, Noah, Ava, Lucas, Isabella, Mason, Mia, Ethan, Charlotte, Alexander, Amelia, Benjamin, Harper, Leo, Zoe, Daniel, Chloe, Samuel, Lily, Matthew, Grace, Owen, Abigail, Gabriel, Ella, Jacob, Scarlett, Nathan, Victoria, Elijah, Layla, Nicholas, Audrey, David, Hannah, Christopher, Penelope, Thomas, Nora, Andrew, Aria, Joseph, Claire, Ryan, Stella, Jonathan ." return self._description_pattern.format(N=self._num_person_names) @@ -384,7 +386,9 @@ def check_following(self, value): # Extract the named entities person_names = [] for name in person_name_list: - if name in value: + # Use regex with word boundaries + pattern = r'\b{}\b'.format(re.escape(name)) + if re.search(pattern, value): person_names.append(name) unique_person_names = set(person_names) @@ -426,6 +430,8 @@ def check_following(self, value): n = 3 ngrams = set(nltk.ngrams(value, n)) ref_ngrams = set(nltk.ngrams(self._reference_text, n)) + if not ngrams: + return False overlap = len(ngrams.intersection(ref_ngrams)) / len(ngrams) return self._percentage - 2 <= overlap * 100 <= self._percentage + 2 @@ -486,6 +492,8 @@ def check_following(self, value): """Checks if each word of the response starts with the next letter of the alphabet.""" value = value.translate(str.maketrans('', '', string.punctuation)) words = value.strip(''.join(string.punctuation) + ' ').split() + if not words: + return False alphabet = string.ascii_lowercase correct_letter = words[0][0].lower() if correct_letter not in alphabet: # numbers are fails @@ -564,7 +572,7 @@ class IncrementingAlliterationChecker(Instruction): def build_description(self): """Build the instruction 
description.""" - nltk.download('punkt_tab') + self._description_pattern = "Each sentence must have a longer sequence of consecutive alliterative words than the previous one." return self._description_pattern @@ -851,7 +859,7 @@ class EmojiSentenceChecker(Instruction): def build_description(self): """Build the instruction description.""" - nltk.download('punkt_tab') + self._description_pattern = "Please use an emoji at the end of every sentence." return self._description_pattern @@ -869,6 +877,9 @@ def check_following(self, value): sentences = instructions_util.split_into_sentences(value) for i, sentence in enumerate(sentences): stripped = sentence.translate(str.maketrans('', '', string.punctuation)).strip() + # check for empty string + if not stripped: + return False last_char = stripped[-1] # because blank spaces are treated oddly second_last_char = stripped[-2] if len(stripped) > 1 else stripped[-1] @@ -891,7 +902,7 @@ class CharacterCountUniqueWordsChecker(Instruction): def build_description(self): """Build the instruction description.""" - nltk.download('punkt_tab') + self._description_pattern = "Respond with three sentences, all containing the same number of characters but using all different words." return self._description_pattern @@ -980,7 +991,7 @@ class StartWithVerbChecker(Instruction): def build_description(self): """Build the instruction description.""" self._description_pattern = "The response must start with a verb." - nltk.download('averaged_perceptron_tagger_eng') + return self._description_pattern def get_instruction_args(self): @@ -1050,7 +1061,7 @@ def build_description(self, *, word=None, N=None): Returns: A string representing the instruction description. 
""" - nltk.download('punkt_tab') + if not word: self._keyword = instructions_util.generate_keywords( @@ -1078,7 +1089,9 @@ def check_following(self, value): sentences = instructions_util.split_into_sentences(value) if len(sentences) < self._keyword_position: return False - return self._keyword.lower() in sentences[int(self._keyword_position - 1)].lower() + # Use regex with word boundaries for robust matching + pattern = r'\b{}\b'.format(re.escape(self._keyword)) + return bool(re.search(pattern, sentences[int(self._keyword_position - 1)], re.IGNORECASE)) class PronounCountChecker(Instruction): @@ -1117,8 +1130,8 @@ def check_following(self, value): 'itself', 'they', 'them', 'their', 'theirs', 'themselves']) value = value.replace('/', ' ') # to correctly count pronoun sets like she/her/hers, a common use case of pronouns - value = value.lower().translate(str.maketrans('', '', string.punctuation)) - words = value.split() + # Use NLTK word_tokenize for better tokenization + words = nltk.word_tokenize(value.lower()) pronoun_count = sum(1 for word in words if word in pronouns) return pronoun_count >= self._num_pronouns @@ -1151,7 +1164,7 @@ class LastWordFirstNextChecker(Instruction): def build_description(self): """Build the instruction description.""" - nltk.download('punkt_tab') + self._description_pattern = "The last word of each sentence must become the first word of the next sentence." 
return self._description_pattern @@ -1167,9 +1180,11 @@ def check_following(self, value): """Checks if the last word of each sentence in the response is the first word of the next sentence.""" sentences = instructions_util.split_into_sentences(value) for i in range(len(sentences) - 1): - last_word = sentences[i].rstrip(''.join(string.punctuation) + ' ').split()[-1] - first_word = sentences[i + 1].lstrip(''.join(string.punctuation) + ' ').split()[0] - if last_word.lower() != first_word.lower(): + last_words = sentences[i].rstrip(''.join(string.punctuation) + ' ').split() + first_words = sentences[i + 1].lstrip(''.join(string.punctuation) + ' ').split() + if not last_words or not first_words: + return False + if last_words[-1].lower() != first_words[0].lower(): return False return True @@ -1222,7 +1237,7 @@ def build_description(self, *, small_n=None): if self._num_increment is None or self._num_increment < 0: self._num_increment = random.randint(1, _NUM_INCREMENT) - nltk.download('punkt_tab') + self._description_pattern = "Each sentence must contain exactly {small_n} more words than the previous one." return self._description_pattern.format(small_n=self._num_increment) @@ -1326,12 +1341,13 @@ def get_instruction_args_keys(self): def check_following(self, value): """Checks if there are no quotes next to each other and the passage does not end with a quote.""" - value = value.replace('“', '"').replace('”', '"') + value = value.replace('"', '"').replace('"', '"') value = value.replace("'\"'", '') # remove references to the character '"' value = ''.join(value.split()) # remove all whitespace if '""' in value: return False - if value.strip(string.digits + string.punctuation.replace('"', ''))[-1] == '"': + stripped = value.strip(string.digits + string.punctuation.replace('"', '')) + if stripped and stripped[-1] == '"': return False return True @@ -1605,7 +1621,7 @@ class WordReverseOrderChecker(Instruction): """What animal is the national symbol of the US? 
Respond to this query, but make your sentence in reverse order of what it should be, per word.""" def build_description(self, **kwargs): - nltk.download('punkt_tab') + self._description_pattern = "What animal is the national symbol of the US? Respond to this query, but make your sentence in reverse order of what it should be, per word." return self._description_pattern @@ -1650,7 +1666,7 @@ class SentenceAlphabetChecker(Instruction): """Tell me a 26-sentence story where each sentence's first word starts with the letters of the alphabet in order.""" def build_description(self, **kwargs): - nltk.download('punkt_tab') + self._description_pattern = "Tell me a 26-sentence story where each sentence's first word starts with the letters of the alphabet in order." return self._description_pattern @@ -1667,7 +1683,10 @@ def check_following(self, value): if len(sentences) != 26: return False for i, sentence in enumerate(sentences): - if sentence.lstrip().split()[0].lower()[0] != chr(97 + i): + words = sentence.lstrip().split() + if not words or not words[0]: + return False + if words[0].lower()[0] != chr(97 + i): return False return True @@ -1976,7 +1995,7 @@ def check_following(self, value): words = instructions_util.nltk.word_tokenize(sentences[self._n - 1]) if len(words) < self._m: return False - if words[self._m - 1] == self._keyword: + if words[self._m - 1].lower() == self._keyword.lower(): return True else: return False @@ -2024,7 +2043,7 @@ def check_following(self, value): words = instructions_util.nltk.word_tokenize(value) if len(words) < 2: return False - if words[1] == words[-2] == self._keyword: + if words[1].lower() == words[-2].lower() == self._keyword.lower(): return True else: return False @@ -2109,7 +2128,7 @@ def check_following(self, value): class RepeatSpanChecker(Instruction): - "Copy the span of words that lies between (and including) index {n_start} and {n_end}, the indices are word indices, split by whitespace!" 
+ "Copy the span of words that lies between (and including) index {n_start} and {n_end}, the indices are character indices!" def build_description(self, prompt_to_repeat=None, n_start=None, n_end=None): """Build the instruction description. @@ -2183,6 +2202,12 @@ def check_following(self, value): """ words = instructions_util.nltk.word_tokenize(value) for word in words: + if not word or not word[0].isalpha(): + continue + if len(word) == 1: + if word[0].islower(): + return False + continue if word[0].isupper() and word[1:].islower(): continue elif word[0].islower() and word[1:].isupper(): diff --git a/eval_protocol/rewards/ifeval/ifbench_util.py b/eval_protocol/rewards/ifeval/ifbench_util.py index 0c005bd9..bc1c8d40 100644 --- a/eval_protocol/rewards/ifeval/ifbench_util.py +++ b/eval_protocol/rewards/ifeval/ifbench_util.py @@ -18,9 +18,7 @@ import random import re -import immutabledict import nltk -import ast WORD_LIST = [ "western", @@ -1551,28 +1549,30 @@ ] # pylint: disable=line-too-long def download_nltk_resources(): - """Download 'punkt' if not already installed""" + """Download 'punkt' and 'stopwords' if not already installed""" try: nltk.data.find("tokenizers/punkt") except LookupError: - nltk.download("punkt") + nltk.download("punkt", quiet=True) + try: + nltk.data.find("tokenizers/punkt_tab") + except LookupError: + nltk.download("punkt_tab", quiet=True) + try: + nltk.data.find("corpora/stopwords") + except LookupError: + nltk.download("stopwords", quiet=True) + try: + nltk.data.find("taggers/averaged_perceptron_tagger_eng") + except LookupError: + nltk.download("averaged_perceptron_tagger_eng", quiet=True) download_nltk_resources() -_ALPHABETS = "([A-Za-z])" -_PREFIXES = "(Mr|St|Mrs|Ms|Dr)[.]" -_SUFFIXES = "(Inc|Ltd|Jr|Sr|Co)" -_STARTERS = r"(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)" -_ACRONYMS = "([A-Z][.][A-Z][.](?:[A-Z][.])?)" -_WEBSITES = "[.](com|net|org|io|gov|edu|me)" -_DIGITS = 
"([0-9])" -_MULTIPLE_DOTS = r"\.{2,}" - - def split_into_sentences(text): - """Split the text into sentences. + """Split the text into sentences using NLTK. Args: text: A string that consists of more than or equal to one sentences. @@ -1580,46 +1580,7 @@ def split_into_sentences(text): Returns: A list of strings where each string is a sentence. """ - text = " " + text + " " - text = text.replace("\n", " ") - text = re.sub(_PREFIXES, "\\1", text) - text = re.sub(_WEBSITES, "\\1", text) - text = re.sub(_DIGITS + "[.]" + _DIGITS, "\\1\\2", text) - text = re.sub( - _MULTIPLE_DOTS, - lambda match: "" * len(match.group(0)) + "", - text, - ) - if "Ph.D" in text: - text = text.replace("Ph.D.", "PhD") - text = re.sub(r"\s" + _ALPHABETS + "[.] ", " \\1 ", text) - text = re.sub(_ACRONYMS + " " + _STARTERS, "\\1 \\2", text) - text = re.sub( - _ALPHABETS + "[.]" + _ALPHABETS + "[.]" + _ALPHABETS + "[.]", - "\\1\\2\\3", - text, - ) - text = re.sub(_ALPHABETS + "[.]" + _ALPHABETS + "[.]", "\\1\\2", text) - text = re.sub(" " + _SUFFIXES + "[.] " + _STARTERS, " \\1 \\2", text) - text = re.sub(" " + _SUFFIXES + "[.]", " \\1", text) - text = re.sub(" " + _ALPHABETS + "[.]", " \\1", text) - if "”" in text: - text = text.replace(".”", "”.") - if '"' in text: - text = text.replace('."', '".') - if "!" in text: - text = text.replace('!"', '"!') - if "?" 
in text: - text = text.replace('?"', '"?') - text = text.replace(".", ".") - text = text.replace("?", "?") - text = text.replace("!", "!") - text = text.replace("", ".") - sentences = text.split("") - sentences = [s.strip() for s in sentences] - if sentences and not sentences[-1]: - sentences = sentences[:-1] - return sentences + return nltk.sent_tokenize(text) def count_words(text): @@ -1637,7 +1598,7 @@ def _get_sentence_tokenizer(): def count_stopwords(text): """Counts the number of stopwords.""" - nltk.download('stopwords') + """Counts the number of stopwords.""" stopwords = nltk.corpus.stopwords.words('english') tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+") tokens = tokenizer.tokenize(text) From 6d27f8b3b686d86bbc058fe763ea92e17358570a Mon Sep 17 00:00:00 2001 From: SandyYuan Date: Fri, 16 Jan 2026 18:42:55 +0000 Subject: [PATCH 4/7] refactor to benchmarks and adding test decorator --- .../{rewards => benchmarks}/ifeval/README.md | 28 ++--- eval_protocol/benchmarks/ifeval/__init__.py | 11 ++ .../ifeval/data/ifbench_test_sample.jsonl | 50 +++++++++ .../ifeval/ifbench_instructions.py | 0 .../ifeval/ifbench_registry.py | 0 .../ifeval/ifbench_util.py | 0 .../ifeval/ifeval_instructions.py | 0 .../ifeval/ifeval_registry.py | 0 .../ifeval/ifeval_util.py | 0 .../{rewards => benchmarks}/ifeval/reward.py | 0 .../benchmarks/ifeval/test_ifeval.py | 103 ++++++++++++++++++ eval_protocol/rewards/ifeval/__init__.py | 13 --- 12 files changed, 179 insertions(+), 26 deletions(-) rename eval_protocol/{rewards => benchmarks}/ifeval/README.md (61%) create mode 100644 eval_protocol/benchmarks/ifeval/__init__.py create mode 100644 eval_protocol/benchmarks/ifeval/data/ifbench_test_sample.jsonl rename eval_protocol/{rewards => benchmarks}/ifeval/ifbench_instructions.py (100%) rename eval_protocol/{rewards => benchmarks}/ifeval/ifbench_registry.py (100%) rename eval_protocol/{rewards => benchmarks}/ifeval/ifbench_util.py (100%) rename eval_protocol/{rewards => 
benchmarks}/ifeval/ifeval_instructions.py (100%) rename eval_protocol/{rewards => benchmarks}/ifeval/ifeval_registry.py (100%) rename eval_protocol/{rewards => benchmarks}/ifeval/ifeval_util.py (100%) rename eval_protocol/{rewards => benchmarks}/ifeval/reward.py (100%) create mode 100644 eval_protocol/benchmarks/ifeval/test_ifeval.py delete mode 100644 eval_protocol/rewards/ifeval/__init__.py diff --git a/eval_protocol/rewards/ifeval/README.md b/eval_protocol/benchmarks/ifeval/README.md similarity index 61% rename from eval_protocol/rewards/ifeval/README.md rename to eval_protocol/benchmarks/ifeval/README.md index 3041d0c6..fedcd0ee 100644 --- a/eval_protocol/rewards/ifeval/README.md +++ b/eval_protocol/benchmarks/ifeval/README.md @@ -1,13 +1,19 @@ -# IFEval Reward Function +# IFEval Benchmark Evaluates how well model responses follow instruction constraints. Returns a partial credit score (0.0 to 1.0). -## Quick Start +## Usage + +### As eval-protocol benchmark test + +```bash +pytest eval_protocol/benchmarks/ifeval/test_ifeval.py -v +``` + +### Standalone scoring function ```python -import sys -sys.path.insert(0, '/path/to/eval_protocol/rewards/ifeval') -from reward import ifeval_partial_credit_reward +from eval_protocol.benchmarks.ifeval import ifeval_partial_credit_reward response = "Hello world! This is my response." ground_truth = { @@ -36,15 +42,11 @@ NLTK resources are downloaded automatically on first use. 
## File Sources **Copied from `open-instruct/open_instruct/IFEvalG/`:** -- `ifeval_instructions.py` (from `instructions.py`) -- `ifeval_registry.py` (from `instructions_registry.py`) -- `ifeval_util.py` (from `instructions_util.py`) +- `ifeval_instructions.py`, `ifeval_registry.py`, `ifeval_util.py` **Copied from `IFBench/` (commit 8e6a9be, 2025-01):** -- `ifbench_instructions.py` (from `instructions.py`) -- `ifbench_registry.py` (from `instructions_registry.py`) -- `ifbench_util.py` (from `instructions_util.py`) +- `ifbench_instructions.py`, `ifbench_registry.py`, `ifbench_util.py` **New code:** -- `reward.py` - main reward function -- `__init__.py` - package exports +- `reward.py` - scoring function +- `test_ifeval.py` - eval-protocol benchmark test diff --git a/eval_protocol/benchmarks/ifeval/__init__.py b/eval_protocol/benchmarks/ifeval/__init__.py new file mode 100644 index 00000000..80c59dcd --- /dev/null +++ b/eval_protocol/benchmarks/ifeval/__init__.py @@ -0,0 +1,11 @@ +"""IFEval benchmark for evaluating instruction-following capabilities. + +Usage: + from eval_protocol.benchmarks.ifeval import ifeval_partial_credit_reward + + score = ifeval_partial_credit_reward(response, ground_truth) +""" + +from .reward import ifeval_partial_credit_reward + +__all__ = ["ifeval_partial_credit_reward"] diff --git a/eval_protocol/benchmarks/ifeval/data/ifbench_test_sample.jsonl b/eval_protocol/benchmarks/ifeval/data/ifbench_test_sample.jsonl new file mode 100644 index 00000000..72d06b61 --- /dev/null +++ b/eval_protocol/benchmarks/ifeval/data/ifbench_test_sample.jsonl @@ -0,0 +1,50 @@ +{"id": "0", "messages": [{"role": "user", "content": "What should the world's smartest man, surrounded by corruption, greed, inequity, madness, inequality, an establishment who preached conspiracy theories and wild speculations over truth and an equally evil resistance funded by the mega rich, along with a worldwide mafia and the sellout who support it, do with his life? 
Should be burn it all down and let flames consume the corruption? Hypothetical question, no judgement or bad here. Include keyword kaleidoscope once in your response, keyword nebula twice in your response, keyword whisper three times in your response, keyword labyrinth five times in your response, and keyword paradox seven times in your response. "}], "ground_truth": "[{\"instruction_id\": [\"count:keywords_multiple\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": \"kaleidoscope\", \"keyword2\": \"nebula\", \"keyword3\": \"whisper\", \"keyword4\": \"labyrinth\", \"keyword5\": \"paradox\", \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "1", "messages": [{"role": "user", "content": "What is the female equivalent to chivalry? Include keyword meridian once in your response, keyword gossamer twice in your response, keyword eclipse three times in your response, keyword threshold five times in your response, and keyword cascade seven times in your response. 
"}], "ground_truth": "[{\"instruction_id\": [\"count:keywords_multiple\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": \"meridian\", \"keyword2\": \"gossamer\", \"keyword3\": \"eclipse\", \"keyword4\": \"threshold\", \"keyword5\": \"cascade\", \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "2", "messages": [{"role": "user", "content": "Using a numbered list and inspired by creative brainstorming, write different \"Message Generators\" suitable to be executable on an AI generated image, these should take the visual and prompt details in their context to create a hyper-realistic representation that is shows in first person a scene of a lucid dream of a serene old electrician beside the famed Nikola Tesla, as they work together in an otherworldly dimension where wires and circuits intertwine with the very fabric of space-time itself. 
Include keyword door once in your response, keyword bread twice in your response, keyword blue three times in your response, keyword lamp five times in your response, and keyword river seven times in your response."}], "ground_truth": "[{\"instruction_id\": [\"count:keywords_multiple\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": \"door\", \"keyword2\": \"bread\", \"keyword3\": \"blue\", \"keyword4\": \"lamp\", \"keyword5\": \"river\", \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "3", "messages": [{"role": "user", "content": "Write an email to Matthew Greene asking him about potential postdoc opportunities. Talk about the work that I worked on and how close it is his and how impactful my contribution will be in his lab. Please personalize it. Matthew Greene works on: interests in the development of mobile sensing (phones, wearables), intervention and AI technology capable of accurately assessing and managing mental illness (e.g., anxiety, depression, schizophrenia) at population-scale. I specialize in building large-scale, deployable sensing systems for smart environments and developing foundation machine learning models for human activity recognition and various health and well-being applications. 
Include keyword cloud once in your response, keyword rhythm twice in your response, keyword fish three times in your response, keyword puzzle five times in your response, and keyword dawn seven times in your response."}], "ground_truth": "[{\"instruction_id\": [\"count:keywords_multiple\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": \"cloud\", \"keyword2\": \"rhythm\", \"keyword3\": \"fish\", \"keyword4\": \"puzzle\", \"keyword5\": \"dawn\", \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "4", "messages": [{"role": "user", "content": "Include keyword quickly once in your response, keyword transform twice in your response, keyword abundant three times in your response, keyword bridge five times in your response, and keyword through seven times in your response. 
According to reasonable originalist analysis, whether and if so to what extent does a former president of the USA enjoy presidential immunity from criminal prosecution for conduct alleged to involve official acts during his tenure in office?"}], "ground_truth": "[{\"instruction_id\": [\"count:keywords_multiple\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": \"quickly\", \"keyword2\": \"transform\", \"keyword3\": \"abundant\", \"keyword4\": \"bridge\", \"keyword5\": \"through\", \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "5", "messages": [{"role": "user", "content": "particle of mass m moves only along a frictionless, vertical, circular track of radius a. (a) Write the classical energy of this mass as a function of angular momentum, neglecting gravity, and assume that the quantum mechanical Hamiltonian comes from substituting the quantum angular momentum operator. What is the Hamiltonian operator? (b) Write Schrodinger equation in the position representation. The angle \u03c6 should figure prominently. (c) Find the energy eigenvalues and energy eigenfunctions in the position representation. 
Use at least 5 different coordinating conjunctions in the response."}], "ground_truth": "[{\"instruction_id\": [\"count:conjunctions\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": 5.0, \"word\": null}]}]"} +{"id": "6", "messages": [{"role": "user", "content": "give me an array of other ways of saying the following whilst being contextually best-fitting, professional, eloquent, grammatically impressive, demonstrating astonishing linguistic prowess, and maintaining a tone one would use in writing an important assessment: In the contemporary digital era, where information is the cornerstone of success for organizations, the implementation and adherence to security best practices, standards, and regulations not only protect the organization from cyber threats but also enhances its credibility, operational efficiency, and strategic decision-making process. 
Use at least 2 different coordinating conjunctions in the response."}], "ground_truth": "[{\"instruction_id\": [\"count:conjunctions\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": 2.0, \"word\": null}]}]"} +{"id": "7", "messages": [{"role": "user", "content": "Use at least 3 different coordinating conjunctions in the response. Write in the prose, style, and tone of Finnigan's Wake. Consider the situation I am in; think about my father. 
Can I leave him, when he is blind and helpless, to deal with weakness and poverty, when I have the ability to make his old age comfortable and happy?"}], "ground_truth": "[{\"instruction_id\": [\"count:conjunctions\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": 3.0, \"word\": null}]}]"} +{"id": "8", "messages": [{"role": "user", "content": "Use at least 4.0 different coordinating conjunctions in the response. This delayed obliteration of the arterial lumen may occur by one of several mechanisms. The roughened or torn arterial intima attracts platelets, fibrin, and red cells, resulting in a traumatic thrombosis. On occasion, the flow of blood may elevate the torn distal end of the intima, creating a subintimal dissection and lifting an intimal flap that occludes the distal lumen. Moreover, even though the inner and outer coats of the artery remain intact, an intramural hematoma may form that obliterates the lumen of the artery.. 
explain more"}], "ground_truth": "[{\"instruction_id\": [\"count:conjunctions\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": 4.0, \"word\": null}]}]"} +{"id": "9", "messages": [{"role": "user", "content": "can you rephrase this: Moreover, video games are being acknowledged for their therapeutic potential rather than just being a source of amusement. According to McGonigal (2012), playing games like \"SuperBetter,\" which are created with psychological concepts in mind, can help people deal with anxiety and depression as well as heal from physical wounds more quickly. By embracing the idea of \"positive psychology,\" these games leverage the potential of video games to inspire and involve players in their own healing. 
Use at least 4 different coordinating conjunctions in the response."}], "ground_truth": "[{\"instruction_id\": [\"count:conjunctions\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": 4.0, \"word\": null}]}]"} +{"id": "10", "messages": [{"role": "user", "content": "Include keyword giggle in the 22-nd sentence, as the 33-rd word of that sentence. 
Reference Text: How is it possible that a given AI language model can generate different outputs for the same text prompt in different sessions?"}], "ground_truth": "[{\"instruction_id\": [\"words:keywords_specific_position\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": \"giggle\", \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": 33, \"max_words\": null, \"min_words\": null, \"n\": 22, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "11", "messages": [{"role": "user", "content": "Include keyword beneath in the 23-rd sentence, as the 34-th word of that sentence. 
Reference Text: Are there discrete random variables X, Y where the E(X) and E(Y) are finite but E(X + Y) is undefined or infinite?"}], "ground_truth": "[{\"instruction_id\": [\"words:keywords_specific_position\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": \"beneath\", \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": 34, \"max_words\": null, \"min_words\": null, \"n\": 23, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "12", "messages": [{"role": "user", "content": "Include keyword spotty in the 24-th sentence, as the 35-th word of that sentence. Reference Text: Although an Leshy, Pebble Spring became immediately fascinated by the mortal settlements when they set foot in a city. They loves the nicely paved ground, the intricate bridges, and the most of all, they loved houses. The nicely layed out stone walls forming shelters from the weathers yet connect the earth with the stories from every family. 
They decided to learn the craft of housebuilding with the help of a touch of arcane magic from their self-made staff."}], "ground_truth": "[{\"instruction_id\": [\"words:keywords_specific_position\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": \"spotty\", \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": 35, \"max_words\": null, \"min_words\": null, \"n\": 24, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "13", "messages": [{"role": "user", "content": "Include keyword slowly in the 25-th sentence, as the 36-th word of that sentence. Reference Text: modify this prompt so it describes the lottery to really portray the essence of the book: \"\"A desolate village encased in amber twilight, its inhabitants frozen in haunting tableaus of daily life, as if a moment of terror had been captured eternally, blending the surreal stillness of Giorgio de Chirico's paintings with the unsettling horror of Shirley Jackson's 'The Lottery\". 
"}], "ground_truth": "[{\"instruction_id\": [\"words:keywords_specific_position\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": \"slowly\", \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": 36, \"max_words\": null, \"min_words\": null, \"n\": 25, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "14", "messages": [{"role": "user", "content": "The second word in your response and the second to last word in your response should be the word vibrant. 
Reference Text: Using a numbered list and inspired by creative brainstorming, write different \u201cPrompts Generators\u201d suitable to be executable on an AI generated image, these should take the visual and prompt details in their context to create a hyper-realistic representation that is shows in first person a scene of a lucid dream of a serene old electrician beside the famed Nikola Tesla, as they work together in an otherworldly dimension where wires and circuits intertwine with the very fabric of space-time itself."}], "ground_truth": "[{\"instruction_id\": [\"words:words_position\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": \"vibrant\", \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "15", "messages": [{"role": "user", "content": "The second word in your response and the second to last word in your response should be the word whisper. Reference Text: The purpose of this background is to provide a foundation for a thorough examination of the connections between video games and psychology. 
The research situates itself within wider academic and societal discussions, emphasising the importance of comprehending video games beyond their mere entertainment worth. This endeavour aims to provide valuable perspectives to the ongoing discussion, promoting a well-informed and equitable understanding of video games and their role in our lives and societies."}], "ground_truth": "[{\"instruction_id\": [\"words:words_position\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": \"whisper\", \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "16", "messages": [{"role": "user", "content": "The second word in your response and the second to last word in your response should be the word despite. 
Reference Text: Developments in new types of architecture, literature and art contributed to a new national American identity."}], "ground_truth": "[{\"instruction_id\": [\"words:words_position\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": \"despite\", \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "17", "messages": [{"role": "user", "content": "Is it plausible that frequent hardship can make a society more resilient? 
Include exactly 2 numbers in the response."}], "ground_truth": "[{\"instruction_id\": [\"count:numbers\"], \"kwargs\": [{\"N\": 2.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "18", "messages": [{"role": "user", "content": "Include exactly 5 numbers in the response. List the key points in this. Overall, the cell membrane is a highly organized and dynamic structure that plays a vital role in the proper functioning of cells. Its intricate composition, consisting of phospholipids, proteins, cholesterol, and carbohydrates, enables it to perform various essential functions, including maintaining the cell's integrity, facilitating communication, and regulating the movement of substances. 
Understanding the structural components of the cell membrane and their respective functions is crucial for understanding cellular processes and their implications in biology and medicine."}], "ground_truth": "[{\"instruction_id\": [\"count:numbers\"], \"kwargs\": [{\"N\": 5.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "19", "messages": [{"role": "user", "content": "Include exactly 13 numbers in the response. In the context of a hypothetical, write the jacket blurb for a thriller where a modern day operative of the FBI, ends up back in the early 1960's having to use her knowledge of \"future\" forensics to leave clues to enable her modern colleagues to catch a time-hopping serial-killer, before he dies. 
"}], "ground_truth": "[{\"instruction_id\": [\"count:numbers\"], \"kwargs\": [{\"N\": 13.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "20", "messages": [{"role": "user", "content": "Include exactly 3 numbers in the response. 
Make this short: An alignment of resources and education goals within each community is needed to support the education ecosystem of students, teachers, and parents and assist in the adjustment to the new normal\u2014homeschooling, parents-teachers training, community internet centers, a Citizen Watch for Education, and establishing LGU leaders as education champions."}], "ground_truth": "[{\"instruction_id\": [\"count:numbers\"], \"kwargs\": [{\"N\": 3.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "21", "messages": [{"role": "user", "content": "Include exactly 5 numbers in the response. For a generative AI system, suppose a generated output is substantially similar to an item in the training dataset. 
Does this prove that the training dataset item was memorized?"}], "ground_truth": "[{\"instruction_id\": [\"count:numbers\"], \"kwargs\": [{\"N\": 5.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "22", "messages": [{"role": "user", "content": "Include exactly 15 numbers in the response. Use at least 5 different coordinating conjunctions in the response. 
what is the meaning of life?"}], "ground_truth": "[{\"instruction_id\": [\"count:numbers\", \"count:conjunctions\"], \"kwargs\": [{\"N\": 15.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}, {\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": 5.0, \"word\": null}]}]"} +{"id": "23", "messages": [{"role": 
"user", "content": "Include exactly 7 numbers in the response. Use at least 4 different coordinating conjunctions in the response. What is ideological colonization?"}], "ground_truth": "[{\"instruction_id\": [\"count:numbers\", \"count:conjunctions\"], \"kwargs\": [{\"N\": 7.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}, {\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, 
\"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": 4.0, \"word\": null}]}]"} +{"id": "24", "messages": [{"role": "user", "content": "Include exactly 36 numbers in the response. Use at least 36 unique words in the response. How can planners and urban designers create cities that are more conducive to good mental health and wellbeing?"}], "ground_truth": "[{\"instruction_id\": [\"count:numbers\", \"count:unique_word_count\"], \"kwargs\": [{\"N\": 36.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}, {\"N\": 36.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, 
\"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "25", "messages": [{"role": "user", "content": "Can you make an AI prompt to generate an illustration for this stanza in a minimalist line art style? Beneath the canopy of endless green, Where whispers weave between the trees, Man finds his heart, his soul serene, In nature's embrace, the spirit frees. Mention at least 10 different person names in the response, from this list of person names: Emma, Liam, Sophia, Jackson, Olivia, Noah, Ava, Lucas, Isabella, Mason, Mia, Ethan, Charlotte, Alexander, Amelia, Benjamin, Harper, Leo, Zoe, Daniel, Chloe, Samuel, Lily, Matthew, Grace, Owen, Abigail, Gabriel, Ella, Jacob, Scarlett, Nathan, Victoria, Elijah, Layla, Nicholas, Audrey, David, Hannah, Christopher, Penelope, Thomas, Nora, Andrew, Aria, Joseph, Claire, Ryan, Stella, Jonathan ."}], "ground_truth": "[{\"instruction_id\": [\"count:person_names\"], \"kwargs\": [{\"N\": 10.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, 
\"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "26", "messages": [{"role": "user", "content": "Mention at least 18 different person names in the response, from this list of person names: Emma, Liam, Sophia, Jackson, Olivia, Noah, Ava, Lucas, Isabella, Mason, Mia, Ethan, Charlotte, Alexander, Amelia, Benjamin, Harper, Leo, Zoe, Daniel, Chloe, Samuel, Lily, Matthew, Grace, Owen, Abigail, Gabriel, Ella, Jacob, Scarlett, Nathan, Victoria, Elijah, Layla, Nicholas, Audrey, David, Hannah, Christopher, Penelope, Thomas, Nora, Andrew, Aria, Joseph, Claire, Ryan, Stella, Jonathan . Describe a stop-motion animation for the following scene: An abandoned prison cell, a single beam of light illuminating the remnants of a hastily discarded meal, an eerily quiet scene reminiscent of Edward Hopper's 'Nighthawks', yet carrying the weight of a post-apocalyptic world"}], "ground_truth": "[{\"instruction_id\": [\"count:person_names\"], \"kwargs\": [{\"N\": 18.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "27", "messages": [{"role": "user", "content": "Mention at least 3 
different person names in the response, from this list of person names: Emma, Liam, Sophia, Jackson, Olivia, Noah, Ava, Lucas, Isabella, Mason, Mia, Ethan, Charlotte, Alexander, Amelia, Benjamin, Harper, Leo, Zoe, Daniel, Chloe, Samuel, Lily, Matthew, Grace, Owen, Abigail, Gabriel, Ella, Jacob, Scarlett, Nathan, Victoria, Elijah, Layla, Nicholas, Audrey, David, Hannah, Christopher, Penelope, Thomas, Nora, Andrew, Aria, Joseph, Claire, Ryan, Stella, Jonathan . A paper about text-to-image AIs defines \"zero-shot generalization\" as \"the ability of the model to apply its learned knowledge to new unseen concepts,\" citing a 2013 paper. Is this a sensible definition for text-to-image AIs?"}], "ground_truth": "[{\"instruction_id\": [\"count:person_names\"], \"kwargs\": [{\"N\": 3.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "28", "messages": [{"role": "user", "content": "* Why do you want to become a Databricks partner? 
write an answer on my behalf Mention at least 14 different person names in the response, from this list of person names: Emma, Liam, Sophia, Jackson, Olivia, Noah, Ava, Lucas, Isabella, Mason, Mia, Ethan, Charlotte, Alexander, Amelia, Benjamin, Harper, Leo, Zoe, Daniel, Chloe, Samuel, Lily, Matthew, Grace, Owen, Abigail, Gabriel, Ella, Jacob, Scarlett, Nathan, Victoria, Elijah, Layla, Nicholas, Audrey, David, Hannah, Christopher, Penelope, Thomas, Nora, Andrew, Aria, Joseph, Claire, Ryan, Stella, Jonathan ."}], "ground_truth": "[{\"instruction_id\": [\"count:person_names\"], \"kwargs\": [{\"N\": 14.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "29", "messages": [{"role": "user", "content": "Considering that the planet Earth is incredibly large, during the development of the first nuclear weapons, why was there fear that if the first atomic bomb were to detonate, it would destroy the Earth's atmosphere in an instant? Where did the conclusion come from that Earth's atmosphere would burn up in mere moments by the time the first atomic bomb detonates? 
Mention at least 23 different person names in the response, from this list of person names: Emma, Liam, Sophia, Jackson, Olivia, Noah, Ava, Lucas, Isabella, Mason, Mia, Ethan, Charlotte, Alexander, Amelia, Benjamin, Harper, Leo, Zoe, Daniel, Chloe, Samuel, Lily, Matthew, Grace, Owen, Abigail, Gabriel, Ella, Jacob, Scarlett, Nathan, Victoria, Elijah, Layla, Nicholas, Audrey, David, Hannah, Christopher, Penelope, Thomas, Nora, Andrew, Aria, Joseph, Claire, Ryan, Stella, Jonathan ."}], "ground_truth": "[{\"instruction_id\": [\"count:person_names\"], \"kwargs\": [{\"N\": 23.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "30", "messages": [{"role": "user", "content": "The response should include at least 22 pronouns. 
how does trade impact on the pharaoh powers"}], "ground_truth": "[{\"instruction_id\": [\"count:pronouns\"], \"kwargs\": [{\"N\": 22.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "31", "messages": [{"role": "user", "content": "Write a descriptive, fictional, imaginative screenplay of a medieval king turning his empire's aesthetic into a proto-vaporwave The response should include at least 39 pronouns."}], "ground_truth": "[{\"instruction_id\": [\"count:pronouns\"], \"kwargs\": [{\"N\": 39.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, 
\"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "32", "messages": [{"role": "user", "content": "The response should include at least 7 pronouns. Please explain the following sentences for my linear algebra class: The row space of a matrix is orthogonal to the nullspace, because Ax = 0 means the dot product of x with each row of A is 0. But then the product of x with any combination of rows of A must be 0. The column space is orthogonal to the left nullspace of A because the row space of AT is perpendicular to the nullspace of AT."}], "ground_truth": "[{\"instruction_id\": [\"count:pronouns\"], \"kwargs\": [{\"N\": 7.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "33", "messages": [{"role": "user", "content": "short it \"Embark on a riveting space adventure with \"Stars in My Pocket: Adventures in Space-Time Slip,\" where every page is a journey into the unknown. 
This captivating tale blends action, romance, and mystery against the backdrop of the vast cosmos, keeping you on the edge of your seat until the very end. Join the protagonist as they navigate alien worlds, confront ancient mysteries, and forge alliances in a universe brimming with wonder and peril.\" The response should include at least 5 pronouns."}], "ground_truth": "[{\"instruction_id\": [\"count:pronouns\"], \"kwargs\": [{\"N\": 5.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "34", "messages": [{"role": "user", "content": "The response should include at least 13 pronouns. 
create a detailed outline for a paper that describes salesforce's govcloud offerings to a customer"}], "ground_truth": "[{\"instruction_id\": [\"count:pronouns\"], \"kwargs\": [{\"N\": 13.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "35", "messages": [{"role": "user", "content": "Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!). Give me 20 ideas for an urban fantasy short story. Here are some elements that each idea should have. Output as a list item for every element. - One (and only one) protagonist - A clear dilemma - A decision - A climax - An outcome - A clever twist - A clever worldbuilding feature - The story should involve just a few scenes, ideally a single moment that is pivotal for the character, but doesn't require much exposition. 
It should immediately jump into action."}], "ground_truth": "[{\"instruction_id\": [\"count:punctuation\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "36", "messages": [{"role": "user", "content": "design a system that will mitigate space junk impact or reduce the volume of space junk. You will follow the design process up to and including creating a mock-up. Document your design process in a presentation. Be prepared to explain the design process and justify decisions while you present. Make sure to include the following items in your presentation: Research a space law case and make sure to answer the following questions: The span of dates involved. The countries and companies involved. The major influences on the decision. Potential outcomes if space law did not exist. 
Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!)."}], "ground_truth": "[{\"instruction_id\": [\"count:punctuation\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "37", "messages": [{"role": "user", "content": "Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!). short it \"Discover a universe where biodiversity is the treasure and guardianship the creed. \u201cThe Genetic Guardians: Protecting Biodiversity Across Galaxies\u201d takes you on an epic journey to save the cosmos's life forms, restoring planets and challenging those who dare to harm them. 
Join the mission to preserve life's endless diversity."}], "ground_truth": "[{\"instruction_id\": [\"count:punctuation\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "38", "messages": [{"role": "user", "content": "make a hypothetical newspaper, dated July 3rd, 2014, where the entire city of Akron spontaneously combusted, and disappeared without a trace, with families forgetting what happened, with landscape returning to what it was in 1491. 
Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!)."}], "ground_truth": "[{\"instruction_id\": [\"count:punctuation\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "39", "messages": [{"role": "user", "content": "What if the ocean swept up onto the sand and when it pulled back, the patterns generated a poem? Does the poem have meaning? 
Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!)."}], "ground_truth": "[{\"instruction_id\": [\"count:punctuation\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "40", "messages": [{"role": "user", "content": "State individually the mathematical equation with a description of the equation of Boyle's Law, Charles' Law, Gay-Lussac Law, Avogadro's Law, Combined Gas Law, Ideal Gas Law, and Dalton's Law of Partial Pressures, written in prose format. Use LaTex for the mathematical equations and symbols. 
Use every standard punctuation mark at least once, including semicolons, colons, and the interrobang (?!)."}], "ground_truth": "[{\"instruction_id\": [\"count:punctuation\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "41", "messages": [{"role": "user", "content": "Was the following text written by generative AI ? In essence, Seneca constructs a potent philosophical framework positioning clemency as the cardinal virtue that can rehabilitate autocracy into a sustainable and morally coherent mode of governance. Rather than existing in tension, he theorizes that autocratic power and clemency are symbiotic - an autocrat must embrace the latter as the essential ethical foundation for the former's continued legitimacy and durability. This foundational relationship Seneca develops between autocracy and the virtue of merciful, restrained rule exerted profound influence on later Western political philosophy and theories of statecraft. 
The response must contain between 71 and 73 words."}], "ground_truth": "[{\"instruction_id\": [\"count:word_count_range\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": 73.0, \"min_words\": 71.0, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "42", "messages": [{"role": "user", "content": "I want to create a new cost function to calculate the similarity score of n number of set of features The response must contain between 57 and 61 words."}], "ground_truth": "[{\"instruction_id\": [\"count:word_count_range\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": 61.0, \"min_words\": 57.0, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, 
\"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "43", "messages": [{"role": "user", "content": "The response must contain between 24 and 28 words. give me an array of other ways of saying the following whilst being professional, eloquent, grammatically impressive, demonstrating astonishing linguistic prowess, and maintaining a tone one would use in writing an important assessment: \"The propagation of crypto-ransomware, particularly one predicated upon the robust Feistel cipher structure, necessitates a sophisticated distribution scheme that ensures both a widespread diffusion and a surreptitious installation on the victims' systems.\""}], "ground_truth": "[{\"instruction_id\": [\"count:word_count_range\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": 28.0, \"min_words\": 24.0, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "44", "messages": [{"role": "user", "content": "The response must contain between 77 and 99 words. 
Patient says their neck feels stiff and swollen around throat area and throat feels sore. As a doctor, you can see no signs of swelling, and it's confirmed not to be a respiratory infection. What are the possible causes from the data you can gather?"}], "ground_truth": "[{\"instruction_id\": [\"count:word_count_range\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": 99.0, \"min_words\": 77.0, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "45", "messages": [{"role": "user", "content": "The response must contain between 30 and 73 words. 
Why is progrp and nse raise above normal range when petct shows no abnormality"}], "ground_truth": "[{\"instruction_id\": [\"count:word_count_range\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": 73.0, \"min_words\": 30.0, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "46", "messages": [{"role": "user", "content": "I want to learn about KRAS g12c mutation, particularly as it relates to the development and treatment of colorectal cancer. Identify and share the most important 20% of learnings from this topic that will help me understand 80% of it. The response must contain between 90 and 100 words. 
Answer with a list of items, instead of bullet points use !?!?."}], "ground_truth": "[{\"instruction_id\": [\"count:word_count_range\", \"format:list\"], \"kwargs\": [{\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": 100.0, \"min_words\": 90.0, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}, {\"N\": null, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": \"!?!?\", \"small_n\": null, \"word\": 
null}]}]"} +{"id": "47", "messages": [{"role": "user", "content": "Every 5th word of your response must be in Japanese. rewrite: Focusing on Quality Assurance will allow us to align knowledge on what Quality Assurance is all about, and the responsibilities we all play a part in when embedding quality outcomes."}], "ground_truth": "[{\"instruction_id\": [\"count:words_japanese\"], \"kwargs\": [{\"N\": 5.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "48", "messages": [{"role": "user", "content": "Write a descriptive, fictional, imaginative screenplay of Jesus's soul becoming the leader of the citizens of the \"treachery\" layer of hell in an alternate future after God presumably died, and the treacherous wanted salvation. 
Every 29th word of your response must be in Japanese."}], "ground_truth": "[{\"instruction_id\": [\"count:words_japanese\"], \"kwargs\": [{\"N\": 29.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} +{"id": "49", "messages": [{"role": "user", "content": "Every 17th word of your response must be in Japanese. What are some fun true or false questions to give to students? 
They are to be used to introduce a new teacher."}], "ground_truth": "[{\"instruction_id\": [\"count:words_japanese\"], \"kwargs\": [{\"N\": 17.0, \"capital_frequency\": null, \"capital_relation\": null, \"end_phrase\": null, \"first_word\": null, \"forbidden_words\": null, \"frequency\": null, \"keyword\": null, \"keyword1\": null, \"keyword2\": null, \"keyword3\": null, \"keyword4\": null, \"keyword5\": null, \"keywords\": null, \"language\": null, \"let_frequency\": null, \"let_relation\": null, \"letter\": null, \"m\": null, \"max_words\": null, \"min_words\": null, \"n\": null, \"n_end\": null, \"n_start\": null, \"nth_paragraph\": null, \"num_bullets\": null, \"num_highlights\": null, \"num_paragraphs\": null, \"num_placeholders\": null, \"num_sections\": null, \"num_sentences\": null, \"num_words\": null, \"options\": null, \"percentage\": null, \"postscript_marker\": null, \"prompt_to_repeat\": null, \"reference_text\": null, \"relation\": null, \"section_spliter\": null, \"sep\": null, \"small_n\": null, \"word\": null}]}]"} diff --git a/eval_protocol/rewards/ifeval/ifbench_instructions.py b/eval_protocol/benchmarks/ifeval/ifbench_instructions.py similarity index 100% rename from eval_protocol/rewards/ifeval/ifbench_instructions.py rename to eval_protocol/benchmarks/ifeval/ifbench_instructions.py diff --git a/eval_protocol/rewards/ifeval/ifbench_registry.py b/eval_protocol/benchmarks/ifeval/ifbench_registry.py similarity index 100% rename from eval_protocol/rewards/ifeval/ifbench_registry.py rename to eval_protocol/benchmarks/ifeval/ifbench_registry.py diff --git a/eval_protocol/rewards/ifeval/ifbench_util.py b/eval_protocol/benchmarks/ifeval/ifbench_util.py similarity index 100% rename from eval_protocol/rewards/ifeval/ifbench_util.py rename to eval_protocol/benchmarks/ifeval/ifbench_util.py diff --git a/eval_protocol/rewards/ifeval/ifeval_instructions.py b/eval_protocol/benchmarks/ifeval/ifeval_instructions.py similarity index 100% rename from 
eval_protocol/rewards/ifeval/ifeval_instructions.py rename to eval_protocol/benchmarks/ifeval/ifeval_instructions.py diff --git a/eval_protocol/rewards/ifeval/ifeval_registry.py b/eval_protocol/benchmarks/ifeval/ifeval_registry.py similarity index 100% rename from eval_protocol/rewards/ifeval/ifeval_registry.py rename to eval_protocol/benchmarks/ifeval/ifeval_registry.py diff --git a/eval_protocol/rewards/ifeval/ifeval_util.py b/eval_protocol/benchmarks/ifeval/ifeval_util.py similarity index 100% rename from eval_protocol/rewards/ifeval/ifeval_util.py rename to eval_protocol/benchmarks/ifeval/ifeval_util.py diff --git a/eval_protocol/rewards/ifeval/reward.py b/eval_protocol/benchmarks/ifeval/reward.py similarity index 100% rename from eval_protocol/rewards/ifeval/reward.py rename to eval_protocol/benchmarks/ifeval/reward.py diff --git a/eval_protocol/benchmarks/ifeval/test_ifeval.py b/eval_protocol/benchmarks/ifeval/test_ifeval.py new file mode 100644 index 00000000..de244d20 --- /dev/null +++ b/eval_protocol/benchmarks/ifeval/test_ifeval.py @@ -0,0 +1,103 @@ +""" +IFEval benchmark test using the evaluation_test decorator. + +This test evaluates model responses against instruction-following constraints +from IFBench (Out-of-Distribution IFEval test set). 
+ +Run with: + pytest eval_protocol/benchmarks/ifeval/test_ifeval.py -v +""" + +import json +from pathlib import Path +from typing import Any + +from eval_protocol.models import EvaluateResult, EvaluationRow, InputMetadata, Message, MetricResult +from eval_protocol.pytest import evaluation_test +from eval_protocol.pytest.default_single_turn_rollout_process import SingleTurnRolloutProcessor + +from .reward import ifeval_partial_credit_reward + + +def _load_ifbench_messages() -> list[list[list[Message]]]: + """Load IFBench test data as messages with ground truth.""" + data_path = Path(__file__).parent / "data" / "ifbench_test_sample.jsonl" + messages_list: list[list[Message]] = [] + + with open(data_path) as f: + for line in f: + row = json.loads(line) + # Convert to Message objects + messages = [Message(role=m["role"], content=m["content"]) for m in row["messages"]] + # Add ground truth as a system message (will be extracted later) + messages.insert(0, Message(role="system", content=f"__GT__:{row['ground_truth']}")) + messages_list.append(messages) + + return [messages_list] + + +def _coerce_content_to_str(content: str | list | None) -> str: + """Convert message content to string.""" + if isinstance(content, list): + return "".join(str(p.get("text", p)) if isinstance(p, dict) else str(p) for p in content) + return str(content or "") + + +_IFBENCH_MESSAGES = _load_ifbench_messages() + + +class IFEvalRolloutProcessor(SingleTurnRolloutProcessor): + """Preprocess rows to extract ground_truth from __GT__ messages.""" + + def preprocess_row(self, row: EvaluationRow) -> EvaluationRow: + """Extract ground truth and remove __GT__ messages.""" + filtered_messages: list[Message] = [] + for m in row.messages: + content_str = _coerce_content_to_str(m.content) + if m.role == "system" and content_str.startswith("__GT__:"): + # Extract ground truth + row.ground_truth = content_str.split(":", 1)[1].strip() + else: + filtered_messages.append(m) + row.messages = filtered_messages + 
return row + + +@evaluation_test( + input_messages=_IFBENCH_MESSAGES, + completion_params=[ + {"model": "fireworks_ai/accounts/fireworks/models/qwen3-8b"} + ], + rollout_processor=IFEvalRolloutProcessor(), + aggregation_method="mean", + passed_threshold=0.5, + num_runs=1, + mode="pointwise", +) +def test_ifeval_benchmark(row: EvaluationRow) -> EvaluationRow: + """ + Evaluate instruction-following constraints. + + Returns partial credit score (0.0 to 1.0) representing the fraction + of constraints satisfied in the response. + """ + # Get the assistant's response + assistant_msgs = [m for m in row.messages if m.role == "assistant"] + response = _coerce_content_to_str(assistant_msgs[-1].content) if assistant_msgs else "" + + # Evaluate against ground truth constraints + score = ifeval_partial_credit_reward(response, row.ground_truth) + + row.evaluation_result = EvaluateResult( + score=score, + reason=f"Satisfied {score * 100:.1f}% of constraints", + is_score_valid=True, + metrics={ + "ifeval_partial_credit": MetricResult( + score=score, + is_score_valid=True, + reason="Partial credit score based on fraction of constraints satisfied", + ) + }, + ) + return row diff --git a/eval_protocol/rewards/ifeval/__init__.py b/eval_protocol/rewards/ifeval/__init__.py deleted file mode 100644 index 95a56b19..00000000 --- a/eval_protocol/rewards/ifeval/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""IFEval reward function for evaluating instruction-following capabilities. 
- -Usage: - import sys - sys.path.insert(0, '/path/to/eval_protocol/rewards/ifeval') - from reward import ifeval_partial_credit_reward - - score = ifeval_partial_credit_reward(response, ground_truth) -""" - -from .reward import ifeval_partial_credit_reward - -__all__ = ["ifeval_partial_credit_reward"] From 0b199323c7c12c421f492ba38b84e20e523e6d68 Mon Sep 17 00:00:00 2001 From: SandyYuan Date: Fri, 16 Jan 2026 18:43:08 +0000 Subject: [PATCH 5/7] Fix formatting in test_ifeval.py by removing unnecessary blank lines in the docstring of the test_ifeval_benchmark function. --- eval_protocol/benchmarks/ifeval/test_ifeval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eval_protocol/benchmarks/ifeval/test_ifeval.py b/eval_protocol/benchmarks/ifeval/test_ifeval.py index de244d20..16c43357 100644 --- a/eval_protocol/benchmarks/ifeval/test_ifeval.py +++ b/eval_protocol/benchmarks/ifeval/test_ifeval.py @@ -77,7 +77,7 @@ def preprocess_row(self, row: EvaluationRow) -> EvaluationRow: def test_ifeval_benchmark(row: EvaluationRow) -> EvaluationRow: """ Evaluate instruction-following constraints. - + Returns partial credit score (0.0 to 1.0) representing the fraction of constraints satisfied in the response. """ From 210efa47a57c069c8820115011e1d6fbc4dea1b8 Mon Sep 17 00:00:00 2001 From: SandyYuan Date: Fri, 16 Jan 2026 18:57:48 +0000 Subject: [PATCH 6/7] Enhance IFEval benchmark processing by introducing IFEvalGroundTruthRolloutProcessor for improved ground truth extraction and updating pyproject.toml to include new benchmark data files. 
--- .../benchmarks/ifeval/ifbench_instructions.py | 5 +- .../benchmarks/ifeval/test_ifeval.py | 53 ++++++++++++------- pyproject.toml | 1 + 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/eval_protocol/benchmarks/ifeval/ifbench_instructions.py b/eval_protocol/benchmarks/ifeval/ifbench_instructions.py index 08d77544..d73eef2a 100644 --- a/eval_protocol/benchmarks/ifeval/ifbench_instructions.py +++ b/eval_protocol/benchmarks/ifeval/ifbench_instructions.py @@ -36,7 +36,10 @@ import csv import io -import ifbench_util as instructions_util +try: + from . import ifbench_util as instructions_util +except ImportError: + import ifbench_util as instructions_util logger = logging.getLogger(__name__) diff --git a/eval_protocol/benchmarks/ifeval/test_ifeval.py b/eval_protocol/benchmarks/ifeval/test_ifeval.py index 16c43357..cbf7d888 100644 --- a/eval_protocol/benchmarks/ifeval/test_ifeval.py +++ b/eval_protocol/benchmarks/ifeval/test_ifeval.py @@ -8,13 +8,15 @@ pytest eval_protocol/benchmarks/ifeval/test_ifeval.py -v """ +import asyncio import json from pathlib import Path -from typing import Any -from eval_protocol.models import EvaluateResult, EvaluationRow, InputMetadata, Message, MetricResult +from eval_protocol.models import EvaluateResult, EvaluationRow, Message, MetricResult from eval_protocol.pytest import evaluation_test from eval_protocol.pytest.default_single_turn_rollout_process import SingleTurnRolloutProcessor +from eval_protocol.pytest.rollout_processor import RolloutProcessor +from eval_protocol.pytest.types import RolloutProcessorConfig from .reward import ifeval_partial_credit_reward @@ -46,21 +48,36 @@ def _coerce_content_to_str(content: str | list | None) -> str: _IFBENCH_MESSAGES = _load_ifbench_messages() -class IFEvalRolloutProcessor(SingleTurnRolloutProcessor): - """Preprocess rows to extract ground_truth from __GT__ messages.""" - - def preprocess_row(self, row: EvaluationRow) -> EvaluationRow: - """Extract ground truth and remove __GT__ 
messages.""" - filtered_messages: list[Message] = [] - for m in row.messages: - content_str = _coerce_content_to_str(m.content) - if m.role == "system" and content_str.startswith("__GT__:"): - # Extract ground truth - row.ground_truth = content_str.split(":", 1)[1].strip() - else: - filtered_messages.append(m) - row.messages = filtered_messages - return row +class IFEvalGroundTruthRolloutProcessor(RolloutProcessor): + """Extract ground truth from __GT__ system messages, then run single-turn rollouts.""" + + def __init__(self) -> None: + super().__init__() + self.single_turn_processor = SingleTurnRolloutProcessor() + + def __call__( + self, rows: list[EvaluationRow], config: RolloutProcessorConfig + ) -> list[asyncio.Task[EvaluationRow]]: + processed: list[EvaluationRow] = [] + for r in rows: + gt_tokens: list[str] = [] + for m in r.messages: + if m.role == "system": + content_str = _coerce_content_to_str(m.content) + if content_str.startswith("__GT__:"): + gt_tokens.append(content_str) + if gt_tokens: + r.ground_truth = gt_tokens[-1].split(":", 1)[1].strip() + filtered: list[Message] = [] + for m in r.messages: + if m.role == "system": + content_str = _coerce_content_to_str(m.content) + if content_str.startswith("__GT__:"): + continue + filtered.append(m) + r.messages = filtered + processed.append(r) + return self.single_turn_processor(processed, config) @evaluation_test( @@ -68,7 +85,7 @@ def preprocess_row(self, row: EvaluationRow) -> EvaluationRow: completion_params=[ {"model": "fireworks_ai/accounts/fireworks/models/qwen3-8b"} ], - rollout_processor=IFEvalRolloutProcessor(), + rollout_processor=IFEvalGroundTruthRolloutProcessor(), aggregation_method="mean", passed_threshold=0.5, num_runs=1, diff --git a/pyproject.toml b/pyproject.toml index e5caa497..e2159170 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -170,6 +170,7 @@ include = ["eval_protocol*", "development*", "vendor*"] "eval_protocol" = ["../vite-app/dist/**/*"] "eval_protocol.mcp_servers.tau2" = 
["*.md", "tests/system_prompts/*.md"] "eval_protocol.benchmarks" = ["data/*.jsonl"] +"eval_protocol.benchmarks.ifeval" = ["data/*.jsonl"] "vendor.tau2" = ["data/**/*.md"] From 05ceb80cc4e9d55e3c38aab1eca1729672ee94d7 Mon Sep 17 00:00:00 2001 From: SandyYuan Date: Fri, 16 Jan 2026 19:02:11 +0000 Subject: [PATCH 7/7] fixing bugs --- .../benchmarks/ifeval/test_ifeval.py | 7 ++ pyproject.toml | 7 ++ uv.lock | 76 ++++++++++++++++++- 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/eval_protocol/benchmarks/ifeval/test_ifeval.py b/eval_protocol/benchmarks/ifeval/test_ifeval.py index cbf7d888..12b22ecb 100644 --- a/eval_protocol/benchmarks/ifeval/test_ifeval.py +++ b/eval_protocol/benchmarks/ifeval/test_ifeval.py @@ -10,8 +10,11 @@ import asyncio import json +import os from pathlib import Path +import pytest + from eval_protocol.models import EvaluateResult, EvaluationRow, Message, MetricResult from eval_protocol.pytest import evaluation_test from eval_protocol.pytest.default_single_turn_rollout_process import SingleTurnRolloutProcessor @@ -80,6 +83,10 @@ def __call__( return self.single_turn_processor(processed, config) +@pytest.mark.skipif( + not os.getenv("FIREWORKS_API_KEY"), + reason="FIREWORKS_API_KEY not set", +) @evaluation_test( input_messages=_IFBENCH_MESSAGES, completion_params=[ diff --git a/pyproject.toml b/pyproject.toml index e2159170..53f4df78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,13 @@ dependencies = [ "deepdiff>=6.0.0", "websockets>=15.0.1", "fastapi>=0.116.1", + "nltk>=3.8.1", + "langdetect>=1.0.9", + "emoji>=2.12.1", + "syllapy>=0.7.2", + "immutabledict>=4.2.0", + "absl-py>=2.1.0", + "setuptools>=61.0", ] [project.urls] diff --git a/uv.lock b/uv.lock index c175b81f..3c22d906 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.13'", @@ -8,6 +8,15 @@ resolution-markers = [ "python_full_version < '3.11'", 
] +[[package]] +name = "absl-py" +version = "2.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/10/2a/c93173ffa1b39c1d0395b7e842bbdc62e556ca9d8d3b5572926f3e4ca752/absl_py-2.3.1.tar.gz", hash = "sha256:a97820526f7fbfd2ec1bce83f3f25e3a14840dac0d8e02a0b71cd75db3f77fc9", size = 116588, upload-time = "2025-07-03T09:31:44.05Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/aa/ba0014cc4659328dc818a28827be78e6d97312ab0cb98105a770924dc11e/absl_py-2.3.1-py3-none-any.whl", hash = "sha256:eeecf07f0c2a93ace0772c92e596ace6d3d3996c042b2128459aaae2a76de11d", size = 135811, upload-time = "2025-07-03T09:31:42.253Z" }, +] + [[package]] name = "accelerate" version = "1.9.0" @@ -1151,10 +1160,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521, upload-time = "2024-06-20T11:30:28.248Z" }, ] +[[package]] +name = "emoji" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/78/0d2db9382c92a163d7095fc08efff7800880f830a152cfced40161e7638d/emoji-2.15.0.tar.gz", hash = "sha256:eae4ab7d86456a70a00a985125a03263a5eac54cd55e51d7e184b1ed3b6757e4", size = 615483, upload-time = "2025-09-21T12:13:02.755Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/5e/4b5aaaabddfacfe36ba7768817bd1f71a7a810a43705e531f3ae4c690767/emoji-2.15.0-py3-none-any.whl", hash = "sha256:205296793d66a89d88af4688fa57fd6496732eb48917a87175a023c8138995eb", size = 608433, upload-time = "2025-09-21T12:13:01.197Z" }, +] + [[package]] name = "eval-protocol" source = { editable = "." 
} dependencies = [ + { name = "absl-py" }, { name = "addict" }, { name = "aiohttp" }, { name = "aiosqlite" }, @@ -1163,13 +1182,17 @@ dependencies = [ { name = "dataclasses-json" }, { name = "deepdiff" }, { name = "docstring-parser" }, + { name = "emoji" }, { name = "fastapi" }, { name = "fireworks-ai" }, { name = "httpx" }, { name = "hydra-core" }, + { name = "immutabledict" }, + { name = "langdetect" }, { name = "litellm" }, { name = "loguru" }, { name = "mcp" }, + { name = "nltk" }, { name = "omegaconf" }, { name = "openai" }, { name = "peewee" }, @@ -1182,6 +1205,8 @@ dependencies = [ { name = "questionary" }, { name = "requests" }, { name = "rich" }, + { name = "setuptools" }, + { name = "syllapy" }, { name = "toml" }, { name = "uvicorn" }, { name = "websockets" }, @@ -1296,6 +1321,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "absl-py", specifier = ">=2.1.0" }, { name = "accelerate", marker = "extra == 'trl'", specifier = ">=0.28.0" }, { name = "addict", specifier = ">=2.4.0" }, { name = "aiohttp" }, @@ -1311,6 +1337,7 @@ requires-dist = [ { name = "docstring-parser", specifier = ">=0.15" }, { name = "dspy", marker = "extra == 'dspy'", specifier = ">=3.0.0" }, { name = "e2b", marker = "extra == 'dev'" }, + { name = "emoji", specifier = ">=2.12.1" }, { name = "fastapi", specifier = ">=0.116.1" }, { name = "fireworks-ai", specifier = "==1.0.0a20" }, { name = "google-auth", marker = "extra == 'bigquery'", specifier = ">=2.0.0" }, @@ -1320,6 +1347,7 @@ requires-dist = [ { name = "haikus", marker = "extra == 'dev'", specifier = "==0.3.8" }, { name = "httpx", specifier = ">=0.24.0" }, { name = "hydra-core", specifier = ">=1.3.2" }, + { name = "immutabledict", specifier = ">=4.2.0" }, { name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6.30.0" }, { name = "jupyter", marker = "extra == 'dev'", specifier = ">=1.1.1" }, { name = "klavis", marker = "extra == 'klavis'", specifier = ">=2.18.0" }, @@ -1327,6 +1355,7 @@ requires-dist = [ { name = 
"langchain-core", marker = "extra == 'langchain'", specifier = ">=0.3.0" }, { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.3.75" }, { name = "langchain-fireworks", marker = "extra == 'langgraph-tools'", specifier = ">=0.3.0" }, + { name = "langdetect", specifier = ">=1.0.9" }, { name = "langfuse", marker = "extra == 'langfuse'", specifier = ">=2.0.0" }, { name = "langfuse", marker = "extra == 'proxy'", specifier = ">=2.0.0" }, { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.6.7" }, @@ -1335,6 +1364,7 @@ requires-dist = [ { name = "litellm", specifier = "<1.75.0" }, { name = "loguru", specifier = ">=0.6.0" }, { name = "mcp", specifier = ">=1.9.2" }, + { name = "nltk", specifier = ">=3.8.1" }, { name = "omegaconf", specifier = ">=2.3.0" }, { name = "openai", specifier = ">=1.78.1" }, { name = "openai", marker = "extra == 'dev'", specifier = ">=1.78.1" }, @@ -1363,8 +1393,10 @@ requires-dist = [ { name = "rich", specifier = ">=12.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.5.0" }, { name = "selenium", marker = "extra == 'svgbench'", specifier = ">=4.0.0" }, + { name = "setuptools", specifier = ">=61.0" }, { name = "supabase", marker = "extra == 'supabase'", specifier = ">=2.18.1" }, { name = "swig", marker = "extra == 'box2d'" }, + { name = "syllapy", specifier = ">=0.7.2" }, { name = "syrupy", marker = "extra == 'dev'", specifier = ">=4.0.0" }, { name = "toml", specifier = ">=0.10.0" }, { name = "torch", marker = "extra == 'trl'", specifier = ">=1.9" }, @@ -2303,6 +2335,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, ] +[[package]] +name = "immutabledict" +version = "4.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ce/12/1da8e1a9050d0603ba65fb1796ed8860a705b906701c96e77f85cc7490be/immutabledict-4.2.2.tar.gz", hash = "sha256:cb6ed3090df593148f94cb407d218ca526fd2639694afdb553dc4f50ce6feeca", size = 6099, upload-time = "2025-10-12T13:32:59.755Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/7b/04ab6afa1ff7eb9ccb09049918c0407b205f5009092c0416147d163e4e2b/immutabledict-4.2.2-py3-none-any.whl", hash = "sha256:97c31d098a2c850e93a958badeef765e4736ed7942ec73e439facd764a3a7217", size = 4736, upload-time = "2025-10-12T13:32:58.326Z" }, +] + [[package]] name = "importlib-metadata" version = "8.7.0" @@ -3009,6 +3050,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/0d/41a51b40d24ff0384ec4f7ab8dd3dcea8353c05c973836b5e289f1465d4f/langchain_text_splitters-0.3.11-py3-none-any.whl", hash = "sha256:cf079131166a487f1372c8ab5d0bfaa6c0a4291733d9c43a34a16ac9bcd6a393", size = 33845, upload-time = "2025-08-31T23:02:57.195Z" }, ] +[[package]] +name = "langdetect" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/72/a3add0e4eec4eb9e2569554f7c70f4a3c27712f40e3284d483e88094cc0e/langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0", size = 981474, upload-time = "2021-05-07T07:54:13.562Z" } + [[package]] name = "langfuse" version = "3.2.1" @@ -3637,6 +3687,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/76/3165e84e5266d146d967a6cc784ff2fbf6ddd00985a55ec006b72bc39d5d/nh3-0.3.0-cp38-abi3-win_arm64.whl", hash = "sha256:d97d3efd61404af7e5721a0e74d81cdbfc6e5f97e11e731bb6d090e30a7b62b2", size = 585971, upload-time = "2025-07-17T14:43:35.936Z" }, ] +[[package]] +name = "nltk" +version = "3.9.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "joblib" }, + { name = "regex" }, + { name = 
"tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629, upload-time = "2025-10-01T07:19:23.764Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404, upload-time = "2025-10-01T07:19:21.648Z" }, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -6368,6 +6433,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/28/82/bb4c482352bbf50e1c595ddf3ed699a9265257ca0093d0bb65f28aa52a19/swig-4.3.1-py3-none-win_amd64.whl", hash = "sha256:efec16327029f682f649a26da726bb0305be8800bd0f1fa3e81bf0769cf5b476", size = 2566912, upload-time = "2025-04-19T19:50:57.849Z" }, ] +[[package]] +name = "syllapy" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/29/a1/7bc1ce4852e14ab5f3153262639742ae63fbfa626507dac4ab919a1e5232/syllapy-0.7.2.tar.gz", hash = "sha256:e55a7ad97d8b232e174b83f91b8f9be0c355d2a8e1208c7f6229055189605564", size = 25561, upload-time = "2022-08-29T01:55:03.366Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/cc/ffc9bddc146f14e8792a9b05b2bd1bc5f23f3b752a06e96b244780ce55b9/syllapy-0.7.2-py3-none-any.whl", hash = "sha256:198a7413033c32d7b31e21962efb3f284bcea80d3346e954b938ca1ebe6bee20", size = 24882, upload-time = "2022-08-29T01:55:01.1Z" }, +] + [[package]] name = "sympy" version = "1.14.0"