lambda-feedback · m-messer · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/app/docs/dev.md b/app/docs/dev.md
@@ -19,7 +19,15 @@ To successfully run this function, ensure you set your OpenAI API key. The code
 
 1. **model**:
    - Defines the AI model used for evaluation.
-   - Accepts any OpenAI model string (e.g. `gpt-4o-mini`, `gpt-4o`). Recommended: `gpt-4o-mini`.
+   - Accepts a simple alias (`small`, `medium`, `large`, `reasoning`) or any raw OpenAI model string (e.g. `gpt-4o-mini`).
+   - Each alias resolves to a default model, which can be overridden per call with the matching `small_model`, `medium_model`, `large_model`, or `reasoning_model` parameter.
+
+   | Alias | Default model | Override parameter |
+   |---|---|---|
+   | `small` | `gpt-4o-mini` | `small_model` |
+   | `medium` | `gpt-4o` | `medium_model` |
+   | `large` | `gpt-4.1` | `large_model` |
+   | `reasoning` | `o4-mini` | `reasoning_model` |
 
 2. **question** *(optional)*:
    - The text of the question being answered by the student.
@@ -61,7 +69,7 @@ Note that an input of a variable called `answer` is also required. This can be a
 
 ```python
 parameters = {
-    'model': 'gpt-4o-mini',
+    'model': 'small',
     'question': 'What is photosynthesis?',
     'main_prompt': "The question asked was: {{question}}. The correct answer is: {{answer}}. Evaluate the student's response: {{response}}.",
     'default_prompt': "Output a Boolean: True if the student is correct and False if they are incorrect.",
@@ -88,7 +96,7 @@ The function returns a dictionary with the following structure:
 
 ```python
 parameters = {
-    'model': 'gpt-4o-mini',
+    'model': 'small',
     'main_prompt': "Analyze the student's response about the capital of France. The correct answer is {{answer}}.",
     'default_prompt': "Output a Boolean: True if the student is correct and False if they are incorrect.",
     'feedback_prompt': "You are an AI tutor. Offer constructive feedback."

diff --git a/app/docs/user.md b/app/docs/user.md
@@ -5,7 +5,16 @@ This chatGPT evaluation function is designed to automatically evaluate student r
 
 ## What does the teacher need to input?
 - `model`
-    - Suggest (July 2025), `gpt-4o-mini` or `gpt-4.1-mini`.
+    - Use a simple alias: `small`, `medium`, `large`, or `reasoning`. You can also pass any raw OpenAI model string directly (e.g. `gpt-4o-mini`).
+
+    | Alias | Default model | When to use |
+    |---|---|---|
+    | `small` | `gpt-4o-mini` | Fast and cheap; good for most questions |
+    | `medium` | `gpt-4o` | Better reasoning; use for nuanced marking |
+    | `large` | `gpt-4.1` | Most capable; use for complex evaluation |
+    | `reasoning` | `o4-mini` | Structured reasoning; use for multi-step problems |
+
+    To override a default, add the corresponding parameter (`small_model`, `medium_model`, `large_model`, or `reasoning_model`) and set it to the model you want, e.g. `small_model` set to `gpt-4.1-nano`.
 
 - `question` [optional]
     - The text of the question being answered. Set this if you want to reference the question wording inside your prompts using `{{question}}`.

diff --git a/app/evaluation.py b/app/evaluation.py
@@ -8,6 +8,16 @@
 # A basic way to call ChatGPT from the Lambda Feedback platform
 
 
+def resolve_model(model_str, parameters):
+    """Return the model name for alias `model_str`; other values pass through unchanged."""
+    defaults = {"small": "gpt-4o-mini", "medium": "gpt-4o",
+                "large": "gpt-4.1", "reasoning": "o4-mini"}
+    if model_str not in defaults:  # not an alias: treat it as a raw model string
+        return model_str
+    # A missing, None or empty "<alias>_model" override falls back to the default.
+    return parameters.get(model_str + "_model") or defaults[model_str]
+
+
 def process_prompt(prompt, question, response, answer):
     prompt = prompt.replace("{{answer}}", str(answer))
     prompt = prompt.replace("{{question}}", str(question) or "")
@@ -49,6 +59,8 @@ def evaluation_function(response, answer, parameters):
 
     openai.api_key = os.environ.get("OPENAI_API_KEY")
 
+    model = resolve_model(parameters['model'], parameters)
+
     question = parameters.get("question")
     moderator_prompt = parameters.get(
         "moderator_prompt",
@@ -69,7 +81,7 @@ def evaluation_function(response, answer, parameters):
 
     # Call openAI API for moderation
     moderation_boolean = openai.ChatCompletion.create(
-        model=parameters['model'],
+        model=model,
         messages=[{"role": "system", "content": moderator_prompt},
                   {"role": "user", "content": response}])
 
@@ -81,7 +93,7 @@ def evaluation_function(response, answer, parameters):
 
     # Call openAI API for boolean
     completion_boolean = openai.ChatCompletion.create(
-        model=parameters['model'],
+        model=model,
         messages=[
             {"role": "system", "content": main_prompt + " " + default_prompt}])
 
@@ -94,7 +106,7 @@ def evaluation_function(response, answer, parameters):
     # Check if feedback prompt is empty or not. Only populates feedback in 'output' if there is a 'feedback_prompt'.
     if parameters['feedback_prompt'].strip():
         completion_feedback = openai.ChatCompletion.create(
-            model=parameters['model'],
+            model=model,
             messages=[{"role": "system", "content": " The student response has been judged as " +
                        is_correct_str + main_prompt + " " + feedback_prompt + "# Reminder: the student response is "+is_correct_str}])
 

diff --git a/app/evaluation_tests.py b/app/evaluation_tests.py
@@ -6,11 +6,11 @@
 load_dotenv()
 
 try:
-    from .evaluation import evaluation_function
+    from .evaluation import evaluation_function, resolve_model
 except ImportError:
-    from evaluation import evaluation_function
+    from evaluation import evaluation_function, resolve_model
 
-model = 'gpt-4o-mini'
+model = 'small'
 
 default_prompt = "Output a Boolean: True if the student is correct and False if the student is incorrect. Be reasonable."
 feedback_prompt = "Give objective and constructive feedback. Don't give the correct answer away. Short answer # Student reponse: {{response}}. # Closing remark: Keep it short."
@@ -90,5 +90,20 @@ def test_physics_definition(self):
         self.assertEqual(output["is_correct"], True)
 
 
+class TestModelAliases(unittest.TestCase):  # unit tests for resolve_model's alias lookup
+
+    def test_default_aliases(self):  # empty parameters: each alias maps to its default
+        self.assertEqual(resolve_model('small',     {}), 'gpt-4o-mini')
+        self.assertEqual(resolve_model('medium',    {}), 'gpt-4o')
+        self.assertEqual(resolve_model('large',     {}), 'gpt-4.1')
+        self.assertEqual(resolve_model('reasoning', {}), 'o4-mini')
+
+    def test_alias_override_via_parameters(self):  # 'small_model' replaces the default
+        self.assertEqual(resolve_model('small', {'small_model': 'gpt-4.1-nano'}), 'gpt-4.1-nano')
+
+    def test_raw_model_string_passthrough(self):  # non-alias strings come back unchanged
+        self.assertEqual(resolve_model('gpt-4o-mini', {}), 'gpt-4o-mini')
+
+
 if __name__ == "__main__":
     unittest.main()