From fd0702b067ac2272faed044b9c2d1f4dce791c09 Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Thu, 11 Jun 2026 11:33:28 +0100
Subject: [PATCH 1/3] Add support for model aliases and corresponding test
 coverage

---
 app/evaluation.py       | 15 ++++++++++++---
 app/evaluation_tests.py | 18 +++++++++++++++---
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/app/evaluation.py b/app/evaluation.py
index 4b84368..73a80ce 100755
--- a/app/evaluation.py
+++ b/app/evaluation.py
@@ -5,6 +5,13 @@
 
 load_dotenv()
 
+MODEL_ALIASES = {
+    "small":     "gpt-4o-mini",
+    "medium":    "gpt-4o",
+    "large":     "gpt-4.1",
+    "reasoning": "o4-mini",
+}
+
 # A basic way to call ChatGPT from the Lambda Feedback platform
 
 
@@ -49,6 +56,8 @@ def evaluation_function(response, answer, parameters):
 
     openai.api_key = os.environ.get("OPENAI_API_KEY")
 
+    model = MODEL_ALIASES.get(parameters['model'], parameters['model'])
+
     question = parameters.get("question")
     moderator_prompt = parameters.get(
         "moderator_prompt",
@@ -69,7 +78,7 @@ def evaluation_function(response, answer, parameters):
 
     # Call openAI API for moderation
     moderation_boolean = openai.ChatCompletion.create(
-        model=parameters['model'],
+        model=model,
         messages=[{"role": "system", "content": moderator_prompt},
                   {"role": "user", "content": response}])
 
@@ -81,7 +90,7 @@ def evaluation_function(response, answer, parameters):
 
     # Call openAI API for boolean
     completion_boolean = openai.ChatCompletion.create(
-        model=parameters['model'],
+        model=model,
         messages=[
             {"role": "system", "content": main_prompt + " " + default_prompt}])
 
@@ -94,7 +103,7 @@ def evaluation_function(response, answer, parameters):
     # Check if feedback prompt is empty or not. Only populates feedback in 'output' if there is a 'feedback_prompt'.
     if parameters['feedback_prompt'].strip():
         completion_feedback = openai.ChatCompletion.create(
-            model=parameters['model'],
+            model=model,
             messages=[{"role": "system", "content": " The student response has been judged as " +
                        is_correct_str + main_prompt + " " + feedback_prompt + "# Reminder: the student response is "+is_correct_str}])
 
diff --git a/app/evaluation_tests.py b/app/evaluation_tests.py
index 5596d37..164612b 100755
--- a/app/evaluation_tests.py
+++ b/app/evaluation_tests.py
@@ -6,11 +6,11 @@
 load_dotenv()
 
 try:
-    from .evaluation import evaluation_function
+    from .evaluation import evaluation_function, MODEL_ALIASES
 except ImportError:
-    from evaluation import evaluation_function
+    from evaluation import evaluation_function, MODEL_ALIASES
 
-model = 'gpt-4o-mini'
+model = 'small'
 
 default_prompt = "Output a Boolean: True if the student is correct and False if the student is incorrect. Be reasonable."
 feedback_prompt = "Give objective and constructive feedback. Don't give the correct answer away. Short answer # Student reponse: {{response}}. # Closing remark: Keep it short."
@@ -90,5 +90,17 @@ def test_physics_definition(self):
         self.assertEqual(output["is_correct"], True)
 
 
+class TestModelAliases(unittest.TestCase):
+
+    def test_all_aliases_defined(self):
+        for name in ('small', 'medium', 'large', 'reasoning'):
+            self.assertIn(name, MODEL_ALIASES)
+            self.assertTrue(MODEL_ALIASES[name])
+
+    def test_raw_model_string_passthrough(self):
+        raw = 'gpt-4o-mini'
+        self.assertEqual(MODEL_ALIASES.get(raw, raw), raw)
+
+
 if __name__ == "__main__":
     unittest.main()

From 21fab651b52630b8d470b01976b2bcc8e1d2db7b Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Thu, 11 Jun 2026 13:47:43 +0100
Subject: [PATCH 2/3] Update documentation to introduce model aliases with
 usage examples

---
 app/docs/dev.md  | 13 ++++++++++---
 app/docs/user.md |  9 ++++++++-
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/app/docs/dev.md b/app/docs/dev.md
index 7e079b4..df6f8cd 100644
--- a/app/docs/dev.md
+++ b/app/docs/dev.md
@@ -19,7 +19,14 @@ To successfully run this function, ensure you set your OpenAI API key. The code
 
 1. **model**:
    - Defines the AI model used for evaluation.
-   - Accepts any OpenAI model string (e.g. `gpt-4o-mini`, `gpt-4o`). Recommended: `gpt-4o-mini`.
+   - Accepts a simple alias (`small`, `medium`, `large`, `reasoning`) or any raw OpenAI model string (e.g. `gpt-4o-mini`).
+
+   | Alias | Model |
+   |---|---|
+   | `small` | `gpt-4o-mini` |
+   | `medium` | `gpt-4o` |
+   | `large` | `gpt-4.1` |
+   | `reasoning` | `o4-mini` |
 
 2. **question** *(optional)*:
    - The text of the question being answered by the student.
@@ -61,7 +68,7 @@ Note that an input of a variable called `answer` is also required. This can be a
 
 ```python
 parameters = {
-    'model': 'gpt-4o-mini',
+    'model': 'small',
     'question': 'What is photosynthesis?',
     'main_prompt': "The question asked was: {{question}}. The correct answer is: {{answer}}. Evaluate the student's response: {{response}}.",
     'default_prompt': "Output a Boolean: True if the student is correct and False if they are incorrect.",
@@ -88,7 +95,7 @@ The function returns a dictionary with the following structure:
 
 ```python
 parameters = {
-    'model': 'gpt-4o-mini',
+    'model': 'small',
     'main_prompt': "Analyze the student's response about the capital of France. The correct answer is {{answer}}.",
     'default_prompt': "Output a Boolean: True if the student is correct and False if they are incorrect.",
     'feedback_prompt': "You are an AI tutor. Offer constructive feedback."
diff --git a/app/docs/user.md b/app/docs/user.md
index 8417b77..ae1c3f6 100644
--- a/app/docs/user.md
+++ b/app/docs/user.md
@@ -5,7 +5,14 @@ This chatGPT evaluation function is designed to automatically evaluate student r
 
 ## What does the teacher need to input?
 - `model`
-    - Suggest (July 2025), `gpt-4o-mini` or `gpt-4.1-mini`.
+    - Use a simple alias: `small`, `medium`, `large`, or `reasoning`. You can also pass any raw OpenAI model string directly (e.g. `gpt-4o-mini`).
+
+    | Alias | Model | When to use |
+    |---|---|---|
+    | `small` | `gpt-4o-mini` | Fast and cheap; good for most questions |
+    | `medium` | `gpt-4o` | Better reasoning; use for nuanced marking |
+    | `large` | `gpt-4.1` | Most capable; use for complex evaluation |
+    | `reasoning` | `o4-mini` | Structured reasoning; use for multi-step problems |
 
 - `question` [optional]
     - The text of the question being answered. Set this if you want to reference the question wording inside your prompts using `{{question}}`.

From 55e1996e10ce7e5a059a812f324dce4d852633ba Mon Sep 17 00:00:00 2001
From: Marcus Messer <marcus@marcusm.co.uk>
Date: Thu, 11 Jun 2026 15:33:47 +0100
Subject: [PATCH 3/3] Introduce support for overriding model aliases via
 parameters, update docs, and adjust tests accordingly

---
 app/docs/dev.md         | 15 ++++++++-------
 app/docs/user.md        |  4 +++-
 app/evaluation.py       | 19 +++++++++++--------
 app/evaluation_tests.py | 19 +++++++++++--------
 4 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/app/docs/dev.md b/app/docs/dev.md
index df6f8cd..be69755 100644
--- a/app/docs/dev.md
+++ b/app/docs/dev.md
@@ -20,13 +20,14 @@ To successfully run this function, ensure you set your OpenAI API key. The code
 1. **model**:
    - Defines the AI model used for evaluation.
    - Accepts a simple alias (`small`, `medium`, `large`, `reasoning`) or any raw OpenAI model string (e.g. `gpt-4o-mini`).
-
-   | Alias | Model |
-   |---|---|
-   | `small` | `gpt-4o-mini` |
-   | `medium` | `gpt-4o` |
-   | `large` | `gpt-4.1` |
-   | `reasoning` | `o4-mini` |
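+   - Each alias resolves to a default model, which can be overridden per call via the `small_model`, `medium_model`, `large_model`, and `reasoning_model` parameters. An override only takes effect when `model` is set to the matching alias; raw model strings are passed through unchanged.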
+
+   | Alias | Default model | Override parameter |
+   |---|---|---|
+   | `small` | `gpt-4o-mini` | `small_model` |
+   | `medium` | `gpt-4o` | `medium_model` |
+   | `large` | `gpt-4.1` | `large_model` |
+   | `reasoning` | `o4-mini` | `reasoning_model` |
 
 2. **question** *(optional)*:
    - The text of the question being answered by the student.
diff --git a/app/docs/user.md b/app/docs/user.md
index ae1c3f6..a257fc8 100644
--- a/app/docs/user.md
+++ b/app/docs/user.md
@@ -7,13 +7,15 @@ This chatGPT evaluation function is designed to automatically evaluate student r
 - `model`
     - Use a simple alias: `small`, `medium`, `large`, or `reasoning`. You can also pass any raw OpenAI model string directly (e.g. `gpt-4o-mini`).
 
-    | Alias | Model | When to use |
+    | Alias | Default model | When to use |
     |---|---|---|
     | `small` | `gpt-4o-mini` | Fast and cheap; good for most questions |
     | `medium` | `gpt-4o` | Better reasoning; use for nuanced marking |
     | `large` | `gpt-4.1` | Most capable; use for complex evaluation |
     | `reasoning` | `o4-mini` | Structured reasoning; use for multi-step problems |
 
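+    To override a default, add the corresponding parameter (`small_model`, `medium_model`, `large_model`, or `reasoning_model`) set to the OpenAI model string you want; for example, setting `small_model` to `gpt-4.1-nano` makes the `small` alias use `gpt-4.1-nano`.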
+
 - `question` [optional]
     - The text of the question being answered. Set this if you want to reference the question wording inside your prompts using `{{question}}`.
 
diff --git a/app/evaluation.py b/app/evaluation.py
index 73a80ce..589a6f5 100755
--- a/app/evaluation.py
+++ b/app/evaluation.py
@@ -5,16 +5,19 @@
 
 load_dotenv()
 
-MODEL_ALIASES = {
-    "small":     "gpt-4o-mini",
-    "medium":    "gpt-4o",
-    "large":     "gpt-4.1",
-    "reasoning": "o4-mini",
-}
-
 # A basic way to call ChatGPT from the Lambda Feedback platform
 
 
+def resolve_model(model_str, parameters):
+    aliases = {
+        "small":     parameters.get("small_model",     "gpt-4o-mini"),
+        "medium":    parameters.get("medium_model",    "gpt-4o"),
+        "large":     parameters.get("large_model",     "gpt-4.1"),
+        "reasoning": parameters.get("reasoning_model", "o4-mini"),
+    }
+    return aliases.get(model_str, model_str)
+
+
 def process_prompt(prompt, question, response, answer):
     prompt = prompt.replace("{{answer}}", str(answer))
     prompt = prompt.replace("{{question}}", str(question) or "")
@@ -56,7 +59,7 @@ def evaluation_function(response, answer, parameters):
 
     openai.api_key = os.environ.get("OPENAI_API_KEY")
 
-    model = MODEL_ALIASES.get(parameters['model'], parameters['model'])
+    model = resolve_model(parameters['model'], parameters)
 
     question = parameters.get("question")
     moderator_prompt = parameters.get(
diff --git a/app/evaluation_tests.py b/app/evaluation_tests.py
index 164612b..e8de7a4 100755
--- a/app/evaluation_tests.py
+++ b/app/evaluation_tests.py
@@ -6,9 +6,9 @@
 load_dotenv()
 
 try:
-    from .evaluation import evaluation_function, MODEL_ALIASES
+    from .evaluation import evaluation_function, resolve_model
 except ImportError:
-    from evaluation import evaluation_function, MODEL_ALIASES
+    from evaluation import evaluation_function, resolve_model
 
 model = 'small'
 
@@ -92,14 +92,17 @@ def test_physics_definition(self):
 
 class TestModelAliases(unittest.TestCase):
 
-    def test_all_aliases_defined(self):
-        for name in ('small', 'medium', 'large', 'reasoning'):
-            self.assertIn(name, MODEL_ALIASES)
-            self.assertTrue(MODEL_ALIASES[name])
+    def test_default_aliases(self):
+        self.assertEqual(resolve_model('small',     {}), 'gpt-4o-mini')
+        self.assertEqual(resolve_model('medium',    {}), 'gpt-4o')
+        self.assertEqual(resolve_model('large',     {}), 'gpt-4.1')
+        self.assertEqual(resolve_model('reasoning', {}), 'o4-mini')
+
+    def test_alias_override_via_parameters(self):
+        self.assertEqual(resolve_model('small', {'small_model': 'gpt-4.1-nano'}), 'gpt-4.1-nano')
 
     def test_raw_model_string_passthrough(self):
-        raw = 'gpt-4o-mini'
-        self.assertEqual(MODEL_ALIASES.get(raw, raw), raw)
+        self.assertEqual(resolve_model('gpt-4o-mini', {}), 'gpt-4o-mini')
 
 
 if __name__ == "__main__":