lambda-feedback · m-messer · Jun 12, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/.github/workflows/production-deploy.yml b/.github/workflows/production-deploy.yml
@@ -37,8 +37,6 @@ jobs:
     uses: lambda-feedback/evaluation-function-workflows/.github/workflows/deploy.yml@main
     with:
       template-repository-name: 'lambda-feedback/evaluation-function-boilerplate-python'
-      build-file: "app/Dockerfile"
-      build-context: "./app"
       environment: "production"
       version-bump: ${{ inputs.version-bump }}
       branch: ${{ inputs.branch }}

diff --git a/.github/workflows/staging-deploy.yml b/.github/workflows/staging-deploy.yml
@@ -55,8 +55,6 @@ jobs:
     uses: lambda-feedback/evaluation-function-workflows/.github/workflows/deploy.yml@main
     with:
       template-repository-name: "lambda-feedback/evaluation-function-boilerplate-python"
-      build-file: "app/Dockerfile"
-      build-context: "./app"
       build-platforms: "aws"
       environment: "staging"
       lfs: false

diff --git a/README.md b/README.md
@@ -4,6 +4,9 @@ This repository contains the code for an evaluation function that compares two s
 
 This evaluation function is written in Python and uses [SymPy](https://www.sympy.org/en/index.html) in order to evaluate the set expressions. As SymPy does not support parsing set expressions, the function uses a custom parser based on [Lark](https://lark-parser.readthedocs.io/en/latest/). The parser is able to recognize set expressions written in both [asciimath](https://asciimath.org/) or [LaTeX](https://www.latex-project.org/) and convert them into SymPy expressions.
 
+## Deployment
+[![Create Release Request](https://img.shields.io/badge/Create%20Release%20Request-blue?style=for-the-badge)](https://github.com/lambda-feedback/compareSets/issues/new?template=release-request.yml)
+
 ## Repository Structure
 
 ```bash

diff --git a/evaluation_function/evaluation.py b/evaluation_function/evaluation.py
@@ -1,10 +1,14 @@
+import logging
 from typing import Any
 from sympy import simplify_logic, Equivalent
 from lf_toolkit.evaluation import Result, Params
 from lf_toolkit.parse.set import SetParser, LatexPrinter, SymPyBooleanTransformer, ASCIIPrinter
 
 from .parse import parse_with_feedback, FeedbackException
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG, format="%(levelname)s [%(name)s] %(message)s")
+
 def evaluation_function(
     response: Any,
     answer: Any,
@@ -33,36 +37,58 @@ def evaluation_function(
     to output the evaluation response.
     """
 
+    logger.debug("evaluation_function called")
+    logger.debug("response type=%s value=%r", type(response).__name__, response)
+    logger.debug("answer   type=%s value=%r", type(answer).__name__, answer)
+    logger.debug("params   value=%r", params)
+
     parser = SetParser.instance()
     sympyTransformer = SymPyBooleanTransformer()
 
     # here we want to compare the response set with the example solution set.
     # we have to do the following steps
 
     try:
+        is_latex = params.get("is_latex", False)
+        logger.debug("is_latex=%r", is_latex)
+
         # 1. convert the `response`, which may be a latex string, to a sympy expression
-        responseSet = parse_with_feedback(response, latex=params.get("is_latex", False))
+        logger.debug("parsing response...")
+        responseSet = parse_with_feedback(response, latex=is_latex)
+        logger.debug("responseSet=%r", responseSet)
         responseSetSympy = sympyTransformer.transform(responseSet)
+        logger.debug("responseSetSympy=%r", responseSetSympy)
 
         # 2. convert the `answer`, which may be a latex string, to a sympy expression
         # TODO: what if answer is also in latex? how do we know?
-        answerSet = parser.parse(answer, latex=False)
+        logger.debug("parsing answer...")
+        try:
+            answerSet = parser.parse(answer, latex=False)
+        except Exception as e:
+            logger.error("failed to parse answer: type=%s value=%r error=%r", type(answer).__name__, answer, e)
+            raise FeedbackException() from e
+        logger.debug("answerSet=%r", answerSet)
         answerSetSympy = sympyTransformer.transform(answerSet)
+        logger.debug("answerSetSympy=%r", answerSetSympy)
 
         # 3. compare the two sympy expressions w/ simplification enabled.
         #    If they are equal, the sets produced by the two expressions are
         #    semantically equal. However, the expressions may not be equal.
         semantic_equal = simplify_logic(Equivalent(responseSetSympy, answerSetSympy)) == True
+        logger.debug("semantic_equal=%r", semantic_equal)
 
         # 4. compare the two sympy expressions w/ simplification disabled.
         #    If they are equal, the expressions are also equal in syntax.
         #    This respects laws of commutativity, e.g. A u B == B u A.
         syntactic_equal = responseSetSympy == answerSetSympy
+        logger.debug("syntactic_equal=%r", syntactic_equal)
 
         enforce_expression_equality = params.get("enforce_expression_equality", False)
+        logger.debug("enforce_expression_equality=%r", enforce_expression_equality)
 
         # 5. `is_correct` is True iff 3) is True and either 4) is True or `enforce_expression_equality` is False
         is_correct = semantic_equal and (syntactic_equal or not enforce_expression_equality)
+        logger.debug("is_correct=%r", is_correct)
 
         feedback_items=[]
 
@@ -84,6 +110,7 @@ def evaluation_function(
             feedback_items=feedback_items,
         )
     except FeedbackException as e:
+        logger.error("FeedbackException: %r", e)
         return Result(
             is_correct=False,
             feedback_items=[("parse_error", str(e))]