Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/production-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ jobs:
uses: lambda-feedback/evaluation-function-workflows/.github/workflows/deploy.yml@main
with:
template-repository-name: 'lambda-feedback/evaluation-function-boilerplate-python'
build-file: "app/Dockerfile"
build-context: "./app"
environment: "production"
version-bump: ${{ inputs.version-bump }}
branch: ${{ inputs.branch }}
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/staging-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ jobs:
uses: lambda-feedback/evaluation-function-workflows/.github/workflows/deploy.yml@main
with:
template-repository-name: "lambda-feedback/evaluation-function-boilerplate-python"
build-file: "app/Dockerfile"
build-context: "./app"
build-platforms: "aws"
environment: "staging"
lfs: false
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ This repository contains the code for an evaluation function that compares two s

This evaluation function is written in Python and uses [SymPy](https://www.sympy.org/en/index.html) in order to evaluate the set expressions. As SymPy does not support parsing set expressions, the function uses a custom parser based on [Lark](https://lark-parser.readthedocs.io/en/latest/). The parser is able to recognize set expressions written in either [asciimath](https://asciimath.org/) or [LaTeX](https://www.latex-project.org/) and convert them into SymPy expressions.

## Deployment
[![Create Release Request](https://img.shields.io/badge/Create%20Release%20Request-blue?style=for-the-badge)](https://github.com/lambda-feedback/compareSets/issues/new?template=release-request.yml)

## Repository Structure

```bash
Expand Down
31 changes: 29 additions & 2 deletions evaluation_function/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import logging
from typing import Any
from sympy import simplify_logic, Equivalent
from lf_toolkit.evaluation import Result, Params
from lf_toolkit.parse.set import SetParser, LatexPrinter, SymPyBooleanTransformer, ASCIIPrinter

from .parse import parse_with_feedback, FeedbackException

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG, format="%(levelname)s [%(name)s] %(message)s")

def evaluation_function(
response: Any,
answer: Any,
Expand Down Expand Up @@ -33,36 +37,58 @@ def evaluation_function(
to output the evaluation response.
"""

logger.debug("evaluation_function called")
logger.debug("response type=%s value=%r", type(response).__name__, response)
logger.debug("answer type=%s value=%r", type(answer).__name__, answer)
logger.debug("params value=%r", params)

parser = SetParser.instance()
sympyTransformer = SymPyBooleanTransformer()

# here we want to compare the response set with the example solution set.
# we have to do the following steps

try:
is_latex = params.get("is_latex", False)
logger.debug("is_latex=%r", is_latex)

# 1. convert the `response`, which may be a latex string, to a sympy expression
responseSet = parse_with_feedback(response, latex=params.get("is_latex", False))
logger.debug("parsing response...")
responseSet = parse_with_feedback(response, latex=is_latex)
logger.debug("responseSet=%r", responseSet)
responseSetSympy = sympyTransformer.transform(responseSet)
logger.debug("responseSetSympy=%r", responseSetSympy)

# 2. convert the `answer`, which may be a latex string, to a sympy expression
# TODO: what if answer is also in latex? how do we know?
answerSet = parser.parse(answer, latex=False)
logger.debug("parsing answer...")
try:
answerSet = parser.parse(answer, latex=False)
except Exception as e:
logger.error("failed to parse answer: type=%s value=%r error=%r", type(answer).__name__, answer, e)
raise FeedbackException() from e
logger.debug("answerSet=%r", answerSet)
answerSetSympy = sympyTransformer.transform(answerSet)
logger.debug("answerSetSympy=%r", answerSetSympy)

# 3. compare the two sympy expressions w/ simplification enabled.
# If they are equal, the sets produced by the two expressions are
# semantically equal. However, the expressions may not be equal.
semantic_equal = simplify_logic(Equivalent(responseSetSympy, answerSetSympy)) == True
logger.debug("semantic_equal=%r", semantic_equal)

# 4. compare the two sympy expressions w/ simplification disabled.
# If they are equal, the expressions are also equal in syntax.
# This respects laws of commutativity, e.g. A u B == B u A.
syntactic_equal = responseSetSympy == answerSetSympy
logger.debug("syntactic_equal=%r", syntactic_equal)

enforce_expression_equality = params.get("enforce_expression_equality", False)
logger.debug("enforce_expression_equality=%r", enforce_expression_equality)

# 5. `is_correct` is True iff 3) is True and either 4) is True or `enforce_expression_equality` is False
is_correct = semantic_equal and (syntactic_equal or not enforce_expression_equality)
logger.debug("is_correct=%r", is_correct)

feedback_items=[]

Expand All @@ -84,6 +110,7 @@ def evaluation_function(
feedback_items=feedback_items,
)
except FeedbackException as e:
logger.error("FeedbackException: %r", e)
return Result(
is_correct=False,
feedback_items=[("parse_error", str(e))]
Expand Down
Loading
Loading