Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions bayesian_agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@
from bayesian_agent.core.context import SkillContextBuilder
from bayesian_agent.core.evidence import TrajectoryEvidence
from bayesian_agent.core.policy import RewritePolicy
from bayesian_agent.core.ranking import RankingStrategy, get_strategy
from bayesian_agent.core.registry import BayesianSkillRegistry
from bayesian_agent.core.standards import DEFAULT_AGENTIC_STANDARDS, WorkflowStandard, evaluate_standards

__all__ = [
"BayesianSkillRegistry",
"RewriteDecision",
"DEFAULT_AGENTIC_STANDARDS",
"RankingStrategy",
"RewritePolicy",
"SkillBelief",
"SkillContextBuilder",
"TrajectoryEvidence",
"WorkflowStandard",
"evaluate_standards",
"get_strategy",
]
3 changes: 2 additions & 1 deletion bayesian_agent/adapters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@

from bayesian_agent.adapters.base import AgentAdapter
from bayesian_agent.adapters.generic_agent import GenericAgentAdapter
from bayesian_agent.adapters.workflow_log import evidence_from_jsonl, workflow_record_to_evidence

__all__ = ["AgentAdapter", "GenericAgentAdapter"]
__all__ = ["AgentAdapter", "GenericAgentAdapter", "evidence_from_jsonl", "workflow_record_to_evidence"]
65 changes: 65 additions & 0 deletions bayesian_agent/adapters/workflow_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Utilities for converting generic assistant workflow logs into trajectory evidence."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Iterable, Iterator, Mapping, Optional

from bayesian_agent.core.evidence import TrajectoryEvidence


# Raw outcome/status values that `workflow_record_to_evidence` treats as a
# successful run. The boolean `True` is included so records that only carry a
# `success` flag are recognised as well.
SUCCESS_VALUES = {"success", "succeeded", "ok", "passed", "complete", "completed", True}


def workflow_record_to_evidence(
    record: Mapping[str, Any],
    *,
    default_skill_id: str = "workflow/default",
    default_context: str = "workflow",
) -> TrajectoryEvidence:
    """Convert an OpenClaw/Hermes-like workflow record into `TrajectoryEvidence`.

    The function intentionally accepts several common field names so external
    harnesses can integrate without adopting Bayesian-Agent internals first.

    Field aliases, tried in order (the first truthy value wins):

    * task id: ``task_id``, ``id``, ``run_id`` (empty string if none)
    * skill id: ``skill_id``, ``sop_id``, ``workflow_id``, then `default_skill_id`
    * context: ``context``, ``task_family``, ``workflow``, then `default_context`
    * outcome: ``outcome``, ``status``, ``success`` (first value that is not None)
    * tokens: ``input_tokens``/``prompt_tokens``, ``output_tokens``/
      ``completion_tokens``, ``total_tokens``
    * turns: ``turns``, ``steps``
    * elapsed: ``elapsed_seconds``, ``duration_seconds``
    * failure mode: ``failure_mode``, ``error_type``, ``error``
    * summary: ``summary``, ``title``, then the task id

    The outcome is mapped to ``"success"`` when it matches `SUCCESS_VALUES`;
    string outcomes are compared after stripping whitespace and lower-casing.
    Everything else, including a missing outcome, becomes ``"failure"``.

    Args:
        record: One workflow log entry.
        default_skill_id: Skill id used when the record names none.
        default_context: Context used when the record names none.

    Returns:
        The evidence object built from the record. ``metadata`` holds every
        record field except ``transcript`` and ``messages``.

    Raises:
        ValueError, TypeError: If a numeric field holds a value that ``int()``
            or ``float()`` cannot convert.
    """

    task_id = str(record.get("task_id") or record.get("id") or record.get("run_id") or "")
    skill_id = str(record.get("skill_id") or record.get("sop_id") or record.get("workflow_id") or default_skill_id)
    context = str(record.get("context") or record.get("task_family") or record.get("workflow") or default_context)

    # Take the first outcome-like field that actually carries a value, so an
    # explicit `"outcome": null` does not hide a usable `status`/`success`.
    raw_outcome = None
    for key in ("outcome", "status", "success"):
        raw_outcome = record.get(key)
        if raw_outcome is not None:
            break

    if isinstance(raw_outcome, str):
        # Harnesses disagree on casing/padding ("SUCCESS", "Ok ", ...).
        succeeded = raw_outcome.strip().lower() in SUCCESS_VALUES
    else:
        try:
            succeeded = raw_outcome in SUCCESS_VALUES
        except TypeError:
            # Unhashable payloads (dict/list) cannot be a success marker.
            succeeded = False
    outcome = "success" if succeeded else "failure"

    return TrajectoryEvidence(
        task_id=task_id,
        skill_id=skill_id,
        context=context,
        outcome=outcome,
        input_tokens=int(record.get("input_tokens") or record.get("prompt_tokens") or 0),
        output_tokens=int(record.get("output_tokens") or record.get("completion_tokens") or 0),
        total_tokens=int(record.get("total_tokens") or 0),
        turns=int(record.get("turns") or record.get("steps") or 0),
        elapsed_seconds=float(record.get("elapsed_seconds") or record.get("duration_seconds") or 0.0),
        failure_mode=str(record.get("failure_mode") or record.get("error_type") or record.get("error") or ""),
        summary=str(record.get("summary") or record.get("title") or task_id),
        # Conversation payloads are dropped so metadata stays small.
        metadata={k: v for k, v in record.items() if k not in {"transcript", "messages"}},
    )


def iter_jsonl(path: str | Path) -> Iterator[Mapping[str, Any]]:
    """Yield JSON objects from a JSONL file, skipping blank lines.

    The file is read as UTF-8 and streamed one line at a time instead of being
    loaded whole. Only real newlines (LF, CR, CRLF) separate records:
    ``str.splitlines()`` would also break on characters such as U+2028 and
    U+2029, which may legally appear unescaped inside a JSON string.

    Args:
        path: Location of the JSONL file.

    Yields:
        The decoded value of each non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """

    with Path(path).open(encoding="utf-8") as handle:
        for line in handle:
            if line.strip():
                yield json.loads(line)


def evidence_from_jsonl(
    path: str | Path,
    *,
    default_skill_id: str = "workflow/default",
    default_context: str = "workflow",
) -> Iterable[TrajectoryEvidence]:
    """Lazily turn each record of a workflow JSONL file into trajectory evidence.

    Args:
        path: Location of the JSONL file.
        default_skill_id: Skill id forwarded for records that name none.
        default_context: Context forwarded for records that name none.

    Yields:
        One `TrajectoryEvidence` per record, in file order.
    """

    yield from (
        workflow_record_to_evidence(
            entry,
            default_skill_id=default_skill_id,
            default_context=default_context,
        )
        for entry in iter_jsonl(path)
    )
32 changes: 31 additions & 1 deletion bayesian_agent/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from bayesian_agent.core.context import SkillContextBuilder
from bayesian_agent.core.evidence import TrajectoryEvidence
from bayesian_agent.core.registry import BayesianSkillRegistry
from bayesian_agent.core.repair import failed_task_ids, normalize_results, summarize, summarize_incremental_lift
from bayesian_agent.core.repair import failed_task_ids, normalize_results, repair_report, summarize, summarize_incremental_lift
from bayesian_agent.adapters.workflow_log import evidence_from_jsonl


def _read_json(path: str) -> Mapping[str, Any]:
Expand Down Expand Up @@ -44,6 +45,14 @@ def build_parser() -> argparse.ArgumentParser:
evolve.add_argument("--registry", required=True, help="Output registry JSON path.")
evolve.add_argument("--context-out", default="", help="Optional rendered Skill context path.")

evolve_log = sub.add_parser("evolve-workflow-log", help="Update a registry from generic assistant workflow JSONL records.")
evolve_log.add_argument("--jsonl", action="append", required=True, help="Path to a workflow JSONL file.")
evolve_log.add_argument("--registry", required=True, help="Output registry JSON path.")
evolve_log.add_argument("--context-out", default="", help="Optional rendered Skill context path.")
evolve_log.add_argument("--default-skill-id", default="workflow/default")
evolve_log.add_argument("--default-context", default="workflow")
evolve_log.add_argument("--strategy", default="exploit", help="Context ranking strategy.")

summarize_cmd = sub.add_parser("summarize", help="Summarize a results JSON file.")
summarize_cmd.add_argument("--results", required=True)
summarize_cmd.add_argument("--out", required=True)
Expand All @@ -52,6 +61,10 @@ def build_parser() -> argparse.ArgumentParser:
repair.add_argument("--baseline", required=True)
repair.add_argument("--out", required=True)

repair_report_cmd = sub.add_parser("repair-report", help="Summarize failed task ids and failure-mode clusters.")
repair_report_cmd.add_argument("--baseline", required=True)
repair_report_cmd.add_argument("--out", required=True)

lift = sub.add_parser("incremental-summary", help="Summarize baseline plus repair traces.")
lift.add_argument("--baseline", required=True)
lift.add_argument("--repairs", required=True)
Expand All @@ -70,13 +83,30 @@ def main(argv: Sequence[str] = None) -> int:
if args.context_out:
Path(args.context_out).write_text(SkillContextBuilder(registry).render(), encoding="utf-8")
return 0
if args.command == "evolve-workflow-log":
registry = BayesianSkillRegistry(args.registry)
for jsonl_path in args.jsonl:
registry.record_many(
evidence_from_jsonl(
jsonl_path,
default_skill_id=args.default_skill_id,
default_context=args.default_context,
)
)
registry.save()
if args.context_out:
Path(args.context_out).write_text(SkillContextBuilder(registry).render(strategy=args.strategy), encoding="utf-8")
return 0
if args.command == "summarize":
_write_json(args.out, summarize(normalize_results(_read_json(args.results))))
return 0
if args.command == "repair-plan":
failures = {k: sorted(v) for k, v in failed_task_ids(normalize_results(_read_json(args.baseline))).items()}
_write_json(args.out, failures)
return 0
if args.command == "repair-report":
_write_json(args.out, repair_report(normalize_results(_read_json(args.baseline))))
return 0
if args.command == "incremental-summary":
baseline = normalize_results(_read_json(args.baseline))
repairs = normalize_results(_read_json(args.repairs))
Expand Down
17 changes: 17 additions & 0 deletions bayesian_agent/core/belief.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,21 @@ def success_probability(self) -> float:
denom = self.alpha + self.beta
return self.alpha / denom if denom else 0.0

@property
def posterior_variance(self) -> float:
    """Return the variance of the Beta(alpha, beta) posterior.

    Computed as ``alpha * beta / ((alpha + beta)**2 * (alpha + beta + 1))``;
    a non-positive ``alpha + beta`` yields ``0.0`` instead of dividing.
    """

    a, b = self.alpha, self.beta
    total = a + b
    return 0.0 if total <= 0 else (a * b) / ((total**2) * (total + 1.0))

@property
def posterior_std(self) -> float:
    """Return the square root of `posterior_variance`."""

    variance = self.posterior_variance
    return pow(variance, 0.5)

def update(self, event: TrajectoryEvidence) -> "SkillBelief":
outcome = event.outcome.strip().lower()
if outcome == "success":
Expand Down Expand Up @@ -72,6 +87,8 @@ def to_dict(self) -> Dict[str, Any]:
"alpha": self.alpha,
"beta": self.beta,
"posterior_success": self.success_probability,
"posterior_variance": self.posterior_variance,
"posterior_std": self.posterior_std,
"contexts": self.contexts,
"failure_modes": self.failure_modes,
"evidence": self.evidence[-MAX_EVIDENCE:],
Expand Down
6 changes: 4 additions & 2 deletions bayesian_agent/core/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,22 @@ def __init__(self, registry: BayesianSkillRegistry, policy: RewritePolicy = None
self.registry = registry
self.policy = policy or RewritePolicy()

def render(self, task_context: str = "", limit: int = 5) -> str:
beliefs = self.registry.top(limit=limit, context=task_context)
def render(self, task_context: str = "", limit: int = 5, strategy: str = "exploit") -> str:
beliefs = self.registry.top(limit=limit, context=task_context, strategy=strategy)
if not beliefs:
return ""
lines = [
"### Bayesian Skill Context",
"Use these posterior-weighted Skills/SOPs as hypotheses, not as unquestioned instructions.",
f"Ranking strategy: {strategy}.",
]
for belief in beliefs:
decision = self.policy.decide(belief)
failures = ", ".join(f"{k}={v}" for k, v in sorted(belief.failure_modes.items())[:3]) or "none"
lines.append(
"- "
f"{belief.skill_id}: posterior_success={belief.success_probability:.3f}, "
f"posterior_std={belief.posterior_std:.3f}, "
f"alpha={belief.alpha:.1f}, beta={belief.beta:.1f}, "
f"observations={belief.observations}, mean_tokens={belief.mean_tokens:.1f}, "
f"rewrite={decision.action}, failures={failures}"
Expand Down
75 changes: 75 additions & 0 deletions bayesian_agent/core/ranking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Skill ranking strategies for posterior-weighted context selection."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Callable, Dict

from bayesian_agent.core.belief import SkillBelief


@dataclass(frozen=True)
class RankingStrategy:
    """Immutable pairing of a strategy name with the function that scores beliefs."""

    # Identifier of the strategy.
    name: str
    # Human-readable explanation of what the strategy prefers.
    description: str
    # Callable taking (belief, context) and returning a numeric score.
    scorer: Callable[[SkillBelief, str], float]

    def score(self, belief: SkillBelief, context: str = "") -> float:
        """Run the scorer on `belief` and `context` and coerce the result to float."""

        raw_score = self.scorer(belief, context)
        return float(raw_score)


def _context_bonus(belief: SkillBelief, context: str) -> float:
if not context:
return 0.0
if context in belief.contexts:
return 1.0
# Lightweight partial match for hierarchical contexts such as "openclaw/grading".
return 0.25 if any(context in known or known in context for known in belief.contexts) else 0.0


def _safe_mean_tokens(belief: SkillBelief) -> float:
return max(float(belief.mean_tokens or 0.0), 1.0)


def exploit_score(belief: SkillBelief, context: str = "") -> float:
    """Score for the "exploit" strategy.

    Posterior success probability, plus 0.15 times the context bonus, minus
    0.25 times the posterior standard deviation.
    """

    base = belief.success_probability
    relevance = 0.15 * _context_bonus(belief, context)
    uncertainty_penalty = 0.25 * belief.posterior_std
    return base + relevance - uncertainty_penalty


def explore_score(belief: SkillBelief, context: str = "") -> float:
    """Score for the "explore" strategy.

    Posterior standard deviation, plus 0.10 times the context bonus, plus 0.01
    per observation capped at three observations.
    """

    uncertainty = belief.posterior_std
    relevance = 0.10 * _context_bonus(belief, context)
    evidence_nudge = min(belief.observations, 3) * 0.01
    return uncertainty + relevance + evidence_nudge


def cost_aware_score(belief: SkillBelief, context: str = "") -> float:
    """Score for the "cost_aware" strategy.

    Success probability per thousand mean tokens (mean tokens floored at 1),
    plus 0.10 times the context bonus.
    """

    success_per_token = belief.success_probability / _safe_mean_tokens(belief)
    relevance = 0.10 * _context_bonus(belief, context)
    return success_per_token * 1000.0 + relevance


def context_aware_score(belief: SkillBelief, context: str = "") -> float:
    """Score for the "context_aware" strategy.

    Posterior success probability, plus 0.35 times the context bonus, minus
    0.10 times the posterior standard deviation.
    """

    base = belief.success_probability
    relevance = 0.35 * _context_bonus(belief, context)
    uncertainty_penalty = 0.10 * belief.posterior_std
    return base + relevance - uncertainty_penalty


# Built-in ranking strategies, keyed by their lowercase, underscore-separated
# name. Each entry pairs a short description with its scoring function.
STRATEGIES: Dict[str, RankingStrategy] = {
    "exploit": RankingStrategy("exploit", "Prefer proven, low-uncertainty Skills.", exploit_score),
    "explore": RankingStrategy("explore", "Prefer Skills that need more evidence.", explore_score),
    "cost_aware": RankingStrategy("cost_aware", "Prefer high-success, low-token Skills.", cost_aware_score),
    "context_aware": RankingStrategy("context_aware", "Prefer Skills proven in similar contexts.", context_aware_score),
}


def get_strategy(name: str = "exploit") -> RankingStrategy:
    """Look up a ranking strategy by name.

    The name is stripped, lower-cased, and has hyphens turned into underscores
    before the lookup; an empty or missing name selects "exploit".

    Raises:
        ValueError: If no strategy is registered under the normalized name.
    """

    key = (name or "exploit").strip().lower().replace("-", "_")
    found = STRATEGIES.get(key)
    if found is not None:
        return found
    available = ", ".join(sorted(STRATEGIES))
    raise ValueError(f"Unknown ranking strategy '{name}'. Available: {available}")
7 changes: 4 additions & 3 deletions bayesian_agent/core/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from bayesian_agent.core.belief import SkillBelief
from bayesian_agent.core.evidence import TrajectoryEvidence, utc_now
from bayesian_agent.core.ranking import get_strategy


class BayesianSkillRegistry:
Expand Down Expand Up @@ -60,11 +61,11 @@ def record_many(self, events: Iterable[TrajectoryEvidence]) -> List[SkillBelief]
def beliefs(self) -> List[SkillBelief]:
    """Rebuild a `SkillBelief` for every entry stored under the "skills" key."""

    stored = self.data.get("skills", {})
    rebuilt: List[SkillBelief] = []
    for skill_id, raw in stored.items():
        rebuilt.append(SkillBelief.from_dict(skill_id, raw))
    return rebuilt

def top(self, limit: int = 5, context: str = "", strategy: str = "exploit") -> List[SkillBelief]:
    """Return the highest-ranked beliefs under the named ranking strategy.

    Beliefs are sorted in descending order by the tuple (strategy score,
    observation count, negated mean tokens, skill id); the trailing fields
    only break ties between equal scores.

    Args:
        limit: Upper bound on the number of beliefs returned (applied as a slice).
        context: Task context handed to the strategy's scorer.
        strategy: Name of the ranking strategy, resolved via `get_strategy`.

    Raises:
        ValueError: If `strategy` does not name a known ranking strategy.
    """

    beliefs = self.beliefs()
    ranking = get_strategy(strategy)

    def score(belief: SkillBelief):
        return (ranking.score(belief, context), belief.observations, -belief.mean_tokens, belief.skill_id)

    return sorted(beliefs, key=score, reverse=True)[:limit]
36 changes: 35 additions & 1 deletion bayesian_agent/core/repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Set
from typing import Any, Dict, Iterable, List, Mapping, Set


BenchmarkResults = Dict[str, List[Dict[str, Any]]]
Expand All @@ -23,6 +23,40 @@ def failed_task_ids(results: Mapping[str, Iterable[Mapping[str, Any]]]) -> Dict[
return failed


def failure_mode_clusters(results: Mapping[str, Iterable[Mapping[str, Any]]]) -> Dict[str, Dict[str, List[str]]]:
    """Group failed task ids by normalized failure mode for targeted repair.

    A run counts as failed when it has a truthy ``task_id`` and a falsy
    ``success``. Its failure mode is the first non-blank value of
    ``failure_mode`` then ``error``, with surrounding whitespace removed, or
    ``"unknown_failure"`` when neither is usable. Each task id is listed at
    most once per (benchmark, mode) cluster, in first-seen order.

    Args:
        results: Mapping of benchmark name to an iterable of run records.

    Returns:
        ``{benchmark: {failure_mode: [task_id, ...]}}``. Benchmarks without
        failed runs are omitted.
    """

    clusters: Dict[str, Dict[str, List[str]]] = {}
    seen = set()
    for benchmark, runs in results.items():
        bench_key = str(benchmark)
        for run in runs:
            task_id = run.get("task_id")
            if not task_id or run.get("success"):
                continue
            mode = "unknown_failure"
            for field in ("failure_mode", "error"):
                raw = run.get(field)
                # Strip so " timeout " and "timeout" share a cluster and a
                # whitespace-only label falls through to the next field.
                label = str(raw).strip() if raw else ""
                if label:
                    mode = label
                    break
            entry = (bench_key, mode, str(task_id))
            if entry in seen:
                # Repeated failing runs of one task must not inflate the cluster.
                continue
            seen.add(entry)
            clusters.setdefault(bench_key, {}).setdefault(mode, []).append(entry[2])
    return clusters


def repair_report(results: Mapping[str, Iterable[Mapping[str, Any]]]) -> Dict[str, Dict[str, Any]]:
    """Build a per-benchmark repair summary from benchmark results.

    The results are normalized first. For each benchmark the summary reports
    the number of runs ("tasks"), the sorted failed task ids, their count, the
    failure-mode clusters (modes and ids both sorted), and a recommended
    action: "clustered_repair" when any cluster exists, otherwise "none".
    """

    cleaned = normalize_results(results)
    failures_by_benchmark = failed_task_ids(cleaned)
    clusters_by_benchmark = failure_mode_clusters(cleaned)

    summary: Dict[str, Dict[str, Any]] = {}
    for name, entries in cleaned.items():
        task_total = len(list(entries))
        ids = sorted(failures_by_benchmark.get(name, set()))
        modes = clusters_by_benchmark.get(name, {})
        summary[name] = {
            "tasks": task_total,
            "failed_tasks": ids,
            "failure_count": len(ids),
            "failure_modes": {label: sorted(members) for label, members in sorted(modes.items())},
            "recommended_action": "clustered_repair" if modes else "none",
        }
    return summary


def dedupe_by_task_id(runs: Iterable[Mapping[str, Any]]) -> List[Dict[str, Any]]:
order: List[str] = []
by_id: Dict[str, Dict[str, Any]] = {}
Expand Down
Loading