Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
<!-- markdownlint-disable MD013 MD043 -->
<!-- markdownlint-disable MD043 -->

# OpenSSA-FinanceBench Lite benchmarking

This is a lite version of the benchmarking of `OpenSSA` performance
on the `FinanceBench` dataset. We will use 1 question from the dataset to demonstrate the use of `OpenSSA` with `DANA` architecture.
on the `FinanceBench` dataset. We will use 1 question from the dataset to demonstrate the use of `OpenSSA` with `DANA` architecture.

## [`FinanceBench` Dataset](https://github.com/patronus-ai/financebench/blob/main/financebench_sample_150.csv)

Expand All @@ -19,26 +19,26 @@ Create `.env` file following the `.env.template` and fill in necessary credentia
__Solve__ the problem corresponding to `financebench_id` `00807`:
__`make dana-solve id=00807`__.

### Question

**Question**:

`Does 3M have a reasonably healthy liquidity profile based on its quick ratio for Q2 of FY2023? If the quick ratio is not relevant to measure liquidity, please state that and explain why.`

### Knowledge
**Knowledge**

To solve this question, you can add knowledge related to `liquidity`. See the example below:

- Liquidity Metric Formulas
- `(Net) Working Capital` = `(Total) Current Assets` - `(Total) Current Liabilities`
- `Working Capital Ratio` = `(Total) Current Assets` / `(Total) Current Liabilities`
- `(Net) Working Capital` = `(Total) Current Assets` - `(Total) Current Liabilities`
- `Working Capital Ratio` = `(Total) Current Assets` / `(Total) Current Liabilities`

Go to `knowledge-store.txt` to add relevant knowledge yourself and see how it helps the agent to solve this question.

### Program

With the above-provided knowledge, the program we can provide to the agent could be as below:
**Program**

With the above-provided knowledge, the program we can provide to the agent could be as below:
- Goal: To assess liquidity health of a company, calculate `quick ratio`
- Task: To calculate `quick ratio`, use this formula
- Task: To calculate `quick ratio`, use this formula
`Quick Ratio` = (
(`Cash & Cash Equivalents` +
`Short-Term Investments or (Current) Marketable Securities` +
Expand All @@ -53,6 +53,5 @@ With the above-provided knowledge, the program we can provide to the agent could
Go to `program-store.yml` to see details of the program yourself! You can experiment with different plans to see how they help the agent solve the problem as well.

## Advancing DANA Agent with Domain Knowledge and Program Store

- To solve the question with added domain knowledge, run `make dana-solve-w-knowledge id=00807`
- To solve the question with added domain knowledge and program store, run `make dana-solve-w-knowledge-and-prog-store id=00807`
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# pylint: disable=wrong-import-order,wrong-import-position
from data_and_knowledge import (DocName, FbId, Answer, Doc, FB_ID_COL_NAME, DOC_NAMES_BY_FB_ID, QS_BY_FB_ID,
EXPERT_KNOWLEDGE, EXPERT_PROGRAMS, EXPERT_HTP_COMPANY_KEY, EXPERT_HTP_PERIOD_KEY)
from util import QAFunc, enable_batch_qa_and_eval, log_qa_and_update_output_file
from util import QAFunc, log_qa_and_update_output_file


@cache
Expand Down Expand Up @@ -51,63 +51,55 @@ def get_or_create_adaptations(doc_name: DocName) -> dict[str, str]:
return {EXPERT_HTP_COMPANY_KEY: (doc := Doc(name=doc_name)).company, EXPERT_HTP_PERIOD_KEY: doc.period}


@enable_batch_qa_and_eval(output_name='DANA')
@log_qa_and_update_output_file(output_name='DANA')
def solve(fb_id: FbId) -> Answer:
    """Solve the `QS_BY_FB_ID[fb_id]` problem with the agent for `DOC_NAMES_BY_FB_ID[fb_id]`.

    The agent is obtained with default options, and the adaptations built for the
    same document name are passed to its `solve` call.
    """
    doc_name = DOC_NAMES_BY_FB_ID[fb_id]
    agent = get_or_create_agent(doc_name=doc_name)

    return agent.solve(
        problem=QS_BY_FB_ID[fb_id],
        adaptations_from_known_programs=get_or_create_adaptations(doc_name=doc_name),
    )


@enable_batch_qa_and_eval(output_name='DANA-wKnowledge')
@log_qa_and_update_output_file(output_name='DANA-wKnowledge')
def solve_with_knowledge(fb_id: FbId) -> Answer:
    """Solve the `QS_BY_FB_ID[fb_id]` problem with an agent requested with `expert_knowledge=True`.

    The agent is obtained for `DOC_NAMES_BY_FB_ID[fb_id]`, and the adaptations built
    for the same document name are passed to its `solve` call.
    """
    doc_name = DOC_NAMES_BY_FB_ID[fb_id]
    agent = get_or_create_agent(doc_name=doc_name, expert_knowledge=True)

    return agent.solve(
        problem=QS_BY_FB_ID[fb_id],
        adaptations_from_known_programs=get_or_create_adaptations(doc_name=doc_name),
    )


@enable_batch_qa_and_eval(output_name='DANA-wProgStore')
@log_qa_and_update_output_file(output_name='DANA-wProgStore')
def solve_with_program_store(fb_id: FbId) -> Answer:
    """Solve the `QS_BY_FB_ID[fb_id]` problem with an agent requested with `expert_programs=True`.

    The agent is obtained for `DOC_NAMES_BY_FB_ID[fb_id]`, and the adaptations built
    for the same document name are passed to its `solve` call.
    """
    doc_name = DOC_NAMES_BY_FB_ID[fb_id]
    agent = get_or_create_agent(doc_name=doc_name, expert_programs=True)

    return agent.solve(
        problem=QS_BY_FB_ID[fb_id],
        adaptations_from_known_programs=get_or_create_adaptations(doc_name=doc_name),
    )


@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wProgStore')
@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore')
def solve_with_knowledge_and_program_store(fb_id: FbId) -> Answer:
    """Solve the `QS_BY_FB_ID[fb_id]` problem with expert knowledge and expert programs enabled.

    The agent is obtained for `DOC_NAMES_BY_FB_ID[fb_id]` with both
    `expert_knowledge=True` and `expert_programs=True`, and the adaptations built
    for the same document name are passed to its `solve` call.
    """
    doc_name = DOC_NAMES_BY_FB_ID[fb_id]

    # NOTE(review): the doc name is passed positionally here; if `get_or_create_agent`
    # is memoized, positional and keyword calls may be cached separately -- confirm
    # before switching to `doc_name=...`.
    agent = get_or_create_agent(doc_name, expert_knowledge=True, expert_programs=True)

    return agent.solve(
        problem=QS_BY_FB_ID[fb_id],
        adaptations_from_known_programs=get_or_create_adaptations(doc_name=doc_name),
    )


@enable_batch_qa_and_eval(output_name='DANA-wLlama3')
@log_qa_and_update_output_file(output_name='DANA-wLlama3')
def solve_with_llama3(fb_id: FbId) -> Answer:
    """Solve the `QS_BY_FB_ID[fb_id]` problem with an agent requested with `use_llama3=True`.

    The agent is obtained for `DOC_NAMES_BY_FB_ID[fb_id]`, and the adaptations built
    for the same document name are passed to its `solve` call.
    """
    doc_name = DOC_NAMES_BY_FB_ID[fb_id]
    agent = get_or_create_agent(doc_name=doc_name, use_llama3=True)

    return agent.solve(
        problem=QS_BY_FB_ID[fb_id],
        adaptations_from_known_programs=get_or_create_adaptations(doc_name=doc_name),
    )


@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wLlama3')
@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wLlama3')
def solve_with_knowledge_with_llama3(fb_id: FbId) -> Answer:
    """Solve the `QS_BY_FB_ID[fb_id]` problem with expert knowledge and `use_llama3=True`.

    The agent is obtained for `DOC_NAMES_BY_FB_ID[fb_id]` with `expert_knowledge=True`
    and `use_llama3=True`, and the adaptations built for the same document name are
    passed to its `solve` call.
    """
    doc_name = DOC_NAMES_BY_FB_ID[fb_id]
    agent = get_or_create_agent(doc_name=doc_name, expert_knowledge=True, use_llama3=True)

    return agent.solve(
        problem=QS_BY_FB_ID[fb_id],
        adaptations_from_known_programs=get_or_create_adaptations(doc_name=doc_name),
    )


@enable_batch_qa_and_eval(output_name='DANA-wProgStore-wLlama3')
@log_qa_and_update_output_file(output_name='DANA-wProgStore-wLlama3')
def solve_with_program_store_with_llama3(fb_id: FbId) -> Answer:
    """Solve the `QS_BY_FB_ID[fb_id]` problem with expert programs and `use_llama3=True`.

    The agent is obtained for `DOC_NAMES_BY_FB_ID[fb_id]` with `expert_programs=True`
    and `use_llama3=True`, and the adaptations built for the same document name are
    passed to its `solve` call.
    """
    doc_name = DOC_NAMES_BY_FB_ID[fb_id]
    agent = get_or_create_agent(doc_name=doc_name, expert_programs=True, use_llama3=True)

    return agent.solve(
        problem=QS_BY_FB_ID[fb_id],
        adaptations_from_known_programs=get_or_create_adaptations(doc_name=doc_name),
    )


@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wProgStore-wLlama3')
@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore-wLlama3')
def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer:
return get_or_create_agent(DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True, use_llama3=True).solve( # noqa: E501
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from tqdm import tqdm

from data_and_knowledge import FbId, Answer, FB_IDS, DOC_NAMES_BY_FB_ID, QS_BY_FB_ID, OUTPUT_FILE_PATH, get_or_create_output_df # noqa: E501
from eval import eval_correctness, eval_all
from log import switch_log_file

if TYPE_CHECKING:
Expand All @@ -19,38 +18,6 @@
# Signature shared by question-answering functions: a callable taking an `FbId` and returning an `Answer`.
type QAFunc = Callable[[FbId], Answer]


@dataclass
class enable_batch_qa_and_eval:  # noqa: N801
    """Decorator adding batch dispatch and evaluation to a question-answering function.

    The decorated function inspects its `fb_id` argument:

    - if its lowercase form contains `'all'`, every ID in `FB_IDS` is processed;
    - otherwise, if its lowercase form contains `'from:'`, the IDs in `FB_IDS` starting at
      the position of `fb_id[5:]` are processed;
    - otherwise the single `fb_id` is answered, passed to `eval_correctness` with
      `human=True`, and the answer is returned.

    In both batch modes each answer is passed to `eval_correctness` with `human=False`,
    then `eval_all` is called with `refresh=True`, and `None` is returned.
    """

    # name forwarded as `output_name` to `eval_correctness` and `eval_all`
    output_name: str

    def __call__(self, qa_func: QAFunc) -> QAFunc:
        """Wrap `qa_func` with the batch/single dispatch described on the class."""
        @wraps(wrapped=qa_func)
        def decorated_qa_func(fb_id: FbId) -> Answer | None:
            lowered_fb_id = fb_id.lower()

            if 'all' in lowered_fb_id:
                batch_fb_ids = FB_IDS
            elif 'from:' in lowered_fb_id:
                # the text after the first 5 characters is looked up as the starting ID
                batch_fb_ids = FB_IDS[FB_IDS.index(fb_id[5:]):]
            else:
                # single question: run inferencing and evaluate
                answer = qa_func(fb_id)
                eval_correctness(fb_id=fb_id, answer=answer, output_name=self.output_name, human=True)
                return answer

            for batch_fb_id in tqdm(batch_fb_ids):
                # run inferencing and preliminarily evaluate
                eval_correctness(
                    fb_id=batch_fb_id,
                    answer=qa_func(batch_fb_id),
                    output_name=self.output_name,
                    human=False,
                )

            # rigorously evaluate again, including human evaluation for difficult cases
            eval_all(output_name=self.output_name, refresh=True)
            return None

        return decorated_qa_func


@dataclass
class log_qa_and_update_output_file: # noqa: N801
output_name: str
Expand Down
Loading