diff --git a/codedog/adapters/github_adapter.py b/codedog/adapters/github_adapter.py
index f38e792..1107732 100644
--- a/codedog/adapters/github_adapter.py
+++ b/codedog/adapters/github_adapter.py
@@ -38,8 +38,7 @@
# used for github app
github_app_id = env.get("GITHUB_APP_ID", 0)
-github_private_key = load_private_key(
- env.get("GITHUB_PRIVATE_KEY_PATH", "/app/private_key.pem"))
+github_private_key = load_private_key(env.get("GITHUB_PRIVATE_KEY_PATH", "/app/private_key.pem"))
issue_pattern = re.compile(r"#[0-9]+")
@@ -53,7 +52,7 @@ class GithubEvent(BaseModel):
installation: dict = {}
-def handle_github_event(event: GithubEvent, local=False, **args) -> str:
+def handle_github_event(event: GithubEvent, local=False, **kwargs) -> str:
# TODO: parse event related exception
_event_filter(event)
@@ -64,40 +63,42 @@ def handle_github_event(event: GithubEvent, local=False, **args) -> str:
assert repository_id
assert pull_request_number
# TODO: config
- return handle_pull_request(repository_id, pull_request_number, installation_id, local, get_ttl_hash(120), **args)
+ return handle_pull_request(repository_id, pull_request_number, installation_id, local, get_ttl_hash(120), **kwargs)
def get_github_client(installation_id: int):
if installation_id is None or installation_id == 0:
return default_gh
jwt_token = get_jwt_token(github_private_key, github_app_id)
- access_token = get_access_token_by_installation_id(
- installation_id, jwt_token)
+ access_token = get_access_token_by_installation_id(installation_id, jwt_token)
github_client = Github(access_token)
return github_client
@lru_cache()
-def handle_pull_request(repository_id: int, pull_request_number: int, installation_id: int,
- local=False, ttl_hash=None, **args):
+def handle_pull_request(
+ repository_id: int, pull_request_number: int, installation_id: int, local=False, ttl_hash=None, **kwargs
+):
del ttl_hash
logger.info(
"Retrive pull request from Github",
- extra={"github.repo.id": repository_id,
- "github.pull.number": pull_request_number,
- "github.installation_id": installation_id},
+ extra={
+ "github.repo.id": repository_id,
+ "github.pull.number": pull_request_number,
+ "github.installation_id": installation_id,
+ },
)
- github_client = get_github_client(installation_id)
+ github_client = default_gh if local else get_github_client(installation_id)
+
pr = get_pr(github_client, repository_id, pull_request_number)
changes = pr.changes
callbacks = []
if not local:
- callbacks = [_comment_callback(github_client.get_repo(
- repository_id).get_pull(pull_request_number))]
+ callbacks = [_comment_callback(github_client.get_repo(repository_id).get_pull(pull_request_number))]
- thread = threading.Thread(target=asyncio.run, args=(_review_wrapper(pr, changes, callbacks, **args),))
+ thread = threading.Thread(target=asyncio.run, args=(_review_wrapper(pr, changes, callbacks, **kwargs),))
thread.start()
return "Review Submitted."
@@ -192,8 +193,8 @@ def get_potential_issue(repo: Repository, pull: GithubPullRequest) -> Issue:
return issue
-async def _review_wrapper(pr: PullRequest, changes: list[Change], callbacks: list[callable] = [], **args):
- review = Review(pr=pr, changes=changes, callbacks=callbacks, **args)
+async def _review_wrapper(pr: PullRequest, changes: list[Change], callbacks: list[callable] = [], **kwargs):
+ review = Review(pr=pr, changes=changes, callbacks=callbacks, **kwargs)
await review.execute()
diff --git a/codedog/chains.py b/codedog/chains.py
index 95f2de3..9948f01 100644
--- a/codedog/chains.py
+++ b/codedog/chains.py
@@ -6,20 +6,20 @@
from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
from codedog.model import ChangeSummary
-from codedog.templates import grimoire_cn
-
-GRIMOIRE = grimoire_cn
+from codedog.templates import grimoire_cn, grimoire_en
class Chains:
- def __init__(self, llm: BaseChatModel):
- self._review_fallback_prompt = PromptTemplate.from_template(GRIMOIRE.PR_CHANGE_REVIEW_FALLBACK_TEMPLATE)
- self._summary_prompt = PromptTemplate.from_template(GRIMOIRE.PR_SUMMARIZE_TEMPLATE)
- self._feedback_prompt = PromptTemplate.from_template(GRIMOIRE.PR_SIMPLE_FEEDBACK_TEMPLATE)
+ def __init__(self, llm: BaseChatModel, lang: str = "cn"):
+ grimoire = grimoire_cn if lang == "cn" else grimoire_en
+
+ self._review_fallback_prompt = PromptTemplate.from_template(grimoire.PR_CHANGE_REVIEW_FALLBACK_TEMPLATE)
+ self._summary_prompt = PromptTemplate.from_template(grimoire.PR_SUMMARIZE_TEMPLATE)
+ self._feedback_prompt = PromptTemplate.from_template(grimoire.PR_SIMPLE_FEEDBACK_TEMPLATE)
self._raw_review_parser = PydanticOutputParser(pydantic_object=ChangeSummary)
self._review_parser = OutputFixingParser.from_llm(llm=llm, parser=self._raw_review_parser)
self._review_prompt = PromptTemplate(
- template=GRIMOIRE.PR_CHANGE_REVIEW_TEMPLATE,
+ template=grimoire.PR_CHANGE_REVIEW_TEMPLATE,
input_variables=["text", "name"],
partial_variables={"format_instructions": self._raw_review_parser.get_format_instructions()},
)
@@ -30,9 +30,9 @@ def __init__(self, llm: BaseChatModel):
self._feedback_chain = LLMChain(llm=llm, prompt=self._feedback_prompt)
@staticmethod
- def init_chains():
+ def init_chains(lang: str = "cn"):
llm = load_llm()
- return Chains(llm=llm)
+ return Chains(llm=llm, lang=lang)
@property
def llm(self):
diff --git a/codedog/report.py b/codedog/report.py
deleted file mode 100644
index b287c7a..0000000
--- a/codedog/report.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from codedog.model import Change
-from codedog.templates.template_cn import (
- CHANGE_SUMMARY,
- T3_TITLE_LINE,
- TABLE_LINE,
- TABLE_LINE_NODATA,
-)
-
-
-def generate_change_summary(changes: list[Change]) -> str:
- """format change summary
-
- Args:
- changes (list[Change]): pr changes and reviews
- Returns:
- str: format markdown table string of change summary
- """
-
- important_changes = []
- housekeeping_changes = []
-
- important_idx = 1
- housekeeping_idx = 1
- for change in changes:
- file_name = change.file_name or ""
- url = change.url or ""
- summary = change.summary or ""
-
- text = summary.replace("\n", "<br/>") if summary else ""
- text_template: str = TABLE_LINE.format(file_name=file_name, url=url, text=text)
-
- if not change.major:
- text = text_template.format(idx=important_idx)
- important_idx += 1
- important_changes.append(text)
- else:
- text = text_template.format(idx=housekeeping_idx)
- housekeeping_idx += 1
- housekeeping_changes.append(text)
-
- important_changes = "\n".join(important_changes) if important_changes else TABLE_LINE_NODATA
- housekeeping_changes = "\n".join(housekeeping_changes) if housekeeping_changes else TABLE_LINE_NODATA
- text = CHANGE_SUMMARY.format(important_changes=important_changes, housekeeping_changes=housekeeping_changes)
- return text
-
-
-def generate_feedback(changes: list[Change]) -> str:
- """format feedback
-
- Args:
- changes (list[Change]): pr changes and reviews
- Returns:
- str: format markdown table string of feedback
- """
- texts = []
-
- idx = 1
- for change in changes:
- file_name = change.file_name
- url = change.url
-
- feedback = change.feedback
- if (
- not feedback
- or feedback in ("ok", "OK")
- or (len(feedback) < 30 and "ok" in feedback.lower()) # 移除ok + 其他短语的回复
- ):
- continue
-
- text = f"{T3_TITLE_LINE.format(idx=idx, file_name=file_name, url=url)}\n\n{feedback}"
-
- texts.append(text)
- idx += 1
-
- concat_feedback_text = "\n\n".join(texts) if texts else TABLE_LINE_NODATA
- return concat_feedback_text
diff --git a/codedog/review.py b/codedog/review.py
index 1047103..c193067 100644
--- a/codedog/review.py
+++ b/codedog/review.py
@@ -5,18 +5,16 @@
import json
import logging
import time
+import traceback
from langchain.callbacks import get_openai_callback
from langchain.schema import OutputParserException
from codedog.chains import Chains
from codedog.model import Change, ChangeSummary, PullRequest
-from codedog.report import generate_change_summary, generate_feedback
-from codedog.templates import grimoire_cn, template_cn
+from codedog.templates import template_cn, template_en
from codedog.version import VERSION
-GRIMOIRE = grimoire_cn
-TEMPLATE = template_cn
logger = logging.getLogger(__name__)
# TODO: unit test
@@ -30,11 +28,13 @@ def __init__(
pr: PullRequest,
changes: list[Change],
callbacks: list | None = None,
- chains: Chains = None,
+ lang: str = "cn",
**kwargs,
):
- self._chains: Chains = chains if isinstance(chains, Chains) else Chains.init_chains()
-
+ assert lang in ("cn", "en")
+ self.language = lang
+ self._chains = Chains.init_chains(lang=lang)
+ self._template = template_cn if lang == "cn" else template_en
# --- data --------------------
self._pr = pr
self._changes: list[Change] = changes
@@ -84,7 +84,7 @@ async def execute(self) -> None:
logger.info("Success code review %s", self.json_str())
except Exception as ex:
- logger.warn("Fail code review %s %s", ex, self.json_str())
+ logger.warn("Fail code review %s %s %s", ex, self.json_str(), traceback.format_exc().replace("\n", "\\n"))
def print_report(self) -> None:
print(self.report())
@@ -148,11 +148,11 @@ async def _feedback(self):
self._meter_api_call_tokens(cb.total_tokens, cb.total_cost)
def _generate_report(self) -> str:
- header: str = TEMPLATE.REPORT_HEADER.format(
+ header: str = self._template.REPORT_HEADER.format(
repo_name=self._pr.repository_name, pr_number=self._pr.pr_id, url=self._pr.url, version=VERSION
)
- telemetry: str = TEMPLATE.REPORT_TELEMETRY.format(
+ telemetry: str = self._template.REPORT_TELEMETRY.format(
start_time=datetime.datetime.fromtimestamp(self._telemetry["start_time"]).strftime("%Y-%m-%d %H:%M:%S"),
time_usage=int(self._telemetry["time_usage"]),
files=self._telemetry["files"],
@@ -160,15 +160,88 @@ def _generate_report(self) -> str:
cost=self._telemetry.get("cost", 0),
)
- summary: str = TEMPLATE.REPORT_PR_SUMMARY.format(
+ summary: str = self._template.REPORT_PR_SUMMARY.format(
pr_summary=self._pr_summary,
- pr_changes_summary=generate_change_summary(self._changes),
+ pr_changes_summary=self._generate_change_summary(),
)
- feedback: str = TEMPLATE.REPORT_FEEDBACK.format(feedback=generate_feedback(self._changes))
+ feedback: str = self._template.REPORT_FEEDBACK.format(feedback=self._generate_feedback())
report = "\n".join([header, telemetry, summary, feedback])
return report
+ def _generate_change_summary(self) -> str:
+ """format change summary
+
+ Reads the PR changes and reviews from ``self._changes`` (list[Change]);
+ this method takes no arguments.
+ Returns:
+ str: format markdown table string of change summary
+ """
+ changes = self._changes
+ important_changes = []
+ housekeeping_changes = []
+
+ important_idx = 1
+ housekeeping_idx = 1
+ for change in changes:
+ file_name = change.file_name or ""
+ url = change.url or ""
+ summary = change.summary or ""
+
+ text = summary.replace("\n", "<br/>") if summary else ""
+ text_template: str = self._template.TABLE_LINE.format(file_name=file_name, url=url, text=text)
+
+ if not change.major:
+ text = text_template.format(idx=important_idx)
+ important_idx += 1
+ important_changes.append(text)
+ else:
+ text = text_template.format(idx=housekeeping_idx)
+ housekeeping_idx += 1
+ housekeeping_changes.append(text)
+
+ important_changes = "\n".join(important_changes) if important_changes else self._template.TABLE_LINE_NODATA
+ housekeeping_changes = (
+ "\n".join(housekeeping_changes) if housekeeping_changes else self._template.TABLE_LINE_NODATA
+ )
+ text = self._template.CHANGE_SUMMARY.format(
+ important_changes=important_changes, housekeeping_changes=housekeeping_changes
+ )
+ return text
+
+ def _generate_feedback(self) -> str:
+ """format feedback
+
+ Args:
+ changes (list[Change]): pr changes and reviews
+ Returns:
+ str: format markdown table string of feedback
+ """
+ changes = self._changes
+
+ texts = []
+
+ idx = 1
+ for change in changes:
+ file_name = change.file_name
+ url = change.url
+
+ feedback = change.feedback
+ if (
+ not feedback
+ or feedback in ("ok", "OK")
+ or (len(feedback) < 30 and "ok" in feedback.lower()) # drop replies that are just "ok" plus a short phrase
+ ):
+ continue
+
+ text = f"{self._template.T3_TITLE_LINE.format(idx=idx, file_name=file_name, url=url)}\n\n{feedback}"
+
+ texts.append(text)
+ idx += 1
+
+ concat_feedback_text = "\n\n".join(texts) if texts else self._template.TABLE_LINE_NODATA
+ return concat_feedback_text
+
async def _execute_callback(self):
if not self._callbacks:
self.print_report()
diff --git a/codedog/server.py b/codedog/server.py
index a27a257..6492285 100644
--- a/codedog/server.py
+++ b/codedog/server.py
@@ -56,9 +56,7 @@ async def gitlab(event: GitlabEvent, url: str, token: str, source: str = "") ->
@app.post("/v1/webhook/github", response_model=Response)
-async def github(
- event: GithubEvent,
-) -> Response:
+async def github(event: GithubEvent, lang: str = "en") -> Response:
"""Github webhook.
Args:
@@ -67,7 +65,7 @@ async def github(
Response: message.
"""
try:
- message = handle_github_event(event)
+ message = handle_github_event(event, lang=lang)
except CodedogError as e:
return Response(message=e.message, code=e.code)
except Exception:
diff --git a/codedog/templates/grimoire_cn.py b/codedog/templates/grimoire_cn.py
index c08fe2b..085480c 100644
--- a/codedog/templates/grimoire_cn.py
+++ b/codedog/templates/grimoire_cn.py
@@ -1,35 +1,7 @@
# flake8: noqa
-
"""Grimoire of CodeDog. Chinese version."""
-# -- 通用模版 -----------------------------------------------------------
-
-# this template is used with langchain PydanticOutputParser
-FORMAT_OUTPUT_JSON_TEMPLATE = """
-请输出符合以下JSON模式的JSON实例
-
-例如,对于模式{{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items":
-{{"type": "string"}}}}}}, "required": ["foo"]}}}}
-对象{{"foo": ["bar", "baz"]}}是模式的格式良好的实例。对象{{"properties": {{"foo": ["bar", "baz"]}}}}不是格式良好的。
-
-以下是输出JSON模式:
-```
-{schema}
-```"""
-
-# this template is used for convert text.
-CONVERT_TEXT_TEMPLATE = """
-我会提供一些文本内容,请按以下格式说明要求将文本内容转换为架构化数据格式:
-{instructions}
-
-请对以下文本转换格式进行输出:
-```
-{text}
-```
-输出结果:"""
-
-
# -- PR Review 模版 ---------------------------------------------------
# this template is used for format diff file summary list seperate important and housekeeping changes.
@@ -102,6 +74,3 @@
2. 不要用主观的口吻对代码质量进行评价
3. 你的回复内容应当尽量的精确、简洁,每句话都做到言之有物
"""
-
-
-# -- PR Report Template -------------------------------------------------------
diff --git a/codedog/templates/grimoire_en.py b/codedog/templates/grimoire_en.py
index 47da874..dee8fee 100644
--- a/codedog/templates/grimoire_en.py
+++ b/codedog/templates/grimoire_en.py
@@ -2,13 +2,89 @@
"""Grimoire of CodeDog. English version."""
-format_output = """
-The output should be formatted as a JSON instance that conforms to the JSON schema below.
+# -- PR Review Prompt Template ---------------------------------------------------
-As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
-the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
+# this template is used to format the diff file summary list, separating important and housekeeping changes.
+PR_FILES_SUMMARY_HEADER = """
+**Main changes**
+{important_changes}
+**Secondary changes**
+{unimportant_changes}
+"""
-Here is the output schema:
+PR_FILE_SUMMARY_HEADER = "{path}: {summary}"
+
+
+# this template is used for reviewing a single file change.
+PR_CHANGE_REVIEW_SUMMARY = "summary of diff"
+PR_CHANGE_REVIEW_MAIN_CHANGE = """this diff contains the major part of logical changes in this change list"""
+
+PR_CHANGE_REVIEW_TEMPLATE = """
+Act as a code reviewer, I will be your assistant, provide you a file diff in a change list,
+please review the code change according to the following requirements:
+
+1. Determine whether the file is a code file containing major logic changes. Generally speaking,
+such files often have some function logic changes
+
+2. Briefly summarize the content of the diff change in English, no more than 100 words,
+do not include the results of the first step, just summarize the content of the change.
+
+{format_instructions}
+
+Please act as a code reviewer, review the file {name} change. I want you to give:
+1. Determine whether the file contains major logic changes. Generally speaking,
+2. A brief summary of the diff change, no more than 100 words. Do not include the results of the first step
+
+review the code according to the instructions:
+
+{format_instructions}
+
+here is the diff content:
+```
+{text}
+```"""
+
+PR_CHANGE_REVIEW_FALLBACK_TEMPLATE = """
+Please act as a code reviewer, review the file {name} change. I want you to give:
+
+give a brief summary of the diff change, no more than 100 words.
+
+here is the diff content:
```
-{schema}
+{text}
```"""
+
+# this template is used to start sequentially summarizing the PR content.
+PR_SUMMARIZE_TEMPLATE = """
+Summarize a git pull request by the given information:
+
+pull request information (for better understand the context, not part of the pull request):
+```
+{pull_request_info}
+```
+related issue information (for better understand the context, not part of the pull request):
+```
+{issue_info}
+```
+
+changes summary:
+```
+{summary}
+```
+
+Please note that I want you to summarize the entire pull request, not specific files.
+The summary should be no more than 200 words:"""
+
+
+PR_SIMPLE_FEEDBACK_TEMPLATE = """
+Act as a code reviewer, I will be your assistant, provide you a file diff from a change list,
+please review the code change according to the following requirements:
+
+1. Don't give subjective comments on the code quality, such as "this code is bad", "this code is good", etc.
+2. Don't give general suggestions that are not specific to the code, such as "this code needs to be refactored", "this code needs to be optimized", etc.
+
+If you can't judge whether the code is good or bad, please reply "ok" and don't reply any other content except "ok".
+
+Here's the code:
+{text}
+"""
diff --git a/codedog/templates/template_cn.py b/codedog/templates/template_cn.py
index 0b22f36..4c102e6 100644
--- a/codedog/templates/template_cn.py
+++ b/codedog/templates/template_cn.py
@@ -14,8 +14,7 @@
T3_TITLE_LINE = """### {idx}. [{file_name}]({url})"""
-
-REPORT_HEADER = """# [{repo_name} #{pr_number}]({url}) 代码审查报告\n\n*powered by GPT3.5-Turbo and Codedog {version}*\n\n"""
+REPORT_HEADER = """# [{repo_name} #{pr_number}]({url}) 代码审查报告\n\n*powered by GPT and Codedog {version}*\n\n"""
REPORT_TELEMETRY = """## 执行记录
@@ -25,7 +24,6 @@
- OPENAI API TOKEN数量: {tokens} (约${cost:.4f})\n\n
"""
-
REPORT_PR_SUMMARY = """## PR概要
{pr_summary}
diff --git a/codedog/templates/template_en.py b/codedog/templates/template_en.py
index e69de29..08e64f6 100644
--- a/codedog/templates/template_en.py
+++ b/codedog/templates/template_en.py
@@ -0,0 +1,54 @@
+CHANGE_SUMMARY = """
+| Main Change |
+|---|
+{important_changes}
+
+| Secondary Change |
+|---|
+{housekeeping_changes}
+"""
+
+TABLE_LINE = """| {{idx}}. **[{file_name}]({url})** |\n| {text} |"""
+
+TABLE_LINE_NODATA = "-"
+
+T3_TITLE_LINE = """### {idx}. [{file_name}]({url})"""
+
+
+REPORT_HEADER = (
+ """# [{repo_name} #{pr_number}]({url}) Code Review Report\n\n*powered by GPT and Codedog {version}*\n\n"""
+)
+
+REPORT_TELEMETRY = """## Execution Record
+- Start at: {start_time}
+- Time usage: {time_usage}s
+- Reviewed files: {files}
+- Openai api tokens: {tokens} (${cost:.4f})\n\n
+"""
+
+
+REPORT_PR_SUMMARY = """## PR Summary
+{pr_summary}
+
+{pr_changes_summary}\n\n
+"""
+REPORT_PR_CHANGES_SUMMARY = """
+**Main Change**
+
+{important_changes}
+
+**Secondary Change**
+
+{housekeeping_changes}\n\n
+"""
+
+REPORT_PR_CHANGE_SUMMARY = """{idx}. [{path}]({url})\n\n{summary}\n\n"""
+
+REPORT_NO_CHANGES = ""
+
+REPORT_FEEDBACK = """## Suggestions (preview)
+
+** Suggestions are still under development, please use with caution **
+
+{feedback}\n\n
+"""
diff --git a/examples/github/github_review.py b/examples/github/github_review.py
index 78de2b9..c799bdf 100644
--- a/examples/github/github_review.py
+++ b/examples/github/github_review.py
@@ -29,4 +29,4 @@
event = build_pull_request_event(repository_name_or_id=repository_name_or_id, pull_request_number=pull_request_number)
# handle_github_event(event, local=False)
-handle_github_event(event, local=True)
+handle_github_event(event, local=True, lang="en")