diff --git a/codedog/adapters/github_adapter.py b/codedog/adapters/github_adapter.py index f38e792..1107732 100644 --- a/codedog/adapters/github_adapter.py +++ b/codedog/adapters/github_adapter.py @@ -38,8 +38,7 @@ # used for github app github_app_id = env.get("GITHUB_APP_ID", 0) -github_private_key = load_private_key( - env.get("GITHUB_PRIVATE_KEY_PATH", "/app/private_key.pem")) +github_private_key = load_private_key(env.get("GITHUB_PRIVATE_KEY_PATH", "/app/private_key.pem")) issue_pattern = re.compile(r"#[0-9]+") @@ -53,7 +52,7 @@ class GithubEvent(BaseModel): installation: dict = {} -def handle_github_event(event: GithubEvent, local=False, **args) -> str: +def handle_github_event(event: GithubEvent, local=False, **kwargs) -> str: # TODO: parse event related exception _event_filter(event) @@ -64,40 +63,42 @@ def handle_github_event(event: GithubEvent, local=False, **args) -> str: assert repository_id assert pull_request_number # TODO: config - return handle_pull_request(repository_id, pull_request_number, installation_id, local, get_ttl_hash(120), **args) + return handle_pull_request(repository_id, pull_request_number, installation_id, local, get_ttl_hash(120), **kwargs) def get_github_client(installation_id: int): if installation_id is None or installation_id == 0: return default_gh jwt_token = get_jwt_token(github_private_key, github_app_id) - access_token = get_access_token_by_installation_id( - installation_id, jwt_token) + access_token = get_access_token_by_installation_id(installation_id, jwt_token) github_client = Github(access_token) return github_client @lru_cache() -def handle_pull_request(repository_id: int, pull_request_number: int, installation_id: int, - local=False, ttl_hash=None, **args): +def handle_pull_request( + repository_id: int, pull_request_number: int, installation_id: int, local=False, ttl_hash=None, **kwargs +): del ttl_hash logger.info( "Retrive pull request from Github", - extra={"github.repo.id": repository_id, - "github.pull.number": pull_request_number, - "github.installation_id": installation_id}, + extra={ + "github.repo.id": repository_id, + "github.pull.number": pull_request_number, + "github.installation_id": installation_id, + }, ) - github_client = get_github_client(installation_id) + github_client = default_gh if local else get_github_client(installation_id) + pr = get_pr(github_client, repository_id, pull_request_number) changes = pr.changes callbacks = [] if not local: - callbacks = [_comment_callback(github_client.get_repo( - repository_id).get_pull(pull_request_number))] + callbacks = [_comment_callback(github_client.get_repo(repository_id).get_pull(pull_request_number))] - thread = threading.Thread(target=asyncio.run, args=(_review_wrapper(pr, changes, callbacks, **args),)) + thread = threading.Thread(target=asyncio.run, args=(_review_wrapper(pr, changes, callbacks, **kwargs),)) thread.start() return "Review Submitted." @@ -192,8 +193,8 @@ def get_potential_issue(repo: Repository, pull: GithubPullRequest) -> Issue: return issue -async def _review_wrapper(pr: PullRequest, changes: list[Change], callbacks: list[callable] = [], **args): - review = Review(pr=pr, changes=changes, callbacks=callbacks, **args) +async def _review_wrapper(pr: PullRequest, changes: list[Change], callbacks: list[callable] = [], **kwargs): + review = Review(pr=pr, changes=changes, callbacks=callbacks, **kwargs) await review.execute() diff --git a/codedog/chains.py b/codedog/chains.py index 95f2de3..9948f01 100644 --- a/codedog/chains.py +++ b/codedog/chains.py @@ -6,20 +6,20 @@ from langchain.output_parsers import OutputFixingParser, PydanticOutputParser from codedog.model import ChangeSummary -from codedog.templates import grimoire_cn - -GRIMOIRE = grimoire_cn +from codedog.templates import grimoire_cn, grimoire_en class Chains: - def __init__(self, llm: BaseChatModel): - self._review_fallback_prompt = PromptTemplate.from_template(GRIMOIRE.PR_CHANGE_REVIEW_FALLBACK_TEMPLATE) - self._summary_prompt = PromptTemplate.from_template(GRIMOIRE.PR_SUMMARIZE_TEMPLATE) - self._feedback_prompt = PromptTemplate.from_template(GRIMOIRE.PR_SIMPLE_FEEDBACK_TEMPLATE) + def __init__(self, llm: BaseChatModel, lang: str = "cn"): + grimoire = grimoire_cn if lang == "cn" else grimoire_en + + self._review_fallback_prompt = PromptTemplate.from_template(grimoire.PR_CHANGE_REVIEW_FALLBACK_TEMPLATE) + self._summary_prompt = PromptTemplate.from_template(grimoire.PR_SUMMARIZE_TEMPLATE) + self._feedback_prompt = PromptTemplate.from_template(grimoire.PR_SIMPLE_FEEDBACK_TEMPLATE) self._raw_review_parser = PydanticOutputParser(pydantic_object=ChangeSummary) self._review_parser = OutputFixingParser.from_llm(llm=llm, parser=self._raw_review_parser) self._review_prompt = PromptTemplate( - template=GRIMOIRE.PR_CHANGE_REVIEW_TEMPLATE, + template=grimoire.PR_CHANGE_REVIEW_TEMPLATE, input_variables=["text", "name"], partial_variables={"format_instructions": self._raw_review_parser.get_format_instructions()}, ) @@ -30,9 +30,9 @@ def __init__(self, llm: BaseChatModel): self._feedback_chain = LLMChain(llm=llm, prompt=self._feedback_prompt) @staticmethod - def init_chains(): + def init_chains(lang: str = "cn"): llm = load_llm() - return Chains(llm=llm) + return Chains(llm=llm, lang=lang) @property def llm(self): diff --git a/codedog/report.py b/codedog/report.py deleted file mode 100644 index b287c7a..0000000 --- a/codedog/report.py +++ /dev/null @@ -1,76 +0,0 @@ -from codedog.model import Change -from codedog.templates.template_cn import ( - CHANGE_SUMMARY, - T3_TITLE_LINE, - TABLE_LINE, - TABLE_LINE_NODATA, -) - - -def generate_change_summary(changes: list[Change]) -> str: - """format change summary - - Args: - changes (list[Change]): pr changes and reviews - Returns: - str: format markdown table string of change summary - """ - - important_changes = [] - housekeeping_changes = [] - - important_idx = 1 - housekeeping_idx = 1 - for change in changes: - file_name = change.file_name or "" - url = change.url or "" - summary = change.summary or "" - - text = summary.replace("\n", "
") if summary else "" - text_template: str = TABLE_LINE.format(file_name=file_name, url=url, text=text) - - if not change.major: - text = text_template.format(idx=important_idx) - important_idx += 1 - important_changes.append(text) - else: - text = text_template.format(idx=housekeeping_idx) - housekeeping_idx += 1 - housekeeping_changes.append(text) - - important_changes = "\n".join(important_changes) if important_changes else TABLE_LINE_NODATA - housekeeping_changes = "\n".join(housekeeping_changes) if housekeeping_changes else TABLE_LINE_NODATA - text = CHANGE_SUMMARY.format(important_changes=important_changes, housekeeping_changes=housekeeping_changes) - return text - - -def generate_feedback(changes: list[Change]) -> str: - """format feedback - - Args: - changes (list[Change]): pr changes and reviews - Returns: - str: format markdown table string of feedback - """ - texts = [] - - idx = 1 - for change in changes: - file_name = change.file_name - url = change.url - - feedback = change.feedback - if ( - not feedback - or feedback in ("ok", "OK") - or (len(feedback) < 30 and "ok" in feedback.lower()) # 移除ok + 其他短语的回复 - ): - continue - - text = f"{T3_TITLE_LINE.format(idx=idx, file_name=file_name, url=url)}\n\n{feedback}" - - texts.append(text) - idx += 1 - - concat_feedback_text = "\n\n".join(texts) if texts else TABLE_LINE_NODATA - return concat_feedback_text diff --git a/codedog/review.py b/codedog/review.py index 1047103..c193067 100644 --- a/codedog/review.py +++ b/codedog/review.py @@ -5,18 +5,16 @@ import json import logging import time +import traceback from langchain.callbacks import get_openai_callback from langchain.schema import OutputParserException from codedog.chains import Chains from codedog.model import Change, ChangeSummary, PullRequest -from codedog.report import generate_change_summary, generate_feedback -from codedog.templates import grimoire_cn, template_cn +from codedog.templates import template_cn, template_en from codedog.version import VERSION -GRIMOIRE = grimoire_cn -TEMPLATE = template_cn logger = logging.getLogger(__name__) # TODO: unit test @@ -30,11 +28,13 @@ def __init__( pr: PullRequest, changes: list[Change], callbacks: list | None = None, - chains: Chains = None, + lang: str = "cn", **kwargs, ): - self._chains: Chains = chains if isinstance(chains, Chains) else Chains.init_chains() - + assert lang in ("cn", "en") + self.language = lang + self._chains = Chains.init_chains(lang=lang) + self._template = template_cn if lang == "cn" else template_en # --- data -------------------- self._pr = pr self._changes: list[Change] = changes @@ -84,7 +84,7 @@ async def execute(self) -> None: logger.info("Success code review %s", self.json_str()) except Exception as ex: - logger.warn("Fail code review %s %s", ex, self.json_str()) + logger.warn("Fail code review %s %s %s", ex, self.json_str(), traceback.format_exc().replace("\n", "\\n")) def print_report(self) -> None: print(self.report()) @@ -148,11 +148,11 @@ async def _feedback(self): self._meter_api_call_tokens(cb.total_tokens, cb.total_cost) def _generate_report(self) -> str: - header: str = TEMPLATE.REPORT_HEADER.format( + header: str = self._template.REPORT_HEADER.format( repo_name=self._pr.repository_name, pr_number=self._pr.pr_id, url=self._pr.url, version=VERSION ) - telemetry: str = TEMPLATE.REPORT_TELEMETRY.format( + telemetry: str = self._template.REPORT_TELEMETRY.format( start_time=datetime.datetime.fromtimestamp(self._telemetry["start_time"]).strftime("%Y-%m-%d %H:%M:%S"), time_usage=int(self._telemetry["time_usage"]), files=self._telemetry["files"], @@ -160,15 +160,88 @@ def _generate_report(self) -> str: cost=self._telemetry.get("cost", 0), ) - summary: str = TEMPLATE.REPORT_PR_SUMMARY.format( + summary: str = self._template.REPORT_PR_SUMMARY.format( pr_summary=self._pr_summary, - pr_changes_summary=generate_change_summary(self._changes), + pr_changes_summary=self._generate_change_summary(), ) - feedback: str = TEMPLATE.REPORT_FEEDBACK.format(feedback=generate_feedback(self._changes)) + feedback: str = self._template.REPORT_FEEDBACK.format(feedback=self._generate_feedback()) report = "\n".join([header, telemetry, summary, feedback]) return report + def _generate_change_summary(self) -> str: + """format change summary + + Args: + changes (list[Change]): pr changes and reviews + Returns: + str: format markdown table string of change summary + """ + changes = self._changes + important_changes = [] + housekeeping_changes = [] + + important_idx = 1 + housekeeping_idx = 1 + for change in changes: + file_name = change.file_name or "" + url = change.url or "" + summary = change.summary or "" + + text = summary.replace("\n", "
") if summary else "" + text_template: str = self._template.TABLE_LINE.format(file_name=file_name, url=url, text=text) + + if not change.major: + text = text_template.format(idx=important_idx) + important_idx += 1 + important_changes.append(text) + else: + text = text_template.format(idx=housekeeping_idx) + housekeeping_idx += 1 + housekeeping_changes.append(text) + + important_changes = "\n".join(important_changes) if important_changes else self._template.TABLE_LINE_NODATA + housekeeping_changes = ( + "\n".join(housekeeping_changes) if housekeeping_changes else self._template.TABLE_LINE_NODATA + ) + text = self._template.CHANGE_SUMMARY.format( + important_changes=important_changes, housekeeping_changes=housekeeping_changes + ) + return text + + def _generate_feedback(self) -> str: + """format feedback + + Args: + changes (list[Change]): pr changes and reviews + Returns: + str: format markdown table string of feedback + """ + changes = self._changes + + texts = [] + + idx = 1 + for change in changes: + file_name = change.file_name + url = change.url + + feedback = change.feedback + if ( + not feedback + or feedback in ("ok", "OK") + or (len(feedback) < 30 and "ok" in feedback.lower()) # 移除ok + 其他短语的回复 + ): + continue + + text = f"{self._template.T3_TITLE_LINE.format(idx=idx, file_name=file_name, url=url)}\n\n{feedback}" + + texts.append(text) + idx += 1 + + concat_feedback_text = "\n\n".join(texts) if texts else self._template.TABLE_LINE_NODATA + return concat_feedback_text + async def _execute_callback(self): if not self._callbacks: self.print_report() diff --git a/codedog/server.py b/codedog/server.py index a27a257..6492285 100644 --- a/codedog/server.py +++ b/codedog/server.py @@ -56,9 +56,7 @@ async def gitlab(event: GitlabEvent, url: str, token: str, source: str = "") -> @app.post("/v1/webhook/github", response_model=Response) -async def github( - event: GithubEvent, -) -> Response: +async def github(event: GithubEvent, lang: str = "en") -> Response: """Github webhook. Args: @@ -67,7 +65,7 @@ async def github( Response: message. """ try: - message = handle_github_event(event) + message = handle_github_event(event, lang=lang) except CodedogError as e: return Response(message=e.message, code=e.code) except Exception: diff --git a/codedog/templates/grimoire_cn.py b/codedog/templates/grimoire_cn.py index c08fe2b..085480c 100644 --- a/codedog/templates/grimoire_cn.py +++ b/codedog/templates/grimoire_cn.py @@ -1,35 +1,7 @@ # flake8: noqa - """Grimoire of CodeDog. Chinese version.""" -# -- 通用模版 ----------------------------------------------------------- - -# this template is used with langchain PydanticOutputParser -FORMAT_OUTPUT_JSON_TEMPLATE = """ -请输出符合以下JSON模式的JSON实例 - -例如,对于模式{{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": -{{"type": "string"}}}}}}, "required": ["foo"]}}}} -对象{{"foo": ["bar", "baz"]}}是模式的格式良好的实例。对象{{"properties": {{"foo": ["bar", "baz"]}}}}不是格式良好的。 - -以下是输出JSON模式: -``` -{schema} -```""" - -# this template is used for convert text. -CONVERT_TEXT_TEMPLATE = """ -我会提供一些文本内容,请按以下格式说明要求将文本内容转换为架构化数据格式: -{instructions} - -请对以下文本转换格式进行输出: -``` -{text} -``` -输出结果:""" - - # -- PR Review 模版 --------------------------------------------------- # this template is used for format diff file summary list seperate important and housekeeping changes. @@ -102,6 +74,3 @@ 2. 不要用主观的口吻对代码质量进行评价 3. 你的回复内容应当尽量的精确、简洁,每句话都做到言之有物 """ - - -# -- PR Report Template ------------------------------------------------------- diff --git a/codedog/templates/grimoire_en.py b/codedog/templates/grimoire_en.py index 47da874..dee8fee 100644 --- a/codedog/templates/grimoire_en.py +++ b/codedog/templates/grimoire_en.py @@ -2,13 +2,89 @@ """Grimoire of CodeDog. English version.""" -format_output = """ -The output should be formatted as a JSON instance that conforms to the JSON schema below. +# -- PR Review Prompt Template --------------------------------------------------- -As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}} -the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted. +# this template is used for format diff file summary list seperate important and housekeeping changes. +PR_FILES_SUMMARY_HEADER = """ +**Main changes** +{important_changes} +**Secondary changes** +{unimportant_changes} +""" -Here is the output schema: +PR_FILE_SUMMARY_HEADER = "{path}: {summary}" + + +# this template is used for review single file change. +PR_CHANGE_REVIEW_SUMMARY = "summary of diff" +PR_CHANGE_REVIEW_MAIN_CHANGE = """this diff contains the major part of logical changes in this change list""" + +PR_CHANGE_REVIEW_TEMPLATE = """ +Act as a code reviewer, I will be your assistant, provide you a file diff in a change list, +please review the code change according to the following requirements: + +1. Determine whether the file is a code file containing major logic changes. Generally speaking, +such files often have some function logic changes + +2. Briefly summarize the content of the diff change in Chinese, no more than 100 words, +do not include the results of the first step, just summarize the content of the change. + +{format_instructions} + +Please act as a code reviewer, review the file {name} change. I want you to give: +1. Determine whether the file contains major logic changes. Generally speaking, +2. A brief summary of the diff change, no more than 100 words. Do not include the results of the first step + +review the code according to the instructions: + +{format_instructions} + +here is the diff content: +``` +{text} +```""" + +PR_CHANGE_REVIEW_FALLBACK_TEMPLATE = """ +Please act as a code reviewer, review the file {name} change. I want you to give: + +give a brief summary of the diff change, no more than 100 words. + +here is the diff content: ``` -{schema} +{text} ```""" + +# this template is for starting sequentially summarize PR content. +PR_SUMMARIZE_TEMPLATE = """ +Summarize a git pull request by the given information: + +pull request information (for better understand the context, not part of the pull request): +``` +{pull_request_info} +``` +related issue information (for better understand the context, not part of the pull request): +``` +{issue_info} +``` + +changes summary: +``` +{summary} +``` + +Please note that I want you to summarize the entire pull request, not specific files. +The summary should be no more than 200 words:""" + + +PR_SIMPLE_FEEDBACK_TEMPLATE = """ +Act as a code reviewer, I will be your assistant, provide you a file diff from a change list, +please review the code change according to the following requirements: + +1. Don't give subjective comments on the code quality, such as "this code is bad", "this code is good", etc. +2. Don't give general suggestions that are not specific to the code, such as "this code needs to be refactored", "this code needs to be optimized", etc. + +If you can't judge whether the code is good or bad, please reply "ok" and don't reply any other content except "ok". + +Here's the code: +{text} +""" diff --git a/codedog/templates/template_cn.py b/codedog/templates/template_cn.py index 0b22f36..4c102e6 100644 --- a/codedog/templates/template_cn.py +++ b/codedog/templates/template_cn.py @@ -14,8 +14,7 @@ T3_TITLE_LINE = """### {idx}. [{file_name}]({url})""" - -REPORT_HEADER = """# [{repo_name} #{pr_number}]({url}) 代码审查报告\n\n*powered by GPT3.5-Turbo and Codedog {version}*\n\n""" +REPORT_HEADER = """# [{repo_name} #{pr_number}]({url}) 代码审查报告\n\n*powered by GPT and Codedog {version}*\n\n""" REPORT_TELEMETRY = """## 执行记录 @@ -25,7 +24,6 @@ - OPENAI API TOKEN数量: {tokens} (约${cost:.4f})\n\n """ - REPORT_PR_SUMMARY = """## PR概要 {pr_summary} diff --git a/codedog/templates/template_en.py b/codedog/templates/template_en.py index e69de29..08e64f6 100644 --- a/codedog/templates/template_en.py +++ b/codedog/templates/template_en.py @@ -0,0 +1,54 @@ +CHANGE_SUMMARY = """ +| Main Change | +|---| +{important_changes} + +| Secondary Change | +|---| +{housekeeping_changes} +""" + +TABLE_LINE = """| {{idx}}. **[{file_name}]({url})** |\n| {text} |""" + +TABLE_LINE_NODATA = "-" + +T3_TITLE_LINE = """### {idx}. [{file_name}]({url})""" + + +REPORT_HEADER = ( + """# [{repo_name} #{pr_number}]({url}) Code Review Report\n\n*powered by GPT and Codedog {version}*\n\n""" +) + +REPORT_TELEMETRY = """## Execution Record +- Start at: {start_time} +- Time usage: {time_usage}s +- Reviewed files: {files} +- Openai api tokens: {tokens} (${cost:.4f})\n\n +""" + + +REPORT_PR_SUMMARY = """## PR Summary +{pr_summary} + +{pr_changes_summary}\n\n +""" +REPORT_PR_CHANGES_SUMMARY = """ +**Main Change** + +{important_changes} + +**Secondary Change** + +{housekeeping_changes}\n\n +""" + +REPORT_PR_CHANGE_SUMMARY = """{idx}. [{path}]({url})\n\n{summary}\n\n""" + +REPORT_NO_CHANGES = "" + +REPORT_FEEDBACK = """## Suggestions (preview) + +** Suggestions are still under development, please use with caution ** + +{feedback}\n\n +""" diff --git a/examples/github/github_review.py b/examples/github/github_review.py index 78de2b9..c799bdf 100644 --- a/examples/github/github_review.py +++ b/examples/github/github_review.py @@ -29,4 +29,4 @@ event = build_pull_request_event(repository_name_or_id=repository_name_or_id, pull_request_number=pull_request_number) # handle_github_event(event, local=False) -handle_github_event(event, local=True) +handle_github_event(event, local=True, lang="en")