From 794bcf3d16e483cb1552ac80455e4300de7a2cfd Mon Sep 17 00:00:00 2001 From: Mauro ABIDAL CARRER Date: Wed, 29 Nov 2023 16:40:16 +0100 Subject: [PATCH 1/4] resolved rebase conflicts --- src/bot_backend.py | 76 +++++++++++++++++++++++++++++++++++++- src/functional.py | 1 + src/notebook_serializer.py | 40 +++++++++++++++++++- src/response_parser.py | 1 - src/web_ui.py | 4 +- 5 files changed, 117 insertions(+), 5 deletions(-) diff --git a/src/bot_backend.py b/src/bot_backend.py index 1f8b9a1..f2902d8 100644 --- a/src/bot_backend.py +++ b/src/bot_backend.py @@ -5,7 +5,8 @@ import shutil from jupyter_backend import * from typing import * -from notebook_serializer import add_markdown_to_notebook, add_code_cell_to_notebook +from notebook_serializer import add_markdown_to_notebook, add_code_cell_to_notebook, nb +from bs4 import BeautifulSoup functions = [ { @@ -123,6 +124,78 @@ def __init__(self): self._init_api_config() self._init_kwargs_for_chat_completion() + for cell in nb['cells']: + print("======================", cell['cell_type']) + if cell['cell_type'] == 'code': + print("=========") + print(cell['source']) + _, _ = self.jupyter_kernel.execute_code(cell['source']) + self.conversation.append( + {'role': "function", 'name': "python", 'content': cell['source']} + ) + + for output in cell['outputs']: + if output['output_type'] == 'display_data': + for mime_type, output_data in output['data'].items(): + if 'text' in mime_type: + if mime_type == 'text/html': + soup = BeautifulSoup(output_data, 'html.parser') + text_output = soup.get_text().strip() + else: + text_output = output_data + print("=========output data") + print(text_output) + self.conversation.append( + { + "role": "function", + 'name': "python", + "content": text_output, + } + ) + if 'image' in mime_type: + print("=========image") + self.conversation.append( + { + "role": "function", + 'name': "python", + "content": "[image]", + } + ) + if output['output_type'] == 'error': + for tracebak in output['traceback']: + print("=========traceback") + print(text_output) + self.conversation.append( + { + "role": "function", + "name": "tracebak", + "content": tracebak, + } + ) + + if cell['cell_type'] == 'markdown': + source = cell['source'] + print(source) + if source.startswith("##### User:\n"): + stripped_source = source[len("#####User:\n")+1:] + self.conversation.append( + {'role': "user", 'content': stripped_source} + ) + print("=========user") + print(stripped_source) + if source.startswith("##### Assistant:\n"): + stripped_source = source[len("#####Assistant:\n")+1:] + self.conversation.append( + {'role': 'assistant', 'content': stripped_source} + ) + print("=========assistant") + print(stripped_source) + + print("=========================bot conversation") + print(json.dumps(self.conversation, indent=1)) + + + def _init_conversation(self): first_system_msg = {'role': 'system', 'content': system_msg} self.context_window_tokens = 0 # num of tokens actually sent to GPT @@ -175,6 +248,7 @@ def add_text_message(self, user_text): self.revocable_files.clear() self.update_finish_reason(finish_reason='new_input') add_markdown_to_notebook(user_text, title="User") + def add_file_message(self, path, bot_msg): filename = os.path.basename(path) diff --git a/src/functional.py b/src/functional.py index 1caf1ba..8401361 100644 --- a/src/functional.py +++ b/src/functional.py @@ -43,6 +43,7 @@ def chat_completion(bot_backend: BotBackend): assert model_name in config['model_context_window'], \ f"{model_name} lacks context window information. Please check the config.json file." + print(json.dumps(kwargs_for_chat_completion, indent=1)) response = openai.ChatCompletion.create(**kwargs_for_chat_completion) return response diff --git a/src/notebook_serializer.py b/src/notebook_serializer.py index 2d8da56..bb4c96d 100644 --- a/src/notebook_serializer.py +++ b/src/notebook_serializer.py @@ -2,20 +2,56 @@ from nbformat import v4 as nbf import ansi2html import os +import json import argparse # main code parser = argparse.ArgumentParser() parser.add_argument("-n", "--notebook", help="Path to the output notebook", default=None, type=str) args = parser.parse_args() +nb = nbf.new_notebook() + if args.notebook: notebook_path = os.path.join(os.getcwd(), args.notebook) base, ext = os.path.splitext(notebook_path) if ext.lower() != '.ipynb': notebook_path += '.ipynb' + + if os.path.isfile(notebook_path): + with open(notebook_path, 'r') as notebook_file: + nb = nbformat.read(notebook_file, as_version=4) -# Global variable for code cells -nb = nbf.new_notebook() +def desirialize_notebook_into_conv_history(): + history = [] + for cell in nb['cells']: + # Handle markdown + if cell['cell_type'] == 'markdown': + append_to_history(history, cell['source'], cell) + # Handle code + if cell['cell_type'] == 'code': + append_to_history(history, "```python\n" + cell['source'] + "\n```", cell) + # Handle outputs + for output in cell['outputs']: + # print(json.dumps(output, indent=1)) + # Handle display data + if output['output_type'] == 'display_data': + for mime_type, output_data in output['data'].items(): + if 'text' in mime_type: + append_to_history(history, output_data, cell) + # Handle error + if output['output_type'] == 'error': + for tracebak in output['traceback']: + append_to_history(history, ansi_to_html(tracebak), cell) + return history + +def append_to_history(history, obj, cell): + is_from_user = 'author' in cell['metadata'] and cell['metadata']['author'] == 'user' + if is_from_user: + history.append((obj, None)) + else: + history.append((None, obj)) + # print(obj) + # print("=========================") def ansi_to_html(ansi_text): converter = ansi2html.Ansi2HTMLConverter() diff --git a/src/response_parser.py b/src/response_parser.py index 0e13773..3d63741 100644 --- a/src/response_parser.py +++ b/src/response_parser.py @@ -129,7 +129,6 @@ def execute(self, bot_backend: BotBackend, history: List, whether_exit: bool): bot_backend.function_name ](code_str) - # add function call to conversion bot_backend.add_function_call_response_message(function_response=text_to_gpt, save_tokens=True) add_function_response_to_bot_history( diff --git a/src/web_ui.py b/src/web_ui.py index 1e98fa2..01a1479 100644 --- a/src/web_ui.py +++ b/src/web_ui.py @@ -1,4 +1,5 @@ import gradio as gr +from notebook_serializer import desirialize_notebook_into_conv_history from response_parser import * @@ -137,7 +138,8 @@ def bot(state_dict: Dict, history: List) -> List: # UI components state = gr.State(value={"bot_backend": None}) with gr.Tab("Chat"): - chatbot = gr.Chatbot([], elem_id="chatbot", label="Local Code Interpreter", height=750) + history = desirialize_notebook_into_conv_history() + chatbot = gr.Chatbot(history, elem_id="chatbot", label="Local Code Interpreter", height=750) with gr.Row(): with gr.Column(scale=0.85): text_box = gr.Textbox( From ced150ffc2881293c8139f0aa2d90eda785c5b42 Mon Sep 17 00:00:00 2001 From: Mauro ABIDAL CARRER Date: Wed, 29 Nov 2023 16:41:37 +0100 Subject: [PATCH 2/4] resolved rebase conflicts --- src/bot_backend.py | 36 ++++++++++++++++++++---------------- src/jupyter_backend.py | 21 ++++++++++++++------- src/notebook_serializer.py | 4 +--- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/src/bot_backend.py b/src/bot_backend.py index f2902d8..099ba67 100644 --- a/src/bot_backend.py +++ b/src/bot_backend.py @@ -6,6 +6,7 @@ from jupyter_backend import * from typing import * from notebook_serializer import add_markdown_to_notebook, add_code_cell_to_notebook, nb +from notebook_serializer import notebook_path from bs4 import BeautifulSoup functions = [ @@ -116,7 +117,10 @@ class BotBackend(GPTResponseLog): def __init__(self): super().__init__() self.unique_id = hash(id(self)) - self.jupyter_work_dir = f'cache/work_dir_{self.unique_id}' + if notebook_path: + self.jupyter_work_dir = os.path.dirname(notebook_path) + else: + self.jupyter_work_dir = f'cache/work_dir_{self.unique_id}' self.jupyter_kernel = JupyterKernel(work_dir=self.jupyter_work_dir) self.gpt_model_choice = "GPT-3.5" self.revocable_files = [] @@ -125,10 +129,10 @@ def __init__(self): self._init_kwargs_for_chat_completion() for cell in nb['cells']: - print("======================", cell['cell_type']) + # print("======================", cell['cell_type']) if cell['cell_type'] == 'code': - print("=========") - print(cell['source']) + # print("=========") + # print(cell['source']) _, _ = self.jupyter_kernel.execute_code(cell['source']) self.conversation.append( {'role': "function", 'name': "python", 'content': cell['source']} @@ -143,8 +147,8 @@ def __init__(self): text_output = soup.get_text().strip() else: text_output = output_data - print("=========output data") - print(text_output) + # print("=========output data") + # print(text_output) self.conversation.append( { "role": "function", @@ -153,7 +157,7 @@ def __init__(self): } ) if 'image' in mime_type: - print("=========image") + # print("=========image") self.conversation.append( { "role": "function", @@ -163,8 +167,8 @@ def __init__(self): ) if output['output_type'] == 'error': for tracebak in output['traceback']: - print("=========traceback") - print(text_output) + # print("=========traceback") + # print(text_output) self.conversation.append( { "role": "function", @@ -175,24 +179,24 @@ def __init__(self): if cell['cell_type'] == 'markdown': source = cell['source'] - print(source) + # print(source) if source.startswith("##### User:\n"): stripped_source = source[len("#####User:\n")+1:] self.conversation.append( {'role': "user", 'content': stripped_source} ) - print("=========user") - print(stripped_source) + # print("=========user") + # print(stripped_source) if source.startswith("##### Assistant:\n"): stripped_source = source[len("#####Assistant:\n")+1:] self.conversation.append( {'role': 'assistant', 'content': stripped_source} ) - print("=========assistant") - print(stripped_source) + # print("=========assistant") + # print(stripped_source) - print("=========================bot conversation") - print(json.dumps(self.conversation, indent=1)) + # print("=========================bot conversation") + # print(json.dumps(self.conversation, indent=1)) diff --git a/src/jupyter_backend.py b/src/jupyter_backend.py index c080d8a..4075d72 100644 --- a/src/jupyter_backend.py +++ b/src/jupyter_backend.py @@ -1,6 +1,7 @@ import jupyter_client import re - +import os +from notebook_serializer import notebook_path def delete_color_control_char(string): ansi_escape = re.compile(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]') @@ -86,12 +87,18 @@ def execute_code(self, code): return '\n'.join(text_to_gpt), content_to_display def _create_work_dir(self): - # set work dir in jupyter environment - init_code = f"import os\n" \ - f"if not os.path.exists('{self.work_dir}'):\n" \ - f" os.mkdir('{self.work_dir}')\n" \ - f"os.chdir('{self.work_dir}')\n" \ - f"del os" + if notebook_path: + init_code = f""" + import os + os.chdir('{self.work_dir}') + """ + else: + # set work dir in jupyter environment + init_code = f"import os\n" \ + f"if not os.path.exists('{self.work_dir}'):\n" \ + f" os.mkdir('{self.work_dir}')\n" \ + f"os.chdir('{self.work_dir}')\n" \ + f"del os" self.execute_code_(init_code) def restart_jupyter_kernel(self): diff --git a/src/notebook_serializer.py b/src/notebook_serializer.py index bb4c96d..d115630 100644 --- a/src/notebook_serializer.py +++ b/src/notebook_serializer.py @@ -10,6 +10,7 @@ parser.add_argument("-n", "--notebook", help="Path to the output notebook", default=None, type=str) args = parser.parse_args() nb = nbf.new_notebook() +notebook_path = "" if args.notebook: notebook_path = os.path.join(os.getcwd(), args.notebook) @@ -32,7 +33,6 @@ def desirialize_notebook_into_conv_history(): append_to_history(history, "```python\n" + cell['source'] + "\n```", cell) # Handle outputs for output in cell['outputs']: - # print(json.dumps(output, indent=1)) # Handle display data if output['output_type'] == 'display_data': for mime_type, output_data in output['data'].items(): @@ -50,8 +50,6 @@ def append_to_history(history, obj, cell): history.append((obj, None)) else: history.append((None, obj)) - # print(obj) - # print("=========================") def ansi_to_html(ansi_text): converter = ansi2html.Ansi2HTMLConverter() From 14a0b8b1ed304894345e200ed6c049755cf236a9 Mon Sep 17 00:00:00 2001 From: Mauro Abidal Carrer Date: Fri, 24 Nov 2023 18:31:46 +0100 Subject: [PATCH 3/4] Removed comments --- src/bot_backend.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/bot_backend.py b/src/bot_backend.py index 099ba67..bd0b11f 100644 --- a/src/bot_backend.py +++ b/src/bot_backend.py @@ -129,10 +129,7 @@ def __init__(self): self._init_kwargs_for_chat_completion() for cell in nb['cells']: - # print("======================", cell['cell_type']) if cell['cell_type'] == 'code': - # print("=========") - # print(cell['source']) _, _ = self.jupyter_kernel.execute_code(cell['source']) self.conversation.append( {'role': "function", 'name': "python", 'content': cell['source']} @@ -147,8 +144,6 @@ def __init__(self): text_output = soup.get_text().strip() else: text_output = output_data - # print("=========output data") - # print(text_output) self.conversation.append( { "role": "function", @@ -157,7 +152,6 @@ def __init__(self): } ) if 'image' in mime_type: - # print("=========image") self.conversation.append( { "role": "function", @@ -167,8 +161,6 @@ def __init__(self): ) if output['output_type'] == 'error': for tracebak in output['traceback']: - # print("=========traceback") - # print(text_output) self.conversation.append( { "role": "function", @@ -179,24 +171,16 @@ def __init__(self): if cell['cell_type'] == 'markdown': source = cell['source'] - # print(source) if source.startswith("##### User:\n"): stripped_source = source[len("#####User:\n")+1:] self.conversation.append( {'role': "user", 'content': stripped_source} ) - # print("=========user") - # print(stripped_source) if source.startswith("##### Assistant:\n"): stripped_source = source[len("#####Assistant:\n")+1:] self.conversation.append( {'role': 'assistant', 'content': stripped_source} ) - # print("=========assistant") - # print(stripped_source) - - # print("=========================bot conversation") - # print(json.dumps(self.conversation, indent=1)) From 75682bce96923b064a94878fcddcccbcfe5f7263 Mon Sep 17 00:00:00 2001 From: Mauro ABIDAL CARRER Date: Wed, 29 Nov 2023 16:42:57 +0100 Subject: [PATCH 4/4] resolved rebase conflicts --- src/bot_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bot_backend.py b/src/bot_backend.py index bd0b11f..1dae292 100644 --- a/src/bot_backend.py +++ b/src/bot_backend.py @@ -182,6 +182,7 @@ def __init__(self): {'role': 'assistant', 'content': stripped_source} ) + print("conversation:", json.dumps(self.conversation, indent=1)) def _init_conversation(self):