@@ -28,29 +28,29 @@ def make_client(api_key=None, async_client=False):
2828 client_class = openai .AsyncOpenAI if async_client else openai .OpenAI
2929 return client_class (api_key = api_key )
3030
def reply(query, image=None, history=None, prefill=None, prediction=None,
          system=DEFAULT_SYSTEM, api_key=None, model=OPENAI_MODEL,
          max_tokens=None, **kwargs):
    """Send one chat-completion request synchronously and return the parsed reply.

    Args:
        query: The user message text.
        image: Optional image attached to the message (forwarded to make_payload).
        history: Optional prior conversation turns (forwarded to make_payload).
        prefill: Accepted for API symmetry.
            NOTE(review): currently unused — it is never forwarded to
            make_payload; confirm whether OpenAI-side prefill is intended.
        prediction: Optional predicted-output hint (forwarded to make_payload).
        system: System prompt; defaults to DEFAULT_SYSTEM.
        api_key: Optional OpenAI API key; falls back to client defaults.
        model: Model name; defaults to OPENAI_MODEL.
        max_tokens: Optional completion-token cap, sent as
            ``max_completion_tokens`` (the parameter current OpenAI models
            expect instead of the deprecated ``max_tokens``).
        **kwargs: Extra arguments passed straight to
            ``client.chat.completions.create``.

    Returns:
        The response normalized by ``response_openai_native``.
    """
    client = make_client(api_key=api_key)
    payload = make_payload(query, image=image, prediction=prediction,
                           system=system, history=history)
    response = client.chat.completions.create(
        model=model, max_completion_tokens=max_tokens, **payload, **kwargs
    )
    return response_openai_native(response)
async def reply_async(query, image=None, history=None, prefill=None,
                      prediction=None, system=DEFAULT_SYSTEM, api_key=None,
                      model=OPENAI_MODEL, max_tokens=None, **kwargs):
    """Async counterpart of ``reply``: one chat-completion request, parsed reply.

    Args:
        query: The user message text.
        image: Optional image attached to the message (forwarded to make_payload).
        history: Optional prior conversation turns (forwarded to make_payload).
        prefill: Accepted for API symmetry.
            NOTE(review): currently unused — never forwarded to make_payload;
            confirm intent.
        prediction: Optional predicted-output hint (forwarded to make_payload).
        system: System prompt; defaults to DEFAULT_SYSTEM.
        api_key: Optional OpenAI API key; falls back to client defaults.
        model: Model name; defaults to OPENAI_MODEL.
        max_tokens: Optional completion-token cap, sent as
            ``max_completion_tokens``.
        **kwargs: Extra arguments passed straight to
            ``client.chat.completions.create``.

    Returns:
        The response normalized by ``response_openai_native``.
    """
    # async_client=True yields an openai.AsyncOpenAI whose create() is awaitable.
    client = make_client(api_key=api_key, async_client=True)
    payload = make_payload(query, image=image, prediction=prediction,
                           system=system, history=history)
    response = await client.chat.completions.create(
        model=model, max_completion_tokens=max_tokens, **payload, **kwargs
    )
    return response_openai_native(response)
def stream(query, image=None, history=None, prefill=None, prediction=None,
           system=DEFAULT_SYSTEM, api_key=None, model=OPENAI_MODEL,
           max_tokens=None, **kwargs):
    """Stream a chat completion synchronously, yielding parsed text chunks.

    Args:
        query: The user message text.
        image: Optional image attached to the message (forwarded to make_payload).
        history: Optional prior conversation turns (forwarded to make_payload).
        prefill: Accepted for API symmetry.
            NOTE(review): currently unused — never forwarded to make_payload;
            confirm intent.
        prediction: Optional predicted-output hint (forwarded to make_payload).
        system: System prompt; defaults to DEFAULT_SYSTEM.
        api_key: Optional OpenAI API key; falls back to client defaults.
        model: Model name; defaults to OPENAI_MODEL.
        max_tokens: Optional completion-token cap, sent as
            ``max_completion_tokens``.
        **kwargs: Extra arguments passed straight to
            ``client.chat.completions.create``.

    Yields:
        Each streamed chunk normalized by ``stream_openai_native``.
    """
    client = make_client(api_key=api_key)
    payload = make_payload(query, image=image, prediction=prediction,
                           system=system, history=history)
    # stream=True makes create() return an iterator of incremental chunks.
    response = client.chat.completions.create(
        model=model, stream=True, max_completion_tokens=max_tokens,
        **payload, **kwargs
    )
    for chunk in response:
        yield stream_openai_native(chunk)
async def stream_async(query, image=None, history=None, prefill=None,
                       prediction=None, system=DEFAULT_SYSTEM, api_key=None,
                       model=OPENAI_MODEL, max_tokens=None, **kwargs):
    """Async counterpart of ``stream``: yield parsed chunks as they arrive.

    Args:
        query: The user message text.
        image: Optional image attached to the message (forwarded to make_payload).
        history: Optional prior conversation turns (forwarded to make_payload).
        prefill: Accepted for API symmetry.
            NOTE(review): currently unused — never forwarded to make_payload;
            confirm intent.
        prediction: Optional predicted-output hint (forwarded to make_payload).
        system: System prompt; defaults to DEFAULT_SYSTEM.
        api_key: Optional OpenAI API key; falls back to client defaults.
        model: Model name; defaults to OPENAI_MODEL.
        max_tokens: Optional completion-token cap, sent as
            ``max_completion_tokens``.
        **kwargs: Extra arguments passed straight to
            ``client.chat.completions.create``.

    Yields:
        Each streamed chunk normalized by ``stream_openai_native``.
    """
    client = make_client(api_key=api_key, async_client=True)
    payload = make_payload(query, image=image, prediction=prediction,
                           system=system, history=history)
    # With the async client and stream=True, awaiting create() returns an
    # async iterator of incremental chunks.
    response = await client.chat.completions.create(
        model=model, stream=True, max_completion_tokens=max_tokens,
        **payload, **kwargs
    )
    async for chunk in response:
        yield stream_openai_native(chunk)
5656
0 commit comments