@@ -61,14 +61,22 @@ public void init() {
         }
     }

+    /**
+     * Generate a response with default sampling parameters (maxTokens=150, temperature=0.7, topP=0.9).
+     */
     public String generateResponse(String message, String systemMessage) {
         return generateResponse(message, systemMessage, 150, 0.7, 0.9);
     }

     public String generateResponse(String message, String systemMessage, int maxTokens, double temperature, double topP) {
+        return generateResponse(message, systemMessage, maxTokens, temperature, topP, null);
+    }
+
+    public String generateResponse(String message, String systemMessage, int maxTokens, double temperature, double topP, Long seed) {
         try {
             // Create sampler and state like runInstructOnce
-            Sampler sampler = selectSampler(model.configuration().vocabularySize(), (float) temperature, (float) topP, System.currentTimeMillis());
+            long actualSeed = seed != null ? seed : System.currentTimeMillis();
+            Sampler sampler = selectSampler(model.configuration().vocabularySize(), (float) temperature, (float) topP, actualSeed);
             State state = model.createNewState();

             // Use model's ChatFormat
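
Threading an optional Long seed through the overload chain makes sampling reproducible: a null seed preserves the old wall-clock behavior, while a fixed seed should yield the same token sequence on every call. A minimal caller sketch, assuming the service is injected as a bean named llmService (hypothetical name) and that the sampler is the only source of randomness:

    // Old behavior: a null seed falls back to System.currentTimeMillis()
    String a = llmService.generateResponse("Hello", "You are helpful", 150, 0.7, 0.9, null);

    // Reproducible: identical seeds are expected to yield identical outputs
    String b = llmService.generateResponse("Hello", "You are helpful", 150, 0.7, 0.9, 42L);
    String c = llmService.generateResponse("Hello", "You are helpful", 150, 0.7, 0.9, 42L);
    // b.equals(c) should hold under the assumptions above
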
@@ -115,7 +123,6 @@ public String generateResponse(String message, String systemMessage, int maxToke
             System.out.printf("COMPLETED tokens=%d duration=%dms rate=%.1f tok/s%n",
                     generatedTokens.size(), duration, tokensPerSecond);

-
             String responseText = model.tokenizer().decode(generatedTokens);

             // Add reasoning prefix for non-streaming if needed
@@ -132,9 +139,20 @@ public String generateResponse(String message, String systemMessage, int maxToke
     }

     public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter) {
+        generateStreamingResponse(message, systemMessage, emitter, 150, 0.7, 0.9);
+    }
+
+    public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter,
+                                          int maxTokens, double temperature, double topP) {
+        generateStreamingResponse(message, systemMessage, emitter, maxTokens, temperature, topP, null);
+    }
+
+    public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter,
+                                          int maxTokens, double temperature, double topP, Long seed) {
         CompletableFuture.runAsync(() -> {
             try {
-                Sampler sampler = selectSampler(model.configuration().vocabularySize(), 0.7f, 0.9f, System.currentTimeMillis());
+                long actualSeed = seed != null ? seed : System.currentTimeMillis();
+                Sampler sampler = selectSampler(model.configuration().vocabularySize(), (float) temperature, (float) topP, actualSeed);
                 State state = model.createNewState();

                 // Use proper chat format like in runInstructOnce
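
The streaming overloads mirror the blocking ones, so an endpoint can expose maxTokens, temperature, topP, and seed directly to clients. A hypothetical Spring MVC wiring (the mapping, parameter names, and llmService field are illustrative, not part of this change):

    @GetMapping("/chat/stream")
    public SseEmitter stream(@RequestParam String message,
                             @RequestParam(defaultValue = "150") int maxTokens,
                             @RequestParam(defaultValue = "0.7") double temperature,
                             @RequestParam(defaultValue = "0.9") double topP,
                             @RequestParam(required = false) Long seed) {
        SseEmitter emitter = new SseEmitter(0L); // 0L disables the async timeout; generation can be slow
        llmService.generateStreamingResponse(message, "You are a helpful assistant.", emitter,
                maxTokens, temperature, topP, seed);
        return emitter; // tokens are pushed asynchronously by the CompletableFuture below
    }
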
@@ -164,13 +182,14 @@ public void generateStreamingResponse(String message, String systemMessage, SseE
                 final int[] tokenCount = {0};
                 long startTime = System.currentTimeMillis();
                 List<Integer> generatedTokens = model.generateTokens(
-                        state, 0, promptTokens, stopTokens, 150, sampler, false,
+                        state, 0, promptTokens, stopTokens, maxTokens, sampler, false,
                         token -> {
                             try {
                                 // Only display tokens that should be displayed (like in your original)
                                 if (model.tokenizer().shouldDisplayToken(token)) {
                                     String tokenText = model.tokenizer().decode(List.of(token));
                                     emitter.send(SseEmitter.event().data(tokenText));
+                                    //emitter.send(SseEmitter.event().comment("flush"));
                                     tokenCount[0]++;
                                 }
                             } catch (Exception e) {
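
On the client side, each SSE data event carries one decoded token, so the stream can be consumed incrementally. A minimal consumer sketch, assuming spring-webflux is on the classpath and the hypothetical /chat/stream endpoint from the sketch above:

    WebClient client = WebClient.create("http://localhost:8080");
    client.get()
            .uri("/chat/stream?message=Hello&seed=42")
            .retrieve()
            .bodyToFlux(String.class)       // one element per SSE data event
            .doOnNext(System.out::print)    // print tokens as they arrive
            .blockLast();                   // wait until the server completes the emitter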