
add support for thinking levels

For supported models (currently only the gpt-oss models), we allow
passing in strings for the thinking level. As more models add support
for thinking levels, we'll likely relax which strings can be provided.
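
A minimal sketch of the new option in action (assuming a local Ollama server with a gpt-oss model pulled, as in the bundled example):

```typescript
import ollama from 'ollama'

// `think` now accepts 'low' | 'medium' | 'high' in addition to booleans.
// Levels are currently only honored by the gpt-oss models.
const response = await ollama.chat({
  model: 'gpt-oss:20b',
  messages: [{ role: 'user', content: 'What is 10 + 23?' }],
  think: 'high',
})

console.log(response.message.thinking) // the model's reasoning trace
console.log(response.message.content) // the final answer
```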
Devon Rifkin · commit c84c15bb2f · 1 month ago
3 changed files with 40 additions and 4 deletions:

  1. README.md (+2 -2)
  2. examples/thinking/thinking-levels.ts (+36 -0)
  3. src/interfaces.ts (+2 -2)

README.md (+2 -2)

@@ -66,7 +66,7 @@ ollama.chat(request)
     - `tool_name` `<string>`: (Optional) Add the name of the tool that was executed to inform the model of the result 
   - `format` `<string>`: (Optional) Set the expected format of the response (`json`).
   - `stream` `<boolean>`: (Optional) When true an `AsyncGenerator` is returned.
-  - `think` `<boolean>`: (Optional) When true, the model will think about the response before responding. Requires thinking support from the model.
+  - `think` `<boolean | "high" | "medium" | "low">`: (Optional) Enable model thinking. Use `true`/`false` or specify a level. Requires model support.
   - `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
   - `tools` `<Tool[]>`: (Optional) A list of tool calls the model may make.
   - `options` `<Options>`: (Optional) Options to configure the runtime.
@@ -89,7 +89,7 @@ ollama.generate(request)
   - `images` `<Uint8Array[] | string[]>`: (Optional) Images to be included, either as Uint8Array or base64 encoded strings.
   - `format` `<string>`: (Optional) Set the expected format of the response (`json`).
   - `stream` `<boolean>`: (Optional) When true an `AsyncGenerator` is returned.
-  - `think` `<boolean>`: (Optional) When true, the model will think about the response before responding. Requires thinking support from the model.
+  - `think` `<boolean | "high" | "medium" | "low">`: (Optional) Enable model thinking. Use `true`/`false` or specify a level. Requires model support.
   - `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
   - `options` `<Options>`: (Optional) Options to configure the runtime.
 - Returns: `<GenerateResponse>`
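
The same union applies to `ollama.generate`; a minimal sketch (the prompt is illustrative, and `response.thinking` assumes the generate response surfaces thinking output the way the chat response does):

```typescript
import ollama from 'ollama'

// GenerateRequest takes the same boolean-or-level `think` union.
const response = await ollama.generate({
  model: 'gpt-oss:20b',
  prompt: 'What is 10 + 23?',
  think: 'low',
})

console.log(response.thinking) // reasoning trace, when the model emits one
console.log(response.response) // the final answer
```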

examples/thinking/thinking-levels.ts (+36 -0)

@@ -0,0 +1,36 @@
+import ollama from 'ollama'
+
+function printHeading(text: string) {
+  console.log(text)
+  console.log('='.repeat(text.length))
+}
+
+async function main() {
+  const messages = [{ role: 'user', content: 'What is 10 + 23?' }]
+
+  // gpt-oss supports 'low', 'medium', 'high'
+  const thinkingLevels = ['low', 'medium', 'high'] as const
+
+  for (const [index, level] of thinkingLevels.entries()) {
+    const response = await ollama.chat({
+      model: 'gpt-oss:20b',
+      messages,
+      think: level,
+    })
+
+    printHeading(`Thinking (${level})`)
+    console.log(response.message.thinking ?? '')
+    console.log('\n')
+
+    printHeading('Response')
+    console.log(response.message.content)
+    console.log('\n')
+
+    if (index < thinkingLevels.length - 1) {
+      console.log('-'.repeat(20))
+      console.log('\n')
+    }
+  }
+}
+
+main()

src/interfaces.ts (+2 -2)

@@ -56,7 +56,7 @@ export interface GenerateRequest {
   format?: string | object
   images?: Uint8Array[] | string[]
   keep_alive?: string | number // a number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc)
-  think?: boolean
+  think?: boolean | 'high' | 'medium' | 'low'
 
   options?: Partial<Options>
 }
@@ -109,7 +109,7 @@ export interface ChatRequest {
   format?: string | object
   keep_alive?: string | number // a number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc)
   tools?: Tool[]
-  think?: boolean
+  think?: boolean | 'high' | 'medium' | 'low'
 
   options?: Partial<Options>
 }