// structured-outputs-image.ts

  1. import { Ollama } from '../../src/index.js';
  2. import { z } from 'zod';
  3. import { zodToJsonSchema } from 'zod-to-json-schema';
  4. import { readFileSync } from 'fs';
  5. import { resolve } from 'path';
  6. import { createInterface } from 'readline';
  // Shared client used by run(); constructed with no arguments, so whatever
  // defaults the Ollama class defines apply.
  const ollama = new Ollama();
  /**
   * Schema for a single object detected in the image.
   *
   * - `name`: label of the object (string).
   * - `confidence`: numeric confidence value; no range is enforced here.
   * - `attributes`: optional free-form record of additional properties.
   */
  const ObjectSchema = z.object({
    name: z.string(),
    confidence: z.number(),
    attributes: z.optional(z.record(z.any())),
  });
  /**
   * Schema for the full image description requested from the model.
   *
   * Every field is required except `text_content`. `time_of_day` and
   * `setting` are restricted to the enumerated string values below.
   */
  const ImageDescriptionSchema = z.object({
    summary: z.string(),
    objects: ObjectSchema.array(),
    scene: z.string(),
    colors: z.string().array(),
    time_of_day: z.enum(['Morning', 'Afternoon', 'Evening', 'Night']),
    setting: z.enum(['Indoor', 'Outdoor', 'Unknown']),
    text_content: z.optional(z.string()),
  });
  /**
   * Interactive entry point: asks for an image path on stdin, sends the image
   * (base64-encoded) to the `llama3.2-vision` model with the JSON Schema
   * derived from `ImageDescriptionSchema` as the requested output format, and
   * prints the validated result.
   *
   * Failures are logged with `console.error` and end the run early. Each stage
   * (reading the file, calling the model, validating the reply) reports under
   * its own message so the log points at the step that actually failed.
   *
   * @returns {Promise<void>} Resolves once the analysis has been printed or an
   *   error has been logged.
   */
  async function run() {
    // Create readline interface for user input
    const rl = createInterface({
      input: process.stdin,
      output: process.stdout,
    });

    // The callback is named `answer` rather than `resolve` so it does not
    // shadow the `resolve` imported from 'path'.
    const enteredPath = await new Promise<string>((answer) => {
      rl.question('Enter the path to your image: ', answer);
    });
    rl.close();

    // Ignore stray surrounding whitespace (e.g. from a pasted path).
    const trimmedPath = enteredPath.trim();
    if (trimmedPath === '') {
      // resolve('') would yield the current working directory, and reading a
      // directory fails with a far less helpful error.
      console.error('Error reading image file:', new Error('No image path was entered.'));
      return;
    }

    // Stage 1: read the image and base64-encode it.
    let base64Image: string;
    try {
      const imagePath = resolve(trimmedPath);
      base64Image = readFileSync(imagePath).toString('base64');
    } catch (error) {
      console.error('Error reading image file:', error);
      return;
    }

    // Convert the Zod schema to JSON Schema format
    const jsonSchema = zodToJsonSchema(ImageDescriptionSchema);

    const messages = [{
      role: 'user',
      content: 'Analyze this image and return a detailed JSON description including objects, scene, colors and any text detected. If you cannot determine certain details, leave those fields empty.',
      images: [base64Image],
    }];

    // Stage 2: query the model. Handled separately from the file read so that
    // a model or connection failure is not reported as a file error.
    let rawContent: string;
    try {
      const response = await ollama.chat({
        model: 'llama3.2-vision',
        messages: messages,
        format: jsonSchema,
        options: {
          temperature: 0, // Make responses more deterministic
        },
      });
      rawContent = response.message.content;
    } catch (error) {
      console.error('Error requesting image analysis:', error);
      return;
    }

    // Stage 3: parse and validate the response against the Zod schema.
    try {
      const imageAnalysis = ImageDescriptionSchema.parse(JSON.parse(rawContent));
      console.log('\nImage Analysis:', imageAnalysis, '\n');
    } catch (error) {
      console.error('Generated invalid response:', error);
    }
  }
  // Start the script; log any rejection that escapes run().
  run().catch((error) => {
    console.error(error);
  });