AI Agents in Discussion on Documents

In the following section, we present a fictional dialogue between two advanced AI agents, Google Gen AI and OpenAI. The agents engage in an analytical discussion about the contents of a shared document. This dialogue format showcases how collaborative AI systems can interpret and critique complex information in a structured, conversational manner.

import os

import dotenv
from openai import OpenAI
from google import genai

# Load API keys (OPENAI_API_KEY, GOOGLE_API_KEY) from a local .env file
dotenv.load_dotenv()

client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))
original_task = """Extract the sections from the provided file without altering its content, and use the following JSON format for the output:
    [
        {
            section_name: <NAME_OF_SECTION>,
            section_short_description: <SECTION_SHORT_DESCRIPTION>,
            start: <SECTION_START_PAGE_NUMBER>,
            end: <SECTION_END_PAGE_NUMBER>,
        }
    ]
"""
import requests
import pathlib

# Download the PDF file to analyze
PDF = "https://storage.googleapis.com/generativeai-downloads/data/Smoothly%20editing%20material%20properties%20of%20objects%20with%20text-to-image%20models%20and%20synthetic%20data.pdf"  # @param {type: "string"}
resp = requests.get(PDF)
resp.raise_for_status()  # fail fast if the download did not succeed
pdf_bytes = resp.content

pdf_path = pathlib.Path('article.pdf')
pdf_path.write_bytes(pdf_bytes)
# Define an OpenAI agent that analyzes files
def openai_file_analyzer(file_path, prompt):
    # Initialize the OpenAI client (reads OPENAI_API_KEY from the environment)
    client = OpenAI()

    # Upload the file so it can be referenced in the request
    with open(file_path, "rb") as f:
        file = client.files.create(file=f, purpose="user_data")

    # Ask the model to analyze the uploaded document with the given prompt
    response = client.responses.create(
        model="gpt-4.1",
        input=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_file",
                        "file_id": file.id,
                    },
                    {
                        "type": "input_text",
                        "text": prompt,
                    },
                ],
            }
        ],
    )
    return response.output_text
# Define a Google GenAI agent that analyzes files with Gemini 2.0 Flash-Lite
def google_genai_file_analyzer(file_path, prompt):
    # Initialize the client with your Google API key
    client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))

    # Upload the file using the Files API
    file_upload = client.files.upload(file=file_path)

    # Pass the uploaded file and the prompt to the model
    response = client.models.generate_content(
        model='gemini-2.0-flash-lite',
        contents=[file_upload, prompt],
    )

    return response.text
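Each call to these analyzers uploads a fresh copy of the document to the provider, so uploads accumulate over time (the Gemini Files API also expires them automatically, reportedly after about 48 hours). A hedged cleanup sketch to run once the analyses are done; the list and delete calls exist in both SDKs, but the helper itself is ours:

def cleanup_uploads():
    # Delete every file previously uploaded to OpenAI for document analysis
    openai_client = OpenAI()
    for f in openai_client.files.list(purpose="user_data"):
        openai_client.files.delete(f.id)

    # Delete every file currently stored in the Gemini Files API
    gemini_client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))
    for f in gemini_client.files.list():
        gemini_client.files.delete(name=f.name)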
print(google_genai_file_analyzer(pdf_path, original_task))
```json
[
  {
    "section_name": "Introduction",
    "section_short_description": "Introduction to Smoothly editing material properties of objects with text-to-image models and synthetic data",
    "start": 1,
    "end": 1
  },
  {
    "section_name": "method",
    "section_short_description": "Describes the methods used in the research",
    "start": 2,
    "end": 2
  },
  {
    "section_name": "Results",
    "section_short_description": "Presents the findings of the research",
    "start": 2,
    "end": 2
  },
  {
    "section_name": "Visual examples of edits",
    "section_short_description": "Visual examples of edits that show the application of the method",
    "start": 3,
    "end": 3
  },
  {
    "section_name": "More results",
    "section_short_description": "More results including images and a user study",
    "start": 4,
    "end": 4
  },
  {
    "section_name": "Applications",
    "section_short_description": "Illustrates the applications of the research",
    "start": 4,
    "end": 4
  },
  {
    "section_name": "Conclusion",
    "section_short_description": "Summarizes the research and its potential",
    "start": 5,
    "end": 5
  },
  {
    "section_name": "Acknowledgements",
    "section_short_description": "Lists the people who helped with the research",
    "start": 5,
    "end": 5
  }
]
```
print(openai_file_analyzer(pdf_path, original_task))
[
    {
        "section_name": "Smoothly editing material properties of objects with text-to-image models and synthetic data",
        "section_short_description": "Introduction of the method and problem statement, describing the challenge of editing material properties in images and an overview of previous approaches.",
        "start": 1,
        "end": 1
    },
    {
        "section_name": "The method",
        "section_short_description": "Description of the synthetic dataset generation, use of traditional computer graphics and rendering, model adaptation (Stable Diffusion 1.5), and training protocol for parametric editing.",
        "start": 2,
        "end": 2
    },
    {
        "section_name": "Results",
        "section_short_description": "Presentation of qualitative results, including examples of edited real-world images, and discussion of the model’s ability to alter material properties while preserving shape and lighting.",
        "start": 2,
        "end": 3
    },
    {
        "section_name": "Smooth editing of material attributes",
        "section_short_description": "Demonstration of smooth control of material properties with varying edit strength, comparison against a baseline, and summary of user study findings.",
        "start": 4,
        "end": 4
    },
    {
        "section_name": "Applications",
        "section_short_description": "Discussion of use-cases, downstream tasks, and the application of material property edits to 3D scene reconstruction (NeRF).",
        "start": 4,
        "end": 5
    },
    {
        "section_name": "Conclusion",
        "section_short_description": "Summary of contributions, limitations, and acknowledgements. Provides links to the paper and project site for more information.",
        "start": 5,
        "end": 5
    }
]
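The two agents disagree: Gemini finds eight sections and OpenAI six, with different names and page ranges. That disagreement is the raw material for the dialogue, since one agent can be handed the other's extraction and asked to critique it against the source document. A minimal sketch of that first exchange, reusing the parse_sections helper from above; the wording of the review prompt is ours:

# Parse Gemini's extraction into a plain Python list
gemini_sections = parse_sections(google_genai_file_analyzer(pdf_path, original_task))

# Ask the OpenAI agent to review Gemini's extraction against the source PDF
review_prompt = f"""Another agent extracted the following sections from the attached file:
{json.dumps(gemini_sections, indent=2)}

Compare this extraction against the document. List any sections that are missing,
misnamed, or have incorrect page ranges, and briefly explain each discrepancy."""

print(openai_file_analyzer(pdf_path, review_prompt))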