From 052028658302a6eb8b20327caa981e6e2a795c09 Mon Sep 17 00:00:00 2001
From: kunalkushwahatg <kunalkushwahatg@gmail.com>
Date: Tue, 18 Mar 2025 21:26:35 +0530
Subject: [PATCH 1/6] Implemented ReAct agent structure with two tools

---
 main.py | 317 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 212 insertions(+), 105 deletions(-)

diff --git a/main.py b/main.py
index dc45b8b..a6f11f7 100644
--- a/main.py
+++ b/main.py
@@ -1,128 +1,235 @@
-from langchain.agents import initialize_agent, AgentType
-from langchain.llms import HuggingFaceHub
-from langchain.tools import Tool
-import fitz
-from googleapiclient.discovery import build
 import os
+import re
+import json
+import fitz
+import ast
+import logging
 from dotenv import load_dotenv
+from langchain.agents import create_react_agent, AgentExecutor
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain_core.tools import Tool
+from langchain_core.tools import tool
+from langchain import hub
+from typing import List, Dict
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+
+
 
 # Load environment variables
 load_dotenv()
-
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
+
+# paths
 PDF_PATH = "resume.pdf"
 JOB_DESCRIPTION_PDF_PATH = "JD.pdf"
 
+# Logging setup
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+if not HUGGINGFACEHUB_API_TOKEN:
+    raise ValueError("Hugging Face API token is missing.")
+
+if not YOUTUBE_API_KEY:
+    raise ValueError("YouTube API key is missing. Please set YOUTUBE_API_KEY in your .env file.")
+
+
 # Initialize Llama 3.2 model from Hugging Face Hub
-llama_llm = HuggingFaceHub(
-    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",  # Adjust if needed for Llama 3.2
-    model_kwargs={"temperature": 0.7, "max_length": 500},
-    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
+llama_llm = HuggingFaceEndpoint(
+    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
+    temperature=0.5,
+    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
 )
 
-def extract_text_from_pdf(pdf_path):
-    """Extract text from a PDF using PyMuPDF."""
-    doc = fitz.open(pdf_path)
-    text = ""
-    for page in doc:
-        text += page.get_text("text")
-    return text
-
-def analyze_resume_with_llama(combined_text: str):
-    """Use Llama 3.2 to analyze a resume against a job description."""
-    analysis_prompt = f"""
-    Based on the resume and job description below, provide EXACTLY 3 specific areas for improvement.
-    Prioritize technical skills over managerial or soft skills.
-    If there are no more technical skills to improve, then suggest managerial or soft skills.
-    
-    Format each area as a new line starting with '- IMPROVE: '
-    Focus only on listing the improvement areas, do not provide any other analysis.
+#initialize youtube api
+youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
+
+def extract_json_from_text(text: str) -> Dict:
+    """Extract and parse JSON from a text string safely."""
+    try:
+        match = re.search(r"\{.*\}", text, re.DOTALL)  # Extract JSON block
+        if match:
+            json_string = match.group(0)  
+            return json.loads(json_string)  # Parse JSON
+    except json.JSONDecodeError as e:
+        logger.error(f"Failed to parse JSON: {e}")
     
-    {combined_text}
+    logger.error("No valid JSON found in the text.")
+    return {}
+
+
+def extract_text_from_pdf(pdf_path: str) -> str:
+    """Extract text from a PDF using PyMuPDF."""
+    # Check if file exists before processing 
+    if not os.path.exists(pdf_path):
+        logging.error(f"File not found: {pdf_path}")
+        return ""
+    try:
+        with fitz.open(pdf_path) as doc:
+            text = "\n".join(page.get_text("text") for page in doc)
+        return text
+    except Exception as e:
+        logging.error(f"Error extracting text from {pdf_path}: {e}")
+        return "" 
+
+
+def combine_text(resume_text: str, job_description_text: str) -> str:
+    """Combine resume and job description text into a single formatted string."""
+    return f"Resume: {resume_text}\n\nJob Description: {job_description_text}"
+
+
+def extract_list_from_text(text: str) -> list | None:
+    """Extract a Python list from text using regex and safe parsing."""
+    match = re.search(r"\[.*?\]", text)
+    if match:
+        try:
+            extracted_list = ast.literal_eval(match.group(0))
+            return extracted_list if isinstance(extracted_list, list) else None
+        except (SyntaxError, ValueError):
+            logging.warning("Failed to parse list from text.")
+    return None
+
+@tool
+def analyze_resume(combined_text: str) -> List[Dict[str, str]]:
     """
-    return llama_llm.predict(analysis_prompt)
-
-def extract_improvement_areas(analysis_text):
-    """Extract improvement areas from the analysis text."""
-    improvements = []
-    for line in analysis_text.split('\n'):
-        if line.strip().startswith('- IMPROVE:'):
-            improvement = line.replace('- IMPROVE:', '').strip()
-            if improvement:
-                improvements.append(improvement)
-    return improvements
-
-def search_youtube_videos(query):
-    """Search YouTube for videos related to the improvement areas."""
-    youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
-    request = youtube.search().list(
-        part="snippet",
-        q=query,
-        maxResults=3
-    )
-    response = request.execute()
-    
-    video_urls = []
-    for item in response.get("items", []):
-        if "videoId" in item["id"]:
-            video_urls.append(f"https://www.youtube.com/watch?v={item['id']['videoId']}")
+    Analyze a resume against a job description and return 3 technical improvement areas.
+    """
+    prompt = f"""
+Analyze the following resume and job description to identify **EXACTLY 3 key technical improvement areas**.  
+Focus strictly on **technical skills**, not managerial or soft skills.  
+
+### Input:  
+{combined_text}  
+
+### Output Format (STRICTLY FOLLOW THIS):  
+Return **only one valid JSON object** without any extra text, explanations, or multiple outputs.  
+Ensure the JSON structure exactly matches the format below:
+
+{{
+    "Improvement Areas": [
+        {{
+            "Title": "Improvement Area 1",
+            "Description": "Brief description of the first improvement area."
+        }},
+        {{
+            "Title": "Improvement Area 2",
+            "Description": "Brief description of the second improvement area."
+        }},
+        {{
+            "Title": "Improvement Area 3",
+            "Description": "Brief description of the third improvement area."
+        }}
+    ]
+}}
+"""
+
+    try:
+        analysis = llama_llm.invoke(prompt) 
+        improvement_areas = extract_json_from_text(analysis).get("Improvement Areas", [])
+        
+        if not improvement_areas:
+            logger.warning("No improvement areas found in the analysis.")
+            return []
+
+        return improvement_areas
     
-    return video_urls
+    except Exception as e:
+        logger.error(f"Error in resume analysis: {e}")
+        return [{"Title": "Error", "Description": "Failed to analyze resume."}]
+
+
+
+@tool
+def search_youtube_videos(queries: List[str]) -> Dict[str, List[str]]:
+    """
+    Search YouTube for videos related to the given queries and return links to relevant videos.
+    """
+    recommended_videos = {}
+
+    for query in queries:
+        try:
+            request = youtube.search().list(
+                part="snippet", q=query, maxResults=10  # request 10 results; they may be videos or playlists, and only items with a videoId are kept below
+            )
+            response = request.execute()
+
+            video_urls = [
+                f"https://www.youtube.com/watch?v={item['id']['videoId']}"
+                for item in response.get("items", [])
+                if "videoId" in item["id"]
+            ]
+
+            if not video_urls:
+                logging.warning(f"No video results for query: {query}")
+                continue  # Skip retrying the exact same query
+
+            # Get top 3 videos out of the 10 results
+            recommended_videos[query] = video_urls[:3]
+
+        except HttpError as e:
+            logging.error(f"YouTube API error for query '{query}': {e}")
+            recommended_videos[query] = ["YouTube API error occurred"]
+
+        except Exception as e:
+            logging.error(f"Unexpected error for query '{query}': {e}")
+            recommended_videos[query] = ["An error occurred"]
+
+    return recommended_videos
+
+
+def create_tools(combined_text: str) -> list:
+    """Create Langchain tools for resume analysis and YouTube search."""
+    return [
+        Tool(
+            name="analyze_resume",
+            func=lambda _: analyze_resume(combined_text),
+            description="Analyze a resume against a job description and return improvement areas.",
+        ),
+        Tool(
+            name="search_youtube_videos",
+            func=lambda queries: search_youtube_videos({"queries": extract_list_from_text(queries)}),
+            description="Search YouTube for videos related to the improvement areas.",
+        ),
+    ]
+
 
 def main():
+    """Main function to run the AI agent."""
     try:
-        print("Extracting text from PDFs...")
         resume_text = extract_text_from_pdf(PDF_PATH)
         job_description_text = extract_text_from_pdf(JOB_DESCRIPTION_PDF_PATH)
-        
-        analysis_prompt = f"""
-        You are an AI that evaluates resumes based on job descriptions.
-        
-        Resume:
-        {resume_text}
-        
-        Job Description:
-        {job_description_text}
-        
-        Provide EXACTLY 3 specific areas for improvement.
-        Prioritize technical skills first.
-        Each area must be on a new line starting with '- IMPROVE: '
-        """
-        
-        print("\nAnalyzing resume...")
-        improvements = analyze_resume_with_llama(analysis_prompt)
-        improvement_areas = extract_improvement_areas(improvements)
-        
-        if not improvement_areas:
-            print("\nNo specific improvement areas identified. Retrying with a stricter prompt...")
-            retry_prompt = f"""
-            Reanalyze this resume and job description.
-            List EXACTLY 3 areas for improvement, prioritizing technical skills first.
-            Each area MUST start with '- IMPROVE: '
-            
-            Resume: {resume_text}
-            Job Description: {job_description_text}
-            """
-            improvements = analyze_resume_with_llama(retry_prompt)
-            improvement_areas = extract_improvement_areas(improvements)
-        
-        print("\nStored Improvement Areas:")
-        for area in improvement_areas:
-            print(f"- {area}")
-        
-        print("\nSearching YouTube for relevant videos...")
-        video_recommendations = {}
-        for area in improvement_areas:
-            video_recommendations[area] = search_youtube_videos(area)
-        
-        for area, videos in video_recommendations.items():
-            print(f"\nVideos for improvement area: {area}")
-            for video in videos:
-                print(video)
-        
+        combined_text = combine_text(resume_text, job_description_text)
+
+        tools = create_tools(combined_text)
+
+        #load react template from langchain hub
+        prompt_template = hub.pull("hwchase17/react")
+
+        react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template)
+        agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=False, return_intermediate_steps=True)
+
+        query = (
+            "You are an AI assistant that evaluates resumes based on job descriptions. "
+            "Use the analyze_resume tool to analyze the combined resume and job description text, "
+            "and search YouTube for relevant resources using the search_youtube_videos tool. "
+            "Provide short descriptions of the improvement areas as the final answer."
+        )
+
+        response = agent_executor.invoke({"input": query})
+        intermediate_steps = response["intermediate_steps"]
+
+        improvement_areas = intermediate_steps[0][1]
+        youtube_links = intermediate_steps[1][1]
+
+        print("\nIMPROVEMENT AREAS:\n", improvement_areas)
+        print("\nYOUTUBE LINKS:\n", youtube_links)
+
     except Exception as e:
-        print(f"\nAn error occurred: {str(e)}")
-        raise
+        logging.error("An error occurred: %s", str(e))
+
 
-main()
\ No newline at end of file
+if __name__ == "__main__":
+    main()

From cb9dd4763c7aacd563d6707258aaa7ea1148657d Mon Sep 17 00:00:00 2001
From: kunalkushwahatg <kunalkushwahatg@gmail.com>
Date: Tue, 18 Mar 2025 22:54:13 +0530
Subject: [PATCH 2/6] Call tool functions directly and enable verbose agent output

---
 main.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/main.py b/main.py
index a6f11f7..d665831 100644
--- a/main.py
+++ b/main.py
@@ -8,7 +8,6 @@
 from langchain.agents import create_react_agent, AgentExecutor
 from langchain_huggingface import HuggingFaceEndpoint
 from langchain_core.tools import Tool
-from langchain_core.tools import tool
 from langchain import hub
 from typing import List, Dict
 from googleapiclient.discovery import build
@@ -23,7 +22,7 @@
 
 # paths
 PDF_PATH = "resume.pdf"
-JOB_DESCRIPTION_PDF_PATH = "JD.pdf"
+JOB_DESCRIPTION_PDF_PATH = "jd2.pdf"
 
 # Logging setup
 logging.basicConfig(level=logging.INFO)
@@ -92,7 +91,7 @@ def extract_list_from_text(text: str) -> list | None:
             logging.warning("Failed to parse list from text.")
     return None
 
-@tool
+
 def analyze_resume(combined_text: str) -> List[Dict[str, str]]:
     """
     Analyze a resume against a job description and return 3 technical improvement areas.
@@ -142,7 +141,7 @@ def analyze_resume(combined_text: str) -> List[Dict[str, str]]:
 
 
 
-@tool
+
 def search_youtube_videos(queries: List[str]) -> Dict[str, List[str]]:
     """
     Search YouTube for videos related to the given queries and return links to relevant videos.
@@ -190,7 +189,7 @@ def create_tools(combined_text: str) -> list:
         ),
         Tool(
             name="search_youtube_videos",
-            func=lambda queries: search_youtube_videos({"queries": extract_list_from_text(queries)}),
+            func=lambda queries: search_youtube_videos(extract_list_from_text(queries) or []),  # parser returns None when no list is found
             description="Search YouTube for videos related to the improvement areas.",
         ),
     ]
@@ -209,7 +208,7 @@ def main():
         prompt_template = hub.pull("hwchase17/react")
 
         react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template)
-        agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=False, return_intermediate_steps=True)
+        agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True)
 
         query = (
             "You are an AI assistant that evaluates resumes based on job descriptions. "

From 6991beec3fc987554b873c8b1836260af994fb62 Mon Sep 17 00:00:00 2001
From: kunalkushwahatg <kunalkushwahatg@gmail.com>
Date: Thu, 20 Mar 2025 19:31:23 +0530
Subject: [PATCH 3/6] Point PDF_PATH at kunal_nitr_resume.pdf

---
 main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/main.py b/main.py
index d665831..296fb75 100644
--- a/main.py
+++ b/main.py
@@ -15,13 +15,14 @@
 
 
 
+
 # Load environment variables
 load_dotenv()
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
 
 # paths
-PDF_PATH = "resume.pdf"
+PDF_PATH = "kunal_nitr_resume.pdf"
 JOB_DESCRIPTION_PDF_PATH = "jd2.pdf"
 
 # Logging setup

From 7262fa6438caf7c37cb52f429ab3ee5b6db2ca47 Mon Sep 17 00:00:00 2001
From: kunalkushwahatg <kunalkushwahatg@gmail.com>
Date: Fri, 21 Mar 2025 01:16:03 +0530
Subject: [PATCH 4/6] added flask endpoint

---
 main.py | 91 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 48 insertions(+), 43 deletions(-)

diff --git a/main.py b/main.py
index 296fb75..067c6a5 100644
--- a/main.py
+++ b/main.py
@@ -12,18 +12,15 @@
 from typing import List, Dict
 from googleapiclient.discovery import build
 from googleapiclient.errors import HttpError
-
-
-
+from flask import Flask, request, jsonify
 
 # Load environment variables
 load_dotenv()
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
 
-# paths
-PDF_PATH = "kunal_nitr_resume.pdf"
-JOB_DESCRIPTION_PDF_PATH = "jd2.pdf"
+#flask app setup
+app = Flask(__name__)
 
 # Logging setup
 logging.basicConfig(level=logging.INFO)
@@ -47,6 +44,9 @@
 #initialize youtube api
 youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
 
+#load react template from langchain hub
+prompt_template = hub.pull("hwchase17/react")
+
 def extract_json_from_text(text: str) -> Dict:
     """Extract and parse JSON from a text string safely."""
     try:
@@ -61,19 +61,15 @@ def extract_json_from_text(text: str) -> Dict:
     return {}
 
 
-def extract_text_from_pdf(pdf_path: str) -> str:
-    """Extract text from a PDF using PyMuPDF."""
-    # Check if file exists before processing 
-    if not os.path.exists(pdf_path):
-        logging.error(f"File not found: {pdf_path}")
-        return ""
+def extract_text_from_pdf(pdf_stream):
+    """Extract text from a PDF file stream using PyMuPDF."""
     try:
-        with fitz.open(pdf_path) as doc:
-            text = "\n".join(page.get_text("text") for page in doc)
+        with fitz.open(stream=pdf_stream, filetype="pdf") as doc:  # context manager closes the document
+            text = "\n".join(page.get_text("text") for page in doc)
         return text
     except Exception as e:
-        logging.error(f"Error extracting text from {pdf_path}: {e}")
-        return "" 
+        logging.error(f"Error extracting text from PDF stream: {e}")
+        return ""
 
 
 def combine_text(resume_text: str, job_description_text: str) -> str:
@@ -195,41 +191,50 @@ def create_tools(combined_text: str) -> list:
         ),
     ]
 
+@app.route('/analyze', methods=['POST'])
+def analyze():
+    if request.method == 'POST':
+        if 'resume' not in request.files or 'jd' not in request.files:
+            return jsonify({"error": "Both resume and job description PDFs are required"}), 400
+        resume_file = request.files['resume']
+        jd_file = request.files['jd']
 
-def main():
-    """Main function to run the AI agent."""
-    try:
-        resume_text = extract_text_from_pdf(PDF_PATH)
-        job_description_text = extract_text_from_pdf(JOB_DESCRIPTION_PDF_PATH)
-        combined_text = combine_text(resume_text, job_description_text)
-
-        tools = create_tools(combined_text)
+        if resume_file.filename == '' or jd_file.filename == '':
+            return jsonify({"error": "Files must have valid names"}), 400
 
-        #load react template from langchain hub
-        prompt_template = hub.pull("hwchase17/react")
+        try:
+            resume_text = extract_text_from_pdf(resume_file.read())
+            job_description_text = extract_text_from_pdf(jd_file.read())
 
-        react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template)
-        agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True)
+            combined_text = combine_text(resume_text, job_description_text)
+            tools = create_tools(combined_text)
 
-        query = (
-            "You are an AI assistant that evaluates resumes based on job descriptions. "
-            "Use the analyze_resume tool to analyze the combined resume and job description text, "
-            "and search YouTube for relevant resources using the search_youtube_videos tool. "
-            "Provide short descriptions of the improvement areas as the final answer."
-        )
+            react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template)
+            agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True)
 
-        response = agent_executor.invoke({"input": query})
-        intermediate_steps = response["intermediate_steps"]
+            query = (
+                "You are an AI assistant that evaluates resumes based on job descriptions. "
+                "Use the analyze_resume tool to analyze the combined resume and job description text, "
+                "and search YouTube for relevant resources using the search_youtube_videos tool. "
+                "Provide short descriptions of the improvement areas as the final answer."
+            )
 
-        improvement_areas = intermediate_steps[0][1]
-        youtube_links = intermediate_steps[1][1]
+            response = agent_executor.invoke({"input": query})
+            intermediate_steps = response["intermediate_steps"]
 
-        print("\nIMPROVEMENT AREAS:\n", improvement_areas)
-        print("\nYOUTUBE LINKS:\n", youtube_links)
+            improvement_areas = next((obs for act, obs in intermediate_steps if act.tool == "analyze_resume"), [])  # select by tool name: step count/order is not guaranteed
+            youtube_links = next((obs for act, obs in intermediate_steps if act.tool == "search_youtube_videos"), {})
 
-    except Exception as e:
-        logging.error("An error occurred: %s", str(e))
+            return jsonify({
+                "improvement_areas": improvement_areas,
+                "youtube_links": youtube_links
+            })
 
+        except Exception as e:
+            logger.error(f"Error during analysis: {e}")
+            return jsonify({"error": str(e)}), 500
 
 if __name__ == "__main__":
-    main()
+    app.run(debug=False, host='0.0.0.0', port=5000)
+
+

From 8f667c0d7bb0888b4e1091c3fff045d76cb6d466 Mon Sep 17 00:00:00 2001
From: kunalkushwahatg <kunalkushwahatg@gmail.com>
Date: Fri, 21 Mar 2025 01:20:33 +0530
Subject: [PATCH 5/6] Move agent query to a module-level constant

---
 main.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/main.py b/main.py
index 067c6a5..c817f42 100644
--- a/main.py
+++ b/main.py
@@ -47,6 +47,14 @@
 #load react template from langchain hub
 prompt_template = hub.pull("hwchase17/react")
 
+# Instruction text passed as the "input" of every agent invocation
+query = (
+    "You are an AI assistant that evaluates resumes based on job descriptions. "
+    "Use the analyze_resume tool to analyze the combined resume and job description text, "
+    "and search YouTube for relevant resources using the search_youtube_videos tool. "
+    "Provide short descriptions of the improvement areas as the final answer."
+)
+
 def extract_json_from_text(text: str) -> Dict:
     """Extract and parse JSON from a text string safely."""
     try:
@@ -210,14 +218,7 @@ def analyze():
             tools = create_tools(combined_text)
 
             react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template)
-            agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True)
-
-            query = (
-                "You are an AI assistant that evaluates resumes based on job descriptions. "
-                "Use the analyze_resume tool to analyze the combined resume and job description text, "
-                "and search YouTube for relevant resources using the search_youtube_videos tool. "
-                "Provide short descriptions of the improvement areas as the final answer."
-            )
+            agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True)            
 
             response = agent_executor.invoke({"input": query})
             intermediate_steps = response["intermediate_steps"]

From d008a33cd8ca49c4ecf4abac12c9871b73d54571 Mon Sep 17 00:00:00 2001
From: kunalkushwahatg <kunalkushwahatg@gmail.com>
Date: Fri, 21 Mar 2025 01:31:45 +0530
Subject: [PATCH 6/6] added flask in requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 2a1fb23..818cb4e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -79,3 +79,4 @@ uritemplate==4.1.1
 urllib3==2.3.0
 yarl==1.18.3
 zstandard==0.23.0
+flask==3.1.0