From 052028658302a6eb8b20327caa981e6e2a795c09 Mon Sep 17 00:00:00 2001 From: kunalkushwahatg Date: Tue, 18 Mar 2025 21:26:35 +0530 Subject: [PATCH 1/6] Implemented ReAct agent structure with two tools --- main.py | 317 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 212 insertions(+), 105 deletions(-) diff --git a/main.py b/main.py index dc45b8b..a6f11f7 100644 --- a/main.py +++ b/main.py @@ -1,128 +1,235 @@ -from langchain.agents import initialize_agent, AgentType -from langchain.llms import HuggingFaceHub -from langchain.tools import Tool -import fitz -from googleapiclient.discovery import build import os +import re +import json +import fitz +import ast +import logging from dotenv import load_dotenv +from langchain.agents import create_react_agent, AgentExecutor +from langchain_huggingface import HuggingFaceEndpoint +from langchain_core.tools import Tool +from langchain_core.tools import tool +from langchain import hub +from typing import List, Dict +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError + + # Load environment variables load_dotenv() - HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY") + +# paths PDF_PATH = "resume.pdf" JOB_DESCRIPTION_PDF_PATH = "JD.pdf" +# Logging setup +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +if not HUGGINGFACEHUB_API_TOKEN: + raise ValueError("Hugging Face API token is missing.") + +if not YOUTUBE_API_KEY: + raise ValueError("YouTube API key is missing. 
Please set YOUTUBE_API_KEY in your .env file.") + + # Initialize Llama 3.2 model from Hugging Face Hub -llama_llm = HuggingFaceHub( - repo_id="meta-llama/Meta-Llama-3-8B-Instruct", # Adjust if needed for Llama 3.2 - model_kwargs={"temperature": 0.7, "max_length": 500}, - huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN +llama_llm = HuggingFaceEndpoint( + repo_id="meta-llama/Meta-Llama-3-8B-Instruct", + temperature=0.5, + huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN, ) -def extract_text_from_pdf(pdf_path): - """Extract text from a PDF using PyMuPDF.""" - doc = fitz.open(pdf_path) - text = "" - for page in doc: - text += page.get_text("text") - return text - -def analyze_resume_with_llama(combined_text: str): - """Use Llama 3.2 to analyze a resume against a job description.""" - analysis_prompt = f""" - Based on the resume and job description below, provide EXACTLY 3 specific areas for improvement. - Prioritize technical skills over managerial or soft skills. - If there are no more technical skills to improve, then suggest managerial or soft skills. - - Format each area as a new line starting with '- IMPROVE: ' - Focus only on listing the improvement areas, do not provide any other analysis. 
+#initialize youtube api +youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY) + +def extract_json_from_text(text: str) -> Dict: + """Extract and parse JSON from a text string safely.""" + try: + match = re.search(r"\{.*\}", text, re.DOTALL) # Extract JSON block + if match: + json_string = match.group(0) + return json.loads(json_string) # Parse JSON + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON: {e}") - {combined_text} + logger.error("No valid JSON found in the text.") + return {} + + +def extract_text_from_pdf(pdf_path: str) -> str: + """Extract text from a PDF using PyMuPDF.""" + # Check if file exists before processing + if not os.path.exists(pdf_path): + logging.error(f"File not found: {pdf_path}") + return "" + try: + with fitz.open(pdf_path) as doc: + text = "\n".join(page.get_text("text") for page in doc) + return text + except Exception as e: + logging.error(f"Error extracting text from {pdf_path}: {e}") + return "" + + +def combine_text(resume_text: str, job_description_text: str) -> str: + """Combine resume and job description text into a single formatted string.""" + return f"Resume: {resume_text}\n\nJob Description: {job_description_text}" + + +def extract_list_from_text(text: str) -> list | None: + """Extract a Python list from text using regex and safe parsing.""" + match = re.search(r"\[.*?\]", text) + if match: + try: + extracted_list = ast.literal_eval(match.group(0)) + return extracted_list if isinstance(extracted_list, list) else None + except (SyntaxError, ValueError): + logging.warning("Failed to parse list from text.") + return None + +@tool +def analyze_resume(combined_text: str) -> List[Dict[str, str]]: """ - return llama_llm.predict(analysis_prompt) - -def extract_improvement_areas(analysis_text): - """Extract improvement areas from the analysis text.""" - improvements = [] - for line in analysis_text.split('\n'): - if line.strip().startswith('- IMPROVE:'): - improvement = line.replace('- IMPROVE:', 
'').strip() - if improvement: - improvements.append(improvement) - return improvements - -def search_youtube_videos(query): - """Search YouTube for videos related to the improvement areas.""" - youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY) - request = youtube.search().list( - part="snippet", - q=query, - maxResults=3 - ) - response = request.execute() - - video_urls = [] - for item in response.get("items", []): - if "videoId" in item["id"]: - video_urls.append(f"https://www.youtube.com/watch?v={item['id']['videoId']}") + Analyze a resume against a job description and return 3 technical improvement areas. + """ + prompt = f""" +Analyze the following resume and job description to identify **EXACTLY 3 key technical improvement areas**. +Focus strictly on **technical skills**, not managerial or soft skills. + +### Input: +{combined_text} + +### Output Format (STRICTLY FOLLOW THIS): +Return **only one valid JSON object** without any extra text, explanations, or multiple outputs. +Ensure the JSON structure exactly matches the format below: + +{{ + "Improvement Areas": [ + {{ + "Title": "Improvement Area 1", + "Description": "Brief description of the first improvement area." + }}, + {{ + "Title": "Improvement Area 2", + "Description": "Brief description of the second improvement area." + }}, + {{ + "Title": "Improvement Area 3", + "Description": "Brief description of the third improvement area." 
+ }} + ] +}} +""" + + try: + analysis = llama_llm.invoke(prompt) + improvement_areas = extract_json_from_text(analysis).get("Improvement Areas", []) + + if not improvement_areas: + logger.warning("No improvement areas found in the analysis.") + return [] + + return improvement_areas - return video_urls + except Exception as e: + logger.error(f"Error in resume analysis: {e}") + return [{"Title": "Error", "Description": "Failed to analyze resume."}] + + + +@tool +def search_youtube_videos(queries: List[str]) -> Dict[str, List[str]]: + """ + Search YouTube for videos related to the given queries and return links to relevant videos. + """ + recommended_videos = {} + + for query in queries: + try: + request = youtube.search().list( + part="snippet", q=query, maxResults=10 # request 10 results; some may be playlists rather than videos + ) + response = request.execute() + + video_urls = [ + f"https://www.youtube.com/watch?v={item['id']['videoId']}" + for item in response.get("items", []) + if "videoId" in item["id"] + ] + + if not video_urls: + logging.warning(f"No video results for query: {query}") + continue # Skip retrying the exact same query + + # Get top 3 videos out of the 10 results + recommended_videos[query] = video_urls[:3] + + except HttpError as e: + logging.error(f"YouTube API error for query '{query}': {e}") + recommended_videos[query] = ["YouTube API error occurred"] + + except Exception as e: + logging.error(f"Unexpected error for query '{query}': {e}") + recommended_videos[query] = ["An error occurred"] + + return recommended_videos + + +def create_tools(combined_text: str) -> list: + """Create Langchain tools for resume analysis and YouTube search.""" + return [ + Tool( + name="analyze_resume", + func=lambda _: analyze_resume(combined_text), + description="Analyze a resume against a job description and return improvement areas.", + ), + Tool( + name="search_youtube_videos", + func=lambda queries: search_youtube_videos({"queries": extract_list_from_text(queries)}), + 
description="Search YouTube for videos related to the improvement areas.", + ), + ] + def main(): + """Main function to run the AI agent.""" try: - print("Extracting text from PDFs...") resume_text = extract_text_from_pdf(PDF_PATH) job_description_text = extract_text_from_pdf(JOB_DESCRIPTION_PDF_PATH) - - analysis_prompt = f""" - You are an AI that evaluates resumes based on job descriptions. - - Resume: - {resume_text} - - Job Description: - {job_description_text} - - Provide EXACTLY 3 specific areas for improvement. - Prioritize technical skills first. - Each area must be on a new line starting with '- IMPROVE: ' - """ - - print("\nAnalyzing resume...") - improvements = analyze_resume_with_llama(analysis_prompt) - improvement_areas = extract_improvement_areas(improvements) - - if not improvement_areas: - print("\nNo specific improvement areas identified. Retrying with a stricter prompt...") - retry_prompt = f""" - Reanalyze this resume and job description. - List EXACTLY 3 areas for improvement, prioritizing technical skills first. 
- Each area MUST start with '- IMPROVE: ' - - Resume: {resume_text} - Job Description: {job_description_text} - """ - improvements = analyze_resume_with_llama(retry_prompt) - improvement_areas = extract_improvement_areas(improvements) - - print("\nStored Improvement Areas:") - for area in improvement_areas: - print(f"- {area}") - - print("\nSearching YouTube for relevant videos...") - video_recommendations = {} - for area in improvement_areas: - video_recommendations[area] = search_youtube_videos(area) - - for area, videos in video_recommendations.items(): - print(f"\nVideos for improvement area: {area}") - for video in videos: - print(video) - + combined_text = combine_text(resume_text, job_description_text) + + tools = create_tools(combined_text) + + #load react template from langchain hub + prompt_template = hub.pull("hwchase17/react") + + react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template) + agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=False, return_intermediate_steps=True) + + query = ( + "You are an AI assistant that evaluates resumes based on job descriptions. " + "Use the analyze_resume tool to analyze the combined resume and job description text, " + "and search YouTube for relevant resources using the search_youtube_videos tool. " + "Provide short descriptions of the improvement areas as the final answer." 
+ ) + + response = agent_executor.invoke({"input": query}) + intermediate_steps = response["intermediate_steps"] + + improvement_areas = intermediate_steps[0][1] + youtube_links = intermediate_steps[1][1] + + print("\nIMPROVEMENT AREAS:\n", improvement_areas) + print("\nYOUTUBE LINKS:\n", youtube_links) + except Exception as e: - print(f"\nAn error occurred: {str(e)}") - raise + logging.error("An error occurred: %s", str(e)) + -main() \ No newline at end of file +if __name__ == "__main__": + main() From cb9dd4763c7aacd563d6707258aaa7ea1148657d Mon Sep 17 00:00:00 2001 From: kunalkushwahatg Date: Tue, 18 Mar 2025 22:54:13 +0530 Subject: [PATCH 2/6] Implemented ReAct agent structure with two tools --- main.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index a6f11f7..d665831 100644 --- a/main.py +++ b/main.py @@ -8,7 +8,6 @@ from langchain.agents import create_react_agent, AgentExecutor from langchain_huggingface import HuggingFaceEndpoint from langchain_core.tools import Tool -from langchain_core.tools import tool from langchain import hub from typing import List, Dict from googleapiclient.discovery import build @@ -23,7 +22,7 @@ # paths PDF_PATH = "resume.pdf" -JOB_DESCRIPTION_PDF_PATH = "JD.pdf" +JOB_DESCRIPTION_PDF_PATH = "jd2.pdf" # Logging setup logging.basicConfig(level=logging.INFO) @@ -92,7 +91,7 @@ def extract_list_from_text(text: str) -> list | None: logging.warning("Failed to parse list from text.") return None -@tool + def analyze_resume(combined_text: str) -> List[Dict[str, str]]: """ Analyze a resume against a job description and return 3 technical improvement areas. @@ -142,7 +141,7 @@ def analyze_resume(combined_text: str) -> List[Dict[str, str]]: -@tool + def search_youtube_videos(queries: List[str]) -> Dict[str, List[str]]: """ Search YouTube for videos related to the given queries and return links to relevant videos. 
@@ -190,7 +189,7 @@ def create_tools(combined_text: str) -> list: ), Tool( name="search_youtube_videos", - func=lambda queries: search_youtube_videos({"queries": extract_list_from_text(queries)}), + func=lambda queries: search_youtube_videos(extract_list_from_text(queries)), description="Search YouTube for videos related to the improvement areas.", ), ] @@ -209,7 +208,7 @@ def main(): prompt_template = hub.pull("hwchase17/react") react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template) - agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=False, return_intermediate_steps=True) + agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True) query = ( "You are an AI assistant that evaluates resumes based on job descriptions. " From 6991beec3fc987554b873c8b1836260af994fb62 Mon Sep 17 00:00:00 2001 From: kunalkushwahatg Date: Thu, 20 Mar 2025 19:31:23 +0530 Subject: [PATCH 3/6] main --- main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index d665831..296fb75 100644 --- a/main.py +++ b/main.py @@ -15,13 +15,14 @@ + # Load environment variables load_dotenv() HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY") # paths -PDF_PATH = "resume.pdf" +PDF_PATH = "kunal_nitr_resume.pdf" JOB_DESCRIPTION_PDF_PATH = "jd2.pdf" # Logging setup From 7262fa6438caf7c37cb52f429ab3ee5b6db2ca47 Mon Sep 17 00:00:00 2001 From: kunalkushwahatg Date: Fri, 21 Mar 2025 01:16:03 +0530 Subject: [PATCH 4/6] added flask endpoint --- main.py | 91 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/main.py b/main.py index 296fb75..067c6a5 100644 --- a/main.py +++ b/main.py @@ -12,18 +12,15 @@ from typing import List, Dict from googleapiclient.discovery import build from googleapiclient.errors import HttpError - - - +from flask import Flask, 
request, jsonify # Load environment variables load_dotenv() HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY") -# paths -PDF_PATH = "kunal_nitr_resume.pdf" -JOB_DESCRIPTION_PDF_PATH = "jd2.pdf" +#flask app setup +app = Flask(__name__) # Logging setup logging.basicConfig(level=logging.INFO) @@ -47,6 +44,9 @@ #initialize youtube api youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY) +#load react template from langchain hub +prompt_template = hub.pull("hwchase17/react") + def extract_json_from_text(text: str) -> Dict: """Extract and parse JSON from a text string safely.""" try: @@ -61,19 +61,15 @@ def extract_json_from_text(text: str) -> Dict: return {} -def extract_text_from_pdf(pdf_path: str) -> str: - """Extract text from a PDF using PyMuPDF.""" - # Check if file exists before processing - if not os.path.exists(pdf_path): - logging.error(f"File not found: {pdf_path}") - return "" +def extract_text_from_pdf(pdf_stream): + """Extract text from a PDF file stream using PyMuPDF.""" try: - with fitz.open(pdf_path) as doc: - text = "\n".join(page.get_text("text") for page in doc) + doc = fitz.open(stream=pdf_stream, filetype="pdf") + text = "\n".join(page.get_text("text") for page in doc) return text except Exception as e: - logging.error(f"Error extracting text from {pdf_path}: {e}") - return "" + logging.error(f"Error extracting text from PDF stream: {e}") + return "" def combine_text(resume_text: str, job_description_text: str) -> str: @@ -195,41 +191,50 @@ def create_tools(combined_text: str) -> list: ), ] +@app.route('/analyze', methods=['POST']) +def analyze(): + if request.method == 'POST': + if 'resume' not in request.files or 'jd' not in request.files: + return jsonify({"error": "Both resume and job description PDFs are required"}), 400 + resume_file = request.files['resume'] + jd_file = request.files['jd'] -def main(): - """Main function to run the AI agent.""" - try: - resume_text = 
extract_text_from_pdf(PDF_PATH) - job_description_text = extract_text_from_pdf(JOB_DESCRIPTION_PDF_PATH) - combined_text = combine_text(resume_text, job_description_text) - - tools = create_tools(combined_text) + if resume_file.filename == '' or jd_file.filename == '': + return jsonify({"error": "Files must have valid names"}), 400 - #load react template from langchain hub - prompt_template = hub.pull("hwchase17/react") + try: + resume_text = extract_text_from_pdf(resume_file.read()) + job_description_text = extract_text_from_pdf(jd_file.read()) - react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template) - agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True) + combined_text = combine_text(resume_text, job_description_text) + tools = create_tools(combined_text) - query = ( - "You are an AI assistant that evaluates resumes based on job descriptions. " - "Use the analyze_resume tool to analyze the combined resume and job description text, " - "and search YouTube for relevant resources using the search_youtube_videos tool. " - "Provide short descriptions of the improvement areas as the final answer." - ) + react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template) + agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True) - response = agent_executor.invoke({"input": query}) - intermediate_steps = response["intermediate_steps"] + query = ( + "You are an AI assistant that evaluates resumes based on job descriptions. " + "Use the analyze_resume tool to analyze the combined resume and job description text, " + "and search YouTube for relevant resources using the search_youtube_videos tool. " + "Provide short descriptions of the improvement areas as the final answer." 
+ ) - improvement_areas = intermediate_steps[0][1] - youtube_links = intermediate_steps[1][1] + response = agent_executor.invoke({"input": query}) + intermediate_steps = response["intermediate_steps"] - print("\nIMPROVEMENT AREAS:\n", improvement_areas) - print("\nYOUTUBE LINKS:\n", youtube_links) + improvement_areas = intermediate_steps[0][1] + youtube_links = intermediate_steps[1][1] - except Exception as e: - logging.error("An error occurred: %s", str(e)) + return jsonify({ + "improvement_areas": improvement_areas, + "youtube_links": youtube_links + }) + except Exception as e: + logger.error(f"Error during analysis: {e}") + return jsonify({"error": str(e)}), 500 if __name__ == "__main__": - main() + app.run(debug=False, host='0.0.0.0', port=5000) + + From 8f667c0d7bb0888b4e1091c3fff045d76cb6d466 Mon Sep 17 00:00:00 2001 From: kunalkushwahatg Date: Fri, 21 Mar 2025 01:20:33 +0530 Subject: [PATCH 5/6] global variable fix --- main.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 067c6a5..c817f42 100644 --- a/main.py +++ b/main.py @@ -47,6 +47,14 @@ #load react template from langchain hub prompt_template = hub.pull("hwchase17/react") +#query for the react agent +query = ( + "You are an AI assistant that evaluates resumes based on job descriptions. " + "Use the analyze_resume tool to analyze the combined resume and job description text, " + "and search YouTube for relevant resources using the search_youtube_videos tool. " + "Provide short descriptions of the improvement areas as the final answer." 
+ ) + def extract_json_from_text(text: str) -> Dict: """Extract and parse JSON from a text string safely.""" try: @@ -210,14 +218,7 @@ def analyze(): tools = create_tools(combined_text) react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template) - agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True) - - query = ( - "You are an AI assistant that evaluates resumes based on job descriptions. " - "Use the analyze_resume tool to analyze the combined resume and job description text, " - "and search YouTube for relevant resources using the search_youtube_videos tool. " - "Provide short descriptions of the improvement areas as the final answer." - ) + agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True) response = agent_executor.invoke({"input": query}) intermediate_steps = response["intermediate_steps"] From d008a33cd8ca49c4ecf4abac12c9871b73d54571 Mon Sep 17 00:00:00 2001 From: kunalkushwahatg Date: Fri, 21 Mar 2025 01:31:45 +0530 Subject: [PATCH 6/6] added flask in requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 2a1fb23..818cb4e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -79,3 +79,4 @@ uritemplate==4.1.1 urllib3==2.3.0 yarl==1.18.3 zstandard==0.23.0 +flask==3.1.0