331 changes: 222 additions & 109 deletions main.py
@@ -1,128 +1,241 @@
import os
import re
import json
import fitz
import ast
import logging
from dotenv import load_dotenv
from langchain.agents import create_react_agent, AgentExecutor
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.tools import Tool
from langchain import hub
from typing import List, Dict
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from flask import Flask, request, jsonify

# Load environment variables
load_dotenv()

HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
PDF_PATH = "resume.pdf"
JOB_DESCRIPTION_PDF_PATH = "JD.pdf"

# Flask app setup
app = Flask(__name__)

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


if not HUGGINGFACEHUB_API_TOKEN:
    raise ValueError("Hugging Face API token is missing. Please set HUGGINGFACEHUB_API_TOKEN in your .env file.")

if not YOUTUBE_API_KEY:
    raise ValueError("YouTube API key is missing. Please set YOUTUBE_API_KEY in your .env file.")


# Initialize the Llama 3 instruct model via Hugging Face inference endpoints
llama_llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    temperature=0.5,
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
)

# Initialize the YouTube Data API client
youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)

# Load the ReAct prompt template from the LangChain hub
prompt_template = hub.pull("hwchase17/react")

# Query for the ReAct agent
query = (
    "You are an AI assistant that evaluates resumes based on job descriptions. "
    "Use the analyze_resume tool to analyze the combined resume and job description text, "
    "and search YouTube for relevant resources using the search_youtube_videos tool. "
    "Provide short descriptions of the improvement areas as the final answer."
)

def extract_json_from_text(text: str) -> Dict:
    """Extract and parse JSON from a text string safely."""
    try:
        match = re.search(r"\{.*\}", text, re.DOTALL)  # Extract the JSON block
        if match:
            json_string = match.group(0)
            return json.loads(json_string)  # Parse the JSON
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON: {e}")

    logger.error("No valid JSON found in the text.")
    return {}
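For reference, a minimal sketch of how extract_json_from_text behaves on a typical model reply (the sample string below is invented):

sample = 'Here is my analysis:\n{"Improvement Areas": [{"Title": "SQL", "Description": "Add SQL projects."}]}'
parsed = extract_json_from_text(sample)
print(parsed["Improvement Areas"][0]["Title"])  # -> SQL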


def extract_text_from_pdf(pdf_stream):
    """Extract text from a PDF file stream using PyMuPDF."""
    try:
        doc = fitz.open(stream=pdf_stream, filetype="pdf")
        text = "\n".join(page.get_text("text") for page in doc)
        return text
    except Exception as e:
        logger.error(f"Error extracting text from PDF stream: {e}")
        return ""


def combine_text(resume_text: str, job_description_text: str) -> str:
    """Combine resume and job description text into a single formatted string."""
    return f"Resume: {resume_text}\n\nJob Description: {job_description_text}"


def extract_list_from_text(text: str) -> list | None:
    """Extract a Python list from text using regex and safe parsing."""
    match = re.search(r"\[.*?\]", text)
    if match:
        try:
            extracted_list = ast.literal_eval(match.group(0))
            return extracted_list if isinstance(extracted_list, list) else None
        except (SyntaxError, ValueError):
            logger.warning("Failed to parse list from text.")
    return None
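A quick usage sketch: the ReAct agent usually passes tool input as plain text, so this helper pulls the first list literal out of it (the input below is made up):

queries = extract_list_from_text("Action Input: ['SQL optimization', 'Docker basics']")
# -> ['SQL optimization', 'Docker basics']; returns None when no list literal is found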


def analyze_resume(combined_text: str) -> List[Dict[str, str]]:
    """
    Analyze a resume against a job description and return 3 technical improvement areas.
    """
    prompt = f"""
    Analyze the following resume and job description to identify **EXACTLY 3 key technical improvement areas**.
    Focus strictly on **technical skills**, not managerial or soft skills.

    ### Input:
    {combined_text}

    ### Output Format (STRICTLY FOLLOW THIS):
    Return **only one valid JSON object** without any extra text, explanations, or multiple outputs.
    Ensure the JSON structure exactly matches the format below:

    {{
        "Improvement Areas": [
            {{
                "Title": "Improvement Area 1",
                "Description": "Brief description of the first improvement area."
            }},
            {{
                "Title": "Improvement Area 2",
                "Description": "Brief description of the second improvement area."
            }},
            {{
                "Title": "Improvement Area 3",
                "Description": "Brief description of the third improvement area."
            }}
        ]
    }}
    """

    try:
        analysis = llama_llm.invoke(prompt)
        improvement_areas = extract_json_from_text(analysis).get("Improvement Areas", [])

        if not improvement_areas:
            logger.warning("No improvement areas found in the analysis.")
            return []

        return improvement_areas

    except Exception as e:
        logger.error(f"Error in resume analysis: {e}")
        return [{"Title": "Error", "Description": "Failed to analyze resume."}]




def search_youtube_videos(queries: List[str]) -> Dict[str, List[str]]:
    """
    Search YouTube for videos related to the given queries and return links to relevant videos.
    """
    recommended_videos = {}

    for query in queries or []:  # Tolerate None from extract_list_from_text
        try:
            request = youtube.search().list(
                part="snippet", q=query, maxResults=10  # Request the top 10 results; items may be videos or playlists
            )
            response = request.execute()

            video_urls = [
                f"https://www.youtube.com/watch?v={item['id']['videoId']}"
                for item in response.get("items", [])
                if "videoId" in item["id"]
            ]

            if not video_urls:
                logger.warning(f"No video results for query: {query}")
                continue  # Skip retrying the exact same query

            # Keep the top 3 videos out of the 10 results
            recommended_videos[query] = video_urls[:3]

        except HttpError as e:
            logger.error(f"YouTube API error for query '{query}': {e}")
            recommended_videos[query] = ["YouTube API error occurred"]

        except Exception as e:
            logger.error(f"Unexpected error for query '{query}': {e}")
            recommended_videos[query] = ["An error occurred"]

    return recommended_videos
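The return value maps each query to at most three watch URLs; a small example (URLs illustrative):

links = search_youtube_videos(["SQL optimization"])
# -> {"SQL optimization": ["https://www.youtube.com/watch?v=...", ...]}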


def create_tools(combined_text: str) -> list:
    """Create LangChain tools for resume analysis and YouTube search."""
    return [
        Tool(
            name="analyze_resume",
            func=lambda _: analyze_resume(combined_text),  # Agent input is ignored; the text is bound at creation time
            description="Analyze a resume against a job description and return improvement areas.",
        ),
        Tool(
            name="search_youtube_videos",
            func=lambda queries: search_youtube_videos(extract_list_from_text(queries)),
            description="Search YouTube for videos related to the improvement areas.",
        ),
    ]

@app.route('/analyze', methods=['POST'])
def analyze():
    if 'resume' not in request.files or 'jd' not in request.files:
        return jsonify({"error": "Both resume and job description PDFs are required"}), 400

    resume_file = request.files['resume']
    jd_file = request.files['jd']

    if resume_file.filename == '' or jd_file.filename == '':
        return jsonify({"error": "Files must have valid names"}), 400

    try:
        resume_text = extract_text_from_pdf(resume_file.read())
        job_description_text = extract_text_from_pdf(jd_file.read())

        combined_text = combine_text(resume_text, job_description_text)
        tools = create_tools(combined_text)

        react_agent = create_react_agent(llm=llama_llm, tools=tools, prompt=prompt_template)
        agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True, return_intermediate_steps=True)

        response = agent_executor.invoke({"input": query})
        intermediate_steps = response["intermediate_steps"]

        # Assumes the agent calls analyze_resume first and search_youtube_videos second
        improvement_areas = intermediate_steps[0][1]
        youtube_links = intermediate_steps[1][1]

        return jsonify({
            "improvement_areas": improvement_areas,
            "youtube_links": youtube_links
        })

    except Exception as e:
        logger.error(f"Error during analysis: {e}")
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    app.run(debug=False, host='0.0.0.0', port=5000)
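One way to exercise the endpoint locally, assuming the server is running on port 5000 and the two sample PDFs exist under these names (requires the requests package):

import requests

with open("resume.pdf", "rb") as resume, open("JD.pdf", "rb") as jd:
    resp = requests.post(
        "http://localhost:5000/analyze",
        files={"resume": resume, "jd": jd},
    )
print(resp.json())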
1 change: 1 addition & 0 deletions requirements.txt
@@ -79,3 +79,4 @@ uritemplate==4.1.1
urllib3==2.3.0
yarl==1.18.3
zstandard==0.23.0
flask==3.1.0