diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index d64db5c..c63cb54 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -7,7 +7,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
- python-version: '3.10'
+ python-version: '3.13'
- name: Install dependencies
run: pip install ruff
- name: Run ruff
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 90b139b..cea297e 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,16 +1,16 @@
-name: Test
+# name: Test
-on: [push, pull_request]
+# on: [push, pull_request]
-jobs:
- test:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- - uses: actions/setup-python@v5
- with:
- python-version: '3.10'
- - name: Install dependencies
- run: pip install -r requirements.txt
- - name: Run tests with pytest
- run: pytest
\ No newline at end of file
+# jobs:
+# test:
+# runs-on: ubuntu-latest
+# steps:
+# - uses: actions/checkout@v4
+# - uses: actions/setup-python@v5
+# with:
+# python-version: '3.10'
+# - name: Install dependencies
+# run: pip install -r requirements.txt
+# - name: Run tests with pytest
+# run: pytest
\ No newline at end of file
diff --git a/src/notebooks/ideal.ipynb b/src/notebooks/ideal.ipynb
index 52f0c37..250bf57 100644
--- a/src/notebooks/ideal.ipynb
+++ b/src/notebooks/ideal.ipynb
@@ -250,7 +250,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.2"
+ "version": "3.13.0"
}
},
"nbformat": 4,
diff --git a/src/notebooks/ideal_read_in_data.ipynb b/src/notebooks/ideal_read_in_data.ipynb
new file mode 100644
index 0000000..aa390dd
--- /dev/null
+++ b/src/notebooks/ideal_read_in_data.ipynb
@@ -0,0 +1,548 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "8d232fdb",
+ "metadata": {},
+ "source": [
+ "### **Table of Contents**\n",
+ " * [read in data](#read-in-data)\n",
+ " * [Update cleaning code](#update-cleaning-code)\n",
+ " * [Generate report](#generate-report)\n",
+ " * [Plots](#plots)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "d11a2343",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "# import matplotlib.pyplot as plt\n",
+ "# import plotly.express as px\n",
+ "# import dash\n",
+ "from pathlib import Path\n",
+ "from typing import Dict, Union\n",
+ "import os\n",
+ "import sys"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0764cac1",
+ "metadata": {},
+ "source": [
+ "## read in data\n",
+ "Pseudo code:\n",
+ "- read in all the files in the data folder \n",
+ " - accounting for them being in xlsx or csv \n",
+ "- dataframe variable name should end up being file name minus extension\n",
+ "\n",
+ "- This allows us to just drop in any export with any name and it should run. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "7cd30f44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# for data in sys.path:\n",
+ "# if data.endswith('.xlsx') or data.endswith('.csv'):\n",
+ "# df = pd.read_excel(data) if data.endswith('.xlsx') else pd.read_csv(data)\n",
+ "# print(f\"Data loaded from: {data}\")\n",
+ "# break\n",
+ "\n",
+ "def load_data_folder(folder_path=\"../../data\"):\n",
+ " dataframes = {}\n",
+ "\n",
+ " for file in os.listdir(folder_path):\n",
+ " if file.endswith(\".csv\") or file.endswith(\".xlsx\"):\n",
+ " file_path = os.path.join(folder_path, file)\n",
+ " file_name = os.path.splitext(file)[0] \n",
+ "\n",
+ " if file.endswith(\".csv\"):\n",
+ " df = pd.read_csv(file_path)\n",
+ " else:\n",
+ " df = pd.read_excel(file_path)\n",
+ "\n",
+ " dataframes[file_name] = df\n",
+ "\n",
+ " return dataframes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "fd40c062",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Auto Id | \n",
+ " First Name | \n",
+ " Last Name | \n",
+ " Gender | \n",
+ " Race | \n",
+ " Ethnicity Hispanic/Latino | \n",
+ " Outcome | \n",
+ " Veteran | \n",
+ " Ex-Offender | \n",
+ " Justice Involved | \n",
+ " Single Parent | \n",
+ " Program: Program Name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 202107-1206 | \n",
+ " name | \n",
+ " name | \n",
+ " Male | \n",
+ " Black or African American | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " No | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Reimage 21-22 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 202107-1206 | \n",
+ " name | \n",
+ " name | \n",
+ " Male | \n",
+ " Black or African American | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " No | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Reimage 21-22 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 202107-1206 | \n",
+ " name | \n",
+ " name | \n",
+ " Male | \n",
+ " Black or African American | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " No | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Reimage 21-22 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 202108-5167 | \n",
+ " name | \n",
+ " name | \n",
+ " Male | \n",
+ " Asian | \n",
+ " NaN | \n",
+ " Successfully Completed | \n",
+ " No | \n",
+ " NaN | \n",
+ " No | \n",
+ " NaN | \n",
+ " Tech Louisville 21-22 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 202108-5171 | \n",
+ " name | \n",
+ " name | \n",
+ " Male | \n",
+ " Black or African American | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Tech Louisville 21-22 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Auto Id First Name Last Name Gender Race \\\n",
+ "0 202107-1206 name name Male Black or African American \n",
+ "1 202107-1206 name name Male Black or African American \n",
+ "2 202107-1206 name name Male Black or African American \n",
+ "3 202108-5167 name name Male Asian \n",
+ "4 202108-5171 name name Male Black or African American \n",
+ "\n",
+ " Ethnicity Hispanic/Latino Outcome Veteran Ex-Offender \\\n",
+ "0 NaN NaN No NaN \n",
+ "1 NaN NaN No NaN \n",
+ "2 NaN NaN No NaN \n",
+ "3 NaN Successfully Completed No NaN \n",
+ "4 NaN NaN NaN NaN \n",
+ "\n",
+ " Justice Involved Single Parent Program: Program Name \n",
+ "0 NaN NaN Reimage 21-22 \n",
+ "1 NaN NaN Reimage 21-22 \n",
+ "2 NaN NaN Reimage 21-22 \n",
+ "3 No NaN Tech Louisville 21-22 \n",
+ "4 NaN NaN Tech Louisville 21-22 "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = load_data_folder()\n",
+ "all_demo = df['All_demographics_and_programs']\n",
+ "all_demo.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "2735cb8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['ARC_Enrollments', 'ARC_Application', 'All_demographics_and_programs'])"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.keys()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "c92c9717",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['ARC_Enrollments', 'ARC_Application', 'All_demographics_and_programs'])"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def load_data_folder(\n",
+ " folder_path: Union[str, os.PathLike] = \"../../data\"\n",
+ ") -> Dict[str, pd.DataFrame]:\n",
+ " \"\"\"\n",
+ " Load all CSV/XLS/XLSX files in a folder into pandas DataFrames.\n",
+ "\n",
+ " Parameters\n",
+ " ----------\n",
+ " folder_path : str | os.PathLike, optional\n",
+ " Path to the folder containing the files. Defaults to \"../../data\".\n",
+ "\n",
+ " Returns\n",
+ " -------\n",
+ " Dict[str, pandas.DataFrame]\n",
+ " A mapping from the file's stem (filename without extension) to its\n",
+ " loaded DataFrame. For example, \"employees.csv\" -> key \"employees\".\n",
+ "\n",
+ " Raises\n",
+ " ------\n",
+ " FileNotFoundError\n",
+ " If `folder_path` does not exist.\n",
+ " PermissionError\n",
+ " If the folder or files cannot be accessed due to permissions.\n",
+ " pd.errors.EmptyDataError\n",
+ " If a CSV file is empty and cannot be parsed.\n",
+ "\n",
+ " Notes\n",
+ " -----\n",
+ " - Supported extensions: .csv, .xls, .xlsx (case-insensitive).\n",
+ " - If both `name.csv` and `name.xlsx` exist, the later one encountered will\n",
+ " overwrite the earlier entry for key `name`.\n",
+ " \"\"\"\n",
+ " path = Path(folder_path)\n",
+ " if not path.exists():\n",
+ " raise FileNotFoundError(f\"Folder not found: {path.resolve()}\")\n",
+ "\n",
+ " dataframes: Dict[str, pd.DataFrame] = {}\n",
+ " for p in path.iterdir():\n",
+ " if not p.is_file():\n",
+ " continue\n",
+ "\n",
+ " ext = p.suffix.lower()\n",
+ " if ext == \".csv\":\n",
+ " df = pd.read_csv(p)\n",
+ " elif ext in {\".xlsx\", \".xls\"}:\n",
+ " df = pd.read_excel(p)\n",
+ " else:\n",
+ " continue\n",
+ "\n",
+ " dataframes[p.stem] = df\n",
+ "\n",
+ " return dataframes\n",
+ "\n",
+ "dfs = load_data_folder()\n",
+ "dfs.keys()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "60e75468",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['ARC_Enrollments', 'ARC_Application', 'All_demographics_and_programs'])"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe6f5506",
+ "metadata": {},
+ "source": [
+ "## Update cleaning code \n",
+ "- Look at our cleaning code that we have. \n",
+ "- we should start to make changes to it to account for this. \n",
+ "- We need to make it so the program doesn't crash when something fails \n",
+ " - [Try Except logic updates](https://www.w3schools.com/python/python_try_except.asp)\n",
+ " - make the error messages meaningful\n",
+ "- Ideally we will not drop anything from our data \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "749ae60a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "'''\n",
+ "See the functions in files:\n",
+ "- src/Carmen_WORCEmployment.py\n",
+ "- src/cleaning_enrollments_data.py\n",
+ "- src/cleaning.py\n",
+ "'''"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ddbb4c0",
+ "metadata": {},
+ "source": [
+ "## Generate report \n",
+ "\n",
+ "- Overall completion of program only accounting for the new style of classes m1-m4\n",
+ "- completion by year \n",
+ "- overall completion by pathway \n",
+ "- completion by year by pathway \n",
+ "- Feel free to get creative here adding gender etc to get us a better understanding \n",
+ "- education level and the above... \n",
+ "- export this as a txt file "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8d6485e5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "859cf674",
+ "metadata": {},
+ "source": [
+ "## Plots \n",
+ "- Look at the various plots \n",
+ "- make a consistent color scheme\n",
+ "- pick the plots that go with the report above \n",
+ "- make missing plots \n",
+ "- make plots have the option to show & save in the functions\n",
+ "\n",
+ "see `src/notebooks/visualization_examples.ipynb`\n",
+ "See below from `src/Carmen_WORCEmployment_Plots.py`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "81009a87",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def plot_salary_by_gender(data):\n",
+ " plt.figure(figsize=(8, 5))\n",
+ " sns.boxplot(data=data, x='Gender', y='Salary')\n",
+ " plt.title(\"Salary Distribution by Gender\")\n",
+ " plt.show()\n",
+ "\n",
+ "\n",
+ "def plot_avg_salary_by_city(data):\n",
+ " region_salary = data.groupby('Mailing City')['Salary'].mean().sort_values()\n",
+ " region_salary.plot(kind='barh', figsize=(8, 5), title=\"Average Salary by KY Region\")\n",
+ " plt.xlabel(\"Average Salary\")\n",
+ " plt.show()\n",
+ "\n",
+ "\n",
+ "def plot_placements_over_time(data):\n",
+ " data.set_index('Start Date').resample('M').size().plot(kind='line', marker='o', figsize=(10, 4))\n",
+ " plt.title(\"Number of Placements Over Time\")\n",
+ " plt.ylabel(\"Placements\")\n",
+ " plt.show()\n",
+ "\n",
+ "\n",
+ "def plot_placement_type_by_program(data):\n",
+ " plt.figure(figsize=(10, 6))\n",
+ " sns.countplot(data=data, x='ATP Placement Type', hue='Program: Program Name')\n",
+ " plt.xticks(rotation=45)\n",
+ " plt.title(\"Placement Type by Program\")\n",
+ " plt.show()\n",
+ "\n",
+ "\n",
+ "def plot_top_cities(data):\n",
+ " city_counts = data['Mailing City'].value_counts().head(10)\n",
+ " city_counts.plot(kind='bar', title='Top Cities by Participant Count', figsize=(8, 4))\n",
+ " plt.ylabel(\"Count\")\n",
+ " plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f905708f",
+ "metadata": {},
+ "source": [
+ "TOC generator "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "d4fc7116",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- ✅ Copy the Markdown below and paste it into a new markdown cell ---\n",
+ "\n",
+ "### **Table of Contents**\n",
+ " * [read in data](#read-in-data)\n",
+ " * [Update cleaning code](#update-cleaning-code)\n",
+ " * [Generate report](#generate-report)\n",
+ " * [Plots](#plots)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "import json\n",
+ "import os\n",
+ "\n",
+ "\n",
+ "def generate_toc_from_notebook(notebook_path):\n",
+ " \"\"\"\n",
+ " Parses a local .ipynb file and generates Markdown for a Table of Contents.\n",
+ " \"\"\"\n",
+ " if not os.path.isfile(notebook_path):\n",
+ " print(f\"❌ Error: File not found at '{notebook_path}'\")\n",
+ " return\n",
+ "\n",
+ " with open(notebook_path, 'r', encoding='utf-8') as f:\n",
+ " notebook = json.load(f)\n",
+ "\n",
+ " toc_markdown = \"### **Table of Contents**\\n\"\n",
+ " for cell in notebook.get('cells', []):\n",
+ " if cell.get('cell_type') == 'markdown':\n",
+ " for line in cell.get('source', []):\n",
+ " if line.strip().startswith('#'):\n",
+ " level = line.count('#')\n",
+ " title = line.strip('#').strip()\n",
+ " link = title.lower().replace(' ', '-').strip('-.()')\n",
+ " indent = ' ' * (level - 1)\n",
+ " toc_markdown += f\"{indent}* [{title}](#{link})\\n\"\n",
+ "\n",
+ " print(\"\\n--- ✅ Copy the Markdown below and paste it \"\n",
+ " \"into a new markdown cell ---\\n\")\n",
+ " print(toc_markdown)\n",
+ "\n",
+ "\n",
+ "notebook_path = 'ideal.ipynb'\n",
+ "generate_toc_from_notebook(notebook_path)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv (3.12.2)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/notebooks/worc_cleaning.ipynb b/src/notebooks/worc_cleaning.ipynb
index 267af60..1ed7f9b 100644
--- a/src/notebooks/worc_cleaning.ipynb
+++ b/src/notebooks/worc_cleaning.ipynb
@@ -418,7 +418,7 @@
"outputs": [],
"source": [
"# Dropping multiple columns based including those with no unique values as well as those that seem unnecessary\n",
- "cols_to_drop = ['Auto Id','Employment History Name']\n",
+ "cols_to_drop = ['Employment History Name']\n",
"\n",
"worc_cols_dropped = worc.drop(columns=cols_to_drop, axis=1)"
]
diff --git a/src/notebooks/worc_employment_plots.ipynb b/src/notebooks/worc_employment_plots.ipynb
index 325e8cb..e5a0e3f 100644
--- a/src/notebooks/worc_employment_plots.ipynb
+++ b/src/notebooks/worc_employment_plots.ipynb
@@ -69,6 +69,173 @@
"metadata": {},
"outputs": [
{
+<<<<<<< HEAD
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Auto Id | \n",
+ " Full Name | \n",
+ " Email | \n",
+ " EnrollmentId | \n",
+ " Company Name | \n",
+ " Job Title | \n",
+ " Start Date | \n",
+ " Program: Program Name | \n",
+ " Mailing City | \n",
+ " Mailing Zip/Postal Code | \n",
+ " ATP Placement Type | \n",
+ " Salary | \n",
+ " Gender | \n",
+ " Race | \n",
+ " KY Region | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 20 | \n",
+ " 202410-17818 | \n",
+ " name name | \n",
+ " name@gmail.com | \n",
+ " Enrollment-11698 | \n",
+ " Centratel | \n",
+ " Remote Telephone Service Representative | \n",
+ " 2025-04-21 | \n",
+ " Code Kentucky 24-25 | \n",
+ " Sandy Hook | \n",
+ " 41171 | \n",
+ " First ATP Placement - New to Tech | \n",
+ " 23.42 | \n",
+ " Male | \n",
+ " White | \n",
+ " SOAR | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 202306-12312 | \n",
+ " name name | \n",
+ " name@gmail.com | \n",
+ " Enrollment-9608 | \n",
+ " Richard Harris, Inc. | \n",
+ " Logistics Technology & Training Specialist | \n",
+ " 2025-05-03 | \n",
+ " Code Kentucky 24-25 | \n",
+ " Busy | \n",
+ " 41723 | \n",
+ " First ATP Placement - Promotion | \n",
+ " 15.00 | \n",
+ " Male | \n",
+ " White | \n",
+ " SOAR | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 202412-19753 | \n",
+ " name name | \n",
+ " name@gmail.com | \n",
+ " Enrollment-11586 | \n",
+ " Delta Gas Company | \n",
+ " Office Manager | \n",
+ " 2025-05-19 | \n",
+ " Code Kentucky 24-25 | \n",
+ " Pikeville | \n",
+ " 41501 | \n",
+ " First ATP Placement - New to Tech | \n",
+ " 18.00 | \n",
+ " Female | \n",
+ " White | \n",
+ " SOAR | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 202406-15679 | \n",
+ " name name | \n",
+ " name@gmail.com | \n",
+ " Enrollment-9552 | \n",
+ " Burbio | \n",
+ " Generative AI Developer | \n",
+ " 2025-05-21 | \n",
+ " Code Kentucky 24-25 | \n",
+ " Loyall | \n",
+ " 40854 | \n",
+ " First ATP Placement - New to Tech | \n",
+ " 28.84 | \n",
+ " Male | \n",
+ " White | \n",
+ " SOAR | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 202412-19675 | \n",
+ " name name | \n",
+ " name@gmail.com | \n",
+ " Enrollment-11718 | \n",
+ " Mountain Association | \n",
+ " Technology Associate | \n",
+ " 2025-06-01 | \n",
+ " Code Kentucky 24-25 | \n",
+ " Brodhead | \n",
+ " 40409 | \n",
+ " First ATP Placement - Already in Tech | \n",
+ " 23.83 | \n",
+ " Female | \n",
+ " White | \n",
+ " SOAR | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Auto Id Full Name Email EnrollmentId \\\n",
+ "20 202410-17818 name name name@gmail.com Enrollment-11698 \n",
+ "21 202306-12312 name name name@gmail.com Enrollment-9608 \n",
+ "22 202412-19753 name name name@gmail.com Enrollment-11586 \n",
+ "23 202406-15679 name name name@gmail.com Enrollment-9552 \n",
+ "24 202412-19675 name name name@gmail.com Enrollment-11718 \n",
+ "\n",
+ " Company Name Job Title \\\n",
+ "20 Centratel Remote Telephone Service Representative \n",
+ "21 Richard Harris, Inc. Logistics Technology & Training Specialist \n",
+ "22 Delta Gas Company Office Manager \n",
+ "23 Burbio Generative AI Developer \n",
+ "24 Mountain Association Technology Associate \n",
+ "\n",
+ " Start Date Program: Program Name Mailing City Mailing Zip/Postal Code \\\n",
+ "20 2025-04-21 Code Kentucky 24-25 Sandy Hook 41171 \n",
+ "21 2025-05-03 Code Kentucky 24-25 Busy 41723 \n",
+ "22 2025-05-19 Code Kentucky 24-25 Pikeville 41501 \n",
+ "23 2025-05-21 Code Kentucky 24-25 Loyall 40854 \n",
+ "24 2025-06-01 Code Kentucky 24-25 Brodhead 40409 \n",
+ "\n",
+ " ATP Placement Type Salary Gender Race KY Region \n",
+ "20 First ATP Placement - New to Tech 23.42 Male White SOAR \n",
+ "21 First ATP Placement - Promotion 15.00 Male White SOAR \n",
+ "22 First ATP Placement - New to Tech 18.00 Female White SOAR \n",
+ "23 First ATP Placement - New to Tech 28.84 Male White SOAR \n",
+ "24 First ATP Placement - Already in Tech 23.83 Female White SOAR "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+=======
"ename": "NameError",
"evalue": "name 'worc_clean' is not defined",
"output_type": "error",
@@ -78,6 +245,7 @@
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mworc_clean\u001b[49m.tail()\n",
"\u001b[31mNameError\u001b[39m: name 'worc_clean' is not defined"
]
+>>>>>>> origin/main
}
],
"source": [
@@ -104,7 +272,11 @@
},
{
"cell_type": "code",
+<<<<<<< HEAD
+ "execution_count": 4,
+=======
"execution_count": null,
+>>>>>>> origin/main
"metadata": {},
"outputs": [],
"source": [
@@ -117,7 +289,11 @@
},
{
"cell_type": "code",
+<<<<<<< HEAD
+ "execution_count": 5,
+=======
"execution_count": null,
+>>>>>>> origin/main
"metadata": {},
"outputs": [
{
@@ -203,7 +379,11 @@
},
{
"cell_type": "code",
+<<<<<<< HEAD
+ "execution_count": 6,
+=======
"execution_count": null,
+>>>>>>> origin/main
"metadata": {},
"outputs": [
{
@@ -228,7 +408,11 @@
},
{
"cell_type": "code",
+<<<<<<< HEAD
+ "execution_count": 7,
+=======
"execution_count": null,
+>>>>>>> origin/main
"metadata": {},
"outputs": [
{
@@ -262,14 +446,18 @@
},
{
"cell_type": "code",
+<<<<<<< HEAD
+ "execution_count": 8,
+=======
"execution_count": null,
+>>>>>>> origin/main
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "/var/folders/sw/mf1x4fnn1jg2jq5n72k6mkm80000gn/T/ipykernel_25780/1675383775.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n",
+ "/var/folders/sw/mf1x4fnn1jg2jq5n72k6mkm80000gn/T/ipykernel_4812/1675383775.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n",
" worc_clean.set_index('Start Date').resample('M').size().plot(kind='line', marker='o', figsize=(10, 4))\n"
]
},
@@ -294,7 +482,11 @@
},
{
"cell_type": "code",
+<<<<<<< HEAD
+ "execution_count": 9,
+=======
"execution_count": null,
+>>>>>>> origin/main
"metadata": {},
"outputs": [
{
@@ -319,7 +511,11 @@
},
{
"cell_type": "code",
+<<<<<<< HEAD
+ "execution_count": 10,
+=======
"execution_count": null,
+>>>>>>> origin/main
"metadata": {},
"outputs": [
{
@@ -343,7 +539,62 @@
},
{
"cell_type": "code",
+<<<<<<< HEAD
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Gender\n",
+ "Male 13\n",
+ "Female 12\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count of Gender\n",
+ "worc_clean['Gender'].value_counts()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ATP Placement Type Gender\n",
+ "First ATP Placement - Already in Tech Female 2\n",
+ " Male 1\n",
+ "First ATP Placement - New to Tech Female 9\n",
+ " Male 9\n",
+ "First ATP Placement - Promotion Female 1\n",
+ " Male 3\n",
+ "dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ " # Count of gender by ATP Placement Type\n",
+ "grouped = worc_clean.groupby(['ATP Placement Type', 'Gender']).size()\n",
+ "\n",
+ "print(grouped)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+=======
"execution_count": null,
+>>>>>>> origin/main
"metadata": {},
"outputs": [],
"source": [