diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index d64db5c..c63cb54 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -7,7 +7,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.13'
       - name: Install dependencies
         run: pip install ruff
       - name: Run ruff
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 90b139b..cea297e 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,16 +1,16 @@
-name: Test
+# name: Test
 
-on: [push, pull_request]
+# on: [push, pull_request]
 
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.10'
-      - name: Install dependencies
-        run: pip install -r requirements.txt
-      - name: Run tests with pytest
-        run: pytest
\ No newline at end of file
+# jobs:
+#   test:
+#     runs-on: ubuntu-latest
+#     steps:
+#       - uses: actions/checkout@v4
+#       - uses: actions/setup-python@v5
+#         with:
+#           python-version: '3.10'
+#       - name: Install dependencies
+#         run: pip install -r requirements.txt
+#       - name: Run tests with pytest
+#         run: pytest
\ No newline at end of file
diff --git a/src/notebooks/ideal.ipynb b/src/notebooks/ideal.ipynb
index 52f0c37..250bf57 100644
--- a/src/notebooks/ideal.ipynb
+++ b/src/notebooks/ideal.ipynb
@@ -250,7 +250,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.2"
+   "version": "3.13.0"
   }
  },
  "nbformat": 4,
diff --git a/src/notebooks/ideal_read_in_data.ipynb b/src/notebooks/ideal_read_in_data.ipynb
new file mode 100644
index 0000000..aa390dd
--- /dev/null
+++ b/src/notebooks/ideal_read_in_data.ipynb
@@ -0,0 +1,548 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8d232fdb",
+   "metadata": {},
+   "source": [
+    "### **Table of Contents**\n",
+    " * [read in data](#read-in-data)\n",
+    " * [Update cleaning code](#update-cleaning-code)\n",
+    " * [Generate report](#generate-report)\n",
+    " * [Plots](#plots)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "d11a2343",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "# import matplotlib.pyplot as plt\n",
+    "# import plotly.express as px\n",
+    "# import dash\n",
+    "from pathlib import Path\n",
+    "from typing import Dict, Union\n",
+    "import os\n",
+    "import sys"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0764cac1",
+   "metadata": {},
+   "source": [
+    "## read in data\n",
+    "Pseudo code:\n",
+    "- read in all the files in the data folder\n",
+    "    - accounting for them being either xlsx or csv\n",
+    "- each dataframe's variable name should end up being the file name minus its extension\n",
+    "\n",
+    "- This allows us to just drop in any export with any name and it should run."
" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7cd30f44", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# for data in sys.path:\n", + "# if data.emndswith('.xlsx') or data.endswith('.csv'):\n", + "# df = pd.read_excel(data) if data.endswith('.xlsx') else pd.read_csv(data)\n", + "# print(f\"Data loaded from: {data}\")\n", + "# break\n", + "\n", + "def load_data_folder(folder_path=\"../../data\"):\n", + " dataframes = {}\n", + "\n", + " for file in os.listdir(folder_path):\n", + " if file.endswith(\".csv\") or file.endswith(\".xlsx\"):\n", + " file_path = os.path.join(folder_path, file)\n", + " file_name = os.path.splitext(file)[0] \n", + "\n", + " if file.endswith(\".csv\"):\n", + " df = pd.read_csv(file_path)\n", + " else:\n", + " df = pd.read_excel(file_path)\n", + "\n", + " dataframes[file_name] = df\n", + "\n", + " return dataframes" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "fd40c062", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Auto IdFirst NameLast NameGenderRaceEthnicity Hispanic/LatinoOutcomeVeteranEx-OffenderJustice InvolvedSingle ParentProgram: Program Name
0202107-1206namenameMaleBlack or African AmericanNaNNaNNoNaNNaNNaNReimage 21-22
1202107-1206namenameMaleBlack or African AmericanNaNNaNNoNaNNaNNaNReimage 21-22
2202107-1206namenameMaleBlack or African AmericanNaNNaNNoNaNNaNNaNReimage 21-22
3202108-5167namenameMaleAsianNaNSuccessfully CompletedNoNaNNoNaNTech Louisville 21-22
4202108-5171namenameMaleBlack or African AmericanNaNNaNNaNNaNNaNNaNTech Louisville 21-22
\n", + "
" + ], + "text/plain": [ + " Auto Id First Name Last Name Gender Race \\\n", + "0 202107-1206 name name Male Black or African American \n", + "1 202107-1206 name name Male Black or African American \n", + "2 202107-1206 name name Male Black or African American \n", + "3 202108-5167 name name Male Asian \n", + "4 202108-5171 name name Male Black or African American \n", + "\n", + " Ethnicity Hispanic/Latino Outcome Veteran Ex-Offender \\\n", + "0 NaN NaN No NaN \n", + "1 NaN NaN No NaN \n", + "2 NaN NaN No NaN \n", + "3 NaN Successfully Completed No NaN \n", + "4 NaN NaN NaN NaN \n", + "\n", + " Justice Involved Single Parent Program: Program Name \n", + "0 NaN NaN Reimage 21-22 \n", + "1 NaN NaN Reimage 21-22 \n", + "2 NaN NaN Reimage 21-22 \n", + "3 No NaN Tech Louisville 21-22 \n", + "4 NaN NaN Tech Louisville 21-22 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = load_data_folder()\n", + "all_demo = df['All_demographics_and_programs']\n", + "all_demo.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2735cb8e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['ARC_Enrollments', 'ARC_Application', 'All_demographics_and_programs'])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "c92c9717", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['ARC_Enrollments', 'ARC_Application', 'All_demographics_and_programs'])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def load_data_folder(\n", + " folder_path: Union[str, os.PathLike] = \"../../data\"\n", + ") -> Dict[str, pd.DataFrame]:\n", + " \"\"\"\n", + " Load all CSV/XLS/XLSX files in a folder into pandas DataFrames.\n", + "\n", + " Parameters\n", + " ----------\n", + " folder_path : str | os.PathLike, optional\n", + " Path to the folder containing the files. Defaults to \"../../data\".\n", + "\n", + " Returns\n", + " -------\n", + " Dict[str, pandas.DataFrame]\n", + " A mapping from the file's stem (filename without extension) to its\n", + " loaded DataFrame. 
For example, \"employees.csv\" -> key \"employees\".\n", + "\n", + " Raises\n", + " ------\n", + " FileNotFoundError\n", + " If `folder_path` does not exist.\n", + " PermissionError\n", + " If the folder or files cannot be accessed due to permissions.\n", + " pd.errors.EmptyDataError\n", + " If a CSV file is empty and cannot be parsed.\n", + "\n", + " Notes\n", + " -----\n", + " - Supported extensions: .csv, .xls, .xlsx (case-insensitive).\n", + " - If both `name.csv` and `name.xlsx` exist, the later one encountered will\n", + " overwrite the earlier entry for key `name`.\n", + " \"\"\"\n", + " path = Path(folder_path)\n", + " if not path.exists():\n", + " raise FileNotFoundError(f\"Folder not found: {path.resolve()}\")\n", + "\n", + " dataframes: Dict[str, pd.DataFrame] = {}\n", + " for p in path.iterdir():\n", + " if not p.is_file():\n", + " continue\n", + "\n", + " ext = p.suffix.lower()\n", + " if ext == \".csv\":\n", + " df = pd.read_csv(p)\n", + " elif ext in {\".xlsx\", \".xls\"}:\n", + " df = pd.read_excel(p)\n", + " else:\n", + " continue\n", + "\n", + " dataframes[p.stem] = df\n", + "\n", + " return dataframes\n", + "\n", + "dfs = load_data_folder()\n", + "dfs.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60e75468", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['ARC_Enrollments', 'ARC_Application', 'All_demographics_and_programs'])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "markdown", + "id": "fe6f5506", + "metadata": {}, + "source": [ + "## Update cleaning code \n", + "- Look at our cleaning code that we have. \n", + "- we should start to make changes to it to account for this. \n", + "- We need to make it so it so the program doesn't crash when something fails \n", + " - [Try Except logic updates](https://www.w3schools.com/python/python_try_except.asp)\n", + " - make the messages mean something meaningful\n", + "- Ideally we will not drop anything from our data \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "749ae60a", + "metadata": {}, + "outputs": [], + "source": [ + "'''\n", + "See the functions in files:\n", + "- src/Carmen_WORCEmployment.py\n", + "- src/cleaning_enrollments_data.py\n", + "- src/cleaning.py\n", + "'''" + ] + }, + { + "cell_type": "markdown", + "id": "6ddbb4c0", + "metadata": {}, + "source": [ + "## Generate report \n", + "\n", + "- Overall completion of program only accounting for the new style of classes m1-m4\n", + "- completion by year \n", + "- completion over all by pathway \n", + "- completion by year by pathway \n", + "- Feel free to get creative here adding gender etc to get us a better understanding \n", + "- education level and the above... 
\n", + "- export this as a txt file " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d6485e5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "859cf674", + "metadata": {}, + "source": [ + "## Plots \n", + "- Look at the various plots \n", + "- make a consistent color scheme\n", + "- pick the plots that go with the report above \n", + "- make missing plots \n", + "- make plots have the option to show & save in the functions\n", + "\n", + "see `src/notebooks/visualization_examples.ipynb`\n", + "See below from `src/Carmen_WORCEmployment_Plots.py`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81009a87", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_salary_by_gender(data):\n", + " plt.figure(figsize=(8, 5))\n", + " sns.boxplot(data=data, x='Gender', y='Salary')\n", + " plt.title(\"Salary Distribution by Gender\")\n", + " plt.show()\n", + "\n", + "\n", + "def plot_avg_salary_by_city(data):\n", + " region_salary = data.groupby('Mailing City')['Salary'].mean().sort_values()\n", + " region_salary.plot(kind='barh', figsize=(8, 5), title=\"Average Salary by KY Region\")\n", + " plt.xlabel(\"Average Salary\")\n", + " plt.show()\n", + "\n", + "\n", + "def plot_placements_over_time(data):\n", + " data.set_index('Start Date').resample('M').size().plot(kind='line', marker='o', figsize=(10, 4))\n", + " plt.title(\"Number of Placements Over Time\")\n", + " plt.ylabel(\"Placements\")\n", + " plt.show()\n", + "\n", + "\n", + "def plot_placement_type_by_program(data):\n", + " plt.figure(figsize=(10, 6))\n", + " sns.countplot(data=data, x='ATP Placement Type', hue='Program: Program Name')\n", + " plt.xticks(rotation=45)\n", + " plt.title(\"Placement Type by Program\")\n", + " plt.show()\n", + "\n", + "\n", + "def plot_top_cities(data):\n", + " city_counts = data['Mailing City'].value_counts().head(10)\n", + " city_counts.plot(kind='bar', title='Top Cities by Participant Count', figsize=(8, 4))\n", + " plt.ylabel(\"Count\")\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "f905708f", + "metadata": {}, + "source": [ + "TOC generator " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d4fc7116", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- ✅ Copy the Markdown below and paste it into a new markdown cell ---\n", + "\n", + "### **Table of Contents**\n", + " * [read in data](#read-in-data)\n", + " * [Update cleaning code](#update-cleaning-code)\n", + " * [Generate report](#generate-report)\n", + " * [Plots](#plots)\n", + "\n" + ] + } + ], + "source": [ + "import json\n", + "import os\n", + "\n", + "\n", + "def generate_toc_from_notebook(notebook_path):\n", + " \"\"\"\n", + " Parses a local .ipynb file and generates Markdown for a Table of Contents.\n", + " \"\"\"\n", + " if not os.path.isfile(notebook_path):\n", + " print(f\"❌ Error: File not found at '{notebook_path}'\")\n", + " return\n", + "\n", + " with open(notebook_path, 'r', encoding='utf-8') as f:\n", + " notebook = json.load(f)\n", + "\n", + " toc_markdown = \"### **Table of Contents**\\n\"\n", + " for cell in notebook.get('cells', []):\n", + " if cell.get('cell_type') == 'markdown':\n", + " for line in cell.get('source', []):\n", + " if line.strip().startswith('#'):\n", + " level = line.count('#')\n", + " title = line.strip('#').strip()\n", + " link = title.lower().replace(' ', '-').strip('-.()')\n", + " indent = ' ' * (level - 1)\n", + " 
toc_markdown += f\"{indent}* [{title}](#{link})\\n\"\n", + "\n", + " print(\"\\n--- ✅ Copy the Markdown below and paste it \"\n", + " \"into a new markdown cell ---\\n\")\n", + " print(toc_markdown)\n", + "\n", + "\n", + "notebook_path = 'ideal.ipynb'\n", + "generate_toc_from_notebook(notebook_path)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv (3.12.2)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/notebooks/worc_cleaning.ipynb b/src/notebooks/worc_cleaning.ipynb index 267af60..1ed7f9b 100644 --- a/src/notebooks/worc_cleaning.ipynb +++ b/src/notebooks/worc_cleaning.ipynb @@ -418,7 +418,7 @@ "outputs": [], "source": [ "# Dropping multiple columns based including those with no unique values as well as those that seem unnecessary\n", - "cols_to_drop = ['Auto Id','Employment History Name']\n", + "cols_to_drop = ['Employment History Name']\n", "\n", "worc_cols_dropped = worc.drop(columns=cols_to_drop, axis=1)" ] diff --git a/src/notebooks/worc_employment_plots.ipynb b/src/notebooks/worc_employment_plots.ipynb index 325e8cb..e5a0e3f 100644 --- a/src/notebooks/worc_employment_plots.ipynb +++ b/src/notebooks/worc_employment_plots.ipynb @@ -69,6 +69,173 @@ "metadata": {}, "outputs": [ { +<<<<<<< HEAD + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Auto IdFull NameEmailEnrollmentIdCompany NameJob TitleStart DateProgram: Program NameMailing CityMailing Zip/Postal CodeATP Placement TypeSalaryGenderRaceKY Region
20202410-17818name namename@gmail.comEnrollment-11698CentratelRemote Telephone Service Representative2025-04-21Code Kentucky 24-25Sandy Hook41171First ATP Placement - New to Tech23.42MaleWhiteSOAR
21202306-12312name namename@gmail.comEnrollment-9608Richard Harris, Inc.Logistics Technology & Training Specialist2025-05-03Code Kentucky 24-25Busy41723First ATP Placement - Promotion15.00MaleWhiteSOAR
22202412-19753name namename@gmail.comEnrollment-11586Delta Gas CompanyOffice Manager2025-05-19Code Kentucky 24-25Pikeville41501First ATP Placement - New to Tech18.00FemaleWhiteSOAR
23202406-15679name namename@gmail.comEnrollment-9552BurbioGenerative AI Developer2025-05-21Code Kentucky 24-25Loyall40854First ATP Placement - New to Tech28.84MaleWhiteSOAR
24202412-19675name namename@gmail.comEnrollment-11718Mountain AssociationTechnology Associate2025-06-01Code Kentucky 24-25Brodhead40409First ATP Placement - Already in Tech23.83FemaleWhiteSOAR
\n", + "
" + ], + "text/plain": [ + " Auto Id Full Name Email EnrollmentId \\\n", + "20 202410-17818 name name name@gmail.com Enrollment-11698 \n", + "21 202306-12312 name name name@gmail.com Enrollment-9608 \n", + "22 202412-19753 name name name@gmail.com Enrollment-11586 \n", + "23 202406-15679 name name name@gmail.com Enrollment-9552 \n", + "24 202412-19675 name name name@gmail.com Enrollment-11718 \n", + "\n", + " Company Name Job Title \\\n", + "20 Centratel Remote Telephone Service Representative \n", + "21 Richard Harris, Inc. Logistics Technology & Training Specialist \n", + "22 Delta Gas Company Office Manager \n", + "23 Burbio Generative AI Developer \n", + "24 Mountain Association Technology Associate \n", + "\n", + " Start Date Program: Program Name Mailing City Mailing Zip/Postal Code \\\n", + "20 2025-04-21 Code Kentucky 24-25 Sandy Hook 41171 \n", + "21 2025-05-03 Code Kentucky 24-25 Busy 41723 \n", + "22 2025-05-19 Code Kentucky 24-25 Pikeville 41501 \n", + "23 2025-05-21 Code Kentucky 24-25 Loyall 40854 \n", + "24 2025-06-01 Code Kentucky 24-25 Brodhead 40409 \n", + "\n", + " ATP Placement Type Salary Gender Race KY Region \n", + "20 First ATP Placement - New to Tech 23.42 Male White SOAR \n", + "21 First ATP Placement - Promotion 15.00 Male White SOAR \n", + "22 First ATP Placement - New to Tech 18.00 Female White SOAR \n", + "23 First ATP Placement - New to Tech 28.84 Male White SOAR \n", + "24 First ATP Placement - Already in Tech 23.83 Female White SOAR " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" +======= "ename": "NameError", "evalue": "name 'worc_clean' is not defined", "output_type": "error", @@ -78,6 +245,7 @@ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mworc_clean\u001b[49m.tail()\n", "\u001b[31mNameError\u001b[39m: name 'worc_clean' is not defined" ] +>>>>>>> origin/main } ], "source": [ @@ -104,7 +272,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 4, +======= "execution_count": null, +>>>>>>> origin/main "metadata": {}, "outputs": [], "source": [ @@ -117,7 +289,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 5, +======= "execution_count": null, +>>>>>>> origin/main "metadata": {}, "outputs": [ { @@ -203,7 +379,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 6, +======= "execution_count": null, +>>>>>>> origin/main "metadata": {}, "outputs": [ { @@ -228,7 +408,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 7, +======= "execution_count": null, +>>>>>>> origin/main "metadata": {}, "outputs": [ { @@ -262,14 +446,18 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 8, +======= "execution_count": null, +>>>>>>> origin/main "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/sw/mf1x4fnn1jg2jq5n72k6mkm80000gn/T/ipykernel_25780/1675383775.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n", + "/var/folders/sw/mf1x4fnn1jg2jq5n72k6mkm80000gn/T/ipykernel_4812/1675383775.py:2: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n", " worc_clean.set_index('Start Date').resample('M').size().plot(kind='line', marker='o', figsize=(10, 4))\n" ] }, @@ -294,7 +482,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 9, +======= "execution_count": null, +>>>>>>> origin/main "metadata": 
{}, "outputs": [ { @@ -319,7 +511,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 10, +======= "execution_count": null, +>>>>>>> origin/main "metadata": {}, "outputs": [ { @@ -343,7 +539,62 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Gender\n", + "Male 13\n", + "Female 12\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Count of Gender\n", + "worc_clean['Gender'].value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ATP Placement Type Gender\n", + "First ATP Placement - Already in Tech Female 2\n", + " Male 1\n", + "First ATP Placement - New to Tech Female 9\n", + " Male 9\n", + "First ATP Placement - Promotion Female 1\n", + " Male 3\n", + "dtype: int64\n" + ] + } + ], + "source": [ + " # Count of gender by ATP Placement Type\n", + "grouped = worc_clean.groupby(['ATP Placement Type', 'Gender']).size()\n", + "\n", + "print(grouped)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, +======= "execution_count": null, +>>>>>>> origin/main "metadata": {}, "outputs": [], "source": [