{ "cells": [ { "cell_type": "markdown", "id": "ac76df5b-bf9c-4a91-a4f3-096539d22204", "metadata": {}, "source": [ "# Pivotaje de tablas" ] }, { "cell_type": "code", "execution_count": 2, "id": "bf93a197-86d8-40a0-9724-bee1dddcb883", "metadata": {}, "outputs": [], "source": [ "# Es el equivalente a las tablas dinámica en Excel" ] }, { "cell_type": "code", "execution_count": 3, "id": "6d0f54c8-b768-434c-98b9-a615e0eba703", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "titanic = sns.load_dataset('titanic')" ] }, { "cell_type": "code", "execution_count": 5, "id": "aed444b4-d8f5-4daf-b7ba-e860929b96c5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone
003male22.0107.2500SThirdmanTrueNaNSouthamptonnoFalse
111female38.01071.2833CFirstwomanFalseCCherbourgyesFalse
213female26.0007.9250SThirdwomanFalseNaNSouthamptonyesTrue
311female35.01053.1000SFirstwomanFalseCSouthamptonyesFalse
403male35.0008.0500SThirdmanTrueNaNSouthamptonnoTrue
................................................
88602male27.00013.0000SSecondmanTrueNaNSouthamptonnoTrue
88711female19.00030.0000SFirstwomanFalseBSouthamptonyesTrue
88803femaleNaN1223.4500SThirdwomanFalseNaNSouthamptonnoFalse
88911male26.00030.0000CFirstmanTrueCCherbourgyesTrue
89003male32.0007.7500QThirdmanTrueNaNQueenstownnoTrue
\n", "

891 rows × 15 columns

\n", "
" ], "text/plain": [ " survived pclass sex age sibsp parch fare embarked class \\\n", "0 0 3 male 22.0 1 0 7.2500 S Third \n", "1 1 1 female 38.0 1 0 71.2833 C First \n", "2 1 3 female 26.0 0 0 7.9250 S Third \n", "3 1 1 female 35.0 1 0 53.1000 S First \n", "4 0 3 male 35.0 0 0 8.0500 S Third \n", ".. ... ... ... ... ... ... ... ... ... \n", "886 0 2 male 27.0 0 0 13.0000 S Second \n", "887 1 1 female 19.0 0 0 30.0000 S First \n", "888 0 3 female NaN 1 2 23.4500 S Third \n", "889 1 1 male 26.0 0 0 30.0000 C First \n", "890 0 3 male 32.0 0 0 7.7500 Q Third \n", "\n", " who adult_male deck embark_town alive alone \n", "0 man True NaN Southampton no False \n", "1 woman False C Cherbourg yes False \n", "2 woman False NaN Southampton yes True \n", "3 woman False C Southampton yes False \n", "4 man True NaN Southampton no True \n", ".. ... ... ... ... ... ... \n", "886 man True NaN Southampton no True \n", "887 woman False B Southampton yes True \n", "888 woman False NaN Southampton no False \n", "889 man True C Cherbourg yes True \n", "890 man True NaN Queenstown no True \n", "\n", "[891 rows x 15 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "titanic" ] }, { "cell_type": "code", "execution_count": 6, "id": "662a8170-ac8d-452a-b8da-32107c2f4d01", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 survived 891 non-null int64 \n", " 1 pclass 891 non-null int64 \n", " 2 sex 891 non-null object \n", " 3 age 714 non-null float64 \n", " 4 sibsp 891 non-null int64 \n", " 5 parch 891 non-null int64 \n", " 6 fare 891 non-null float64 \n", " 7 embarked 889 non-null object \n", " 8 class 891 non-null category\n", " 9 who 891 non-null object \n", " 10 adult_male 891 non-null bool \n", " 11 deck 203 non-null category\n", " 12 embark_town 889 non-null object \n", " 13 alive 891 non-null object \n", " 14 alone 891 non-null bool \n", "dtypes: bool(2), category(2), float64(2), int64(4), object(5)\n", "memory usage: 80.7+ KB\n" ] } ], "source": [ "titanic.info()" ] }, { "cell_type": "markdown", "id": "5f0203df-f415-46ea-8d3e-3066d8c5cb5d", "metadata": {}, "source": [ "## Extracción de los porcentajes de superviviencia por clase" ] }, { "cell_type": "code", "execution_count": 4, "id": "77cad999-eb13-49d4-be01-0614eb39345d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survived
class
First0.629630
Second0.472826
Third0.242363
\n", "
" ], "text/plain": [ " survived\n", "class \n", "First 0.629630\n", "Second 0.472826\n", "Third 0.242363" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "titanic.pivot_table('survived', 'class')" ] }, { "cell_type": "code", "execution_count": 8, "id": "d902ea7d-efb5-4585-aa1d-f906ec68e245", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survived
class
First136
Second87
Third119
\n", "
" ], "text/plain": [ " survived\n", "class \n", "First 136\n", "Second 87\n", "Third 119" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Si quisiéramos obtener no los porcentajes de supervivencia, sino los totales, usaremos una función de agregación\n", "titanic.pivot_table('survived', 'class', aggfunc=np.sum)" ] }, { "cell_type": "code", "execution_count": 10, "id": "33589941-1442-4181-839d-676a0abcb2eb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
classFirstSecondThird
sexalone
femaleFalse0.9666670.9318180.416667
True0.9705880.9062500.616667
maleFalse0.4255320.2777780.180723
True0.3333330.0972220.121212
\n", "
" ], "text/plain": [ "class First Second Third\n", "sex alone \n", "female False 0.966667 0.931818 0.416667\n", " True 0.970588 0.906250 0.616667\n", "male False 0.425532 0.277778 0.180723\n", " True 0.333333 0.097222 0.121212" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ¿Y si quisiéramos obtener el porcentaje de supervivencia, segregado por sexo y clase?\n", "titanic.pivot_table('survived', ['sex', 'alone'], 'class')" ] }, { "cell_type": "code", "execution_count": 11, "id": "9bf61403-ff18-4b4b-a60c-2adeedd46bf5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
embark_townCherbourgQueenstownSouthampton
classFirstSecondThirdFirstSecondThirdFirstSecondThird
sexalone
femaleFalse1.0000001.0000000.6111111.0NaN0.6250000.9411760.9230770.327586
True0.9444441.0000000.800000NaN1.00.7600001.0000000.8928570.466667
maleFalse0.4736840.1666670.5000000.0NaN0.1000000.4074070.3000000.142857
True0.3478260.2500000.151515NaN0.00.0689660.3269230.0895520.123762
\n", "
" ], "text/plain": [ "embark_town Cherbourg Queenstown \\\n", "class First Second Third First Second Third \n", "sex alone \n", "female False 1.000000 1.000000 0.611111 1.0 NaN 0.625000 \n", " True 0.944444 1.000000 0.800000 NaN 1.0 0.760000 \n", "male False 0.473684 0.166667 0.500000 0.0 NaN 0.100000 \n", " True 0.347826 0.250000 0.151515 NaN 0.0 0.068966 \n", "\n", "embark_town Southampton \n", "class First Second Third \n", "sex alone \n", "female False 0.941176 0.923077 0.327586 \n", " True 1.000000 0.892857 0.466667 \n", "male False 0.407407 0.300000 0.142857 \n", " True 0.326923 0.089552 0.123762 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Complicamos un poco más el ejemplo\n", "titanic.pivot_table('survived',\n", " ['sex', 'alone'],\n", " ['embark_town', 'class'])\n" ] }, { "cell_type": "markdown", "id": "e4b603ed-f3af-4975-a8f8-099cc3f05f8b", "metadata": {}, "source": [ "## Múltiples funciones de agregación" ] }, { "cell_type": "code", "execution_count": 13, "id": "1cd769cf-5a5e-4cb7-b77b-c2b26faf0d24", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
faresurvived
classFirstSecondThirdFirstSecondThird
sex
female106.12579821.97012116.118810917072
male67.22612719.74178212.661633451747
\n", "
" ], "text/plain": [ " fare survived \n", "class First Second Third First Second Third\n", "sex \n", "female 106.125798 21.970121 16.118810 91 70 72\n", "male 67.226127 19.741782 12.661633 45 17 47" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Mostrar el precio medio pagado por el pasaje, en función de su clase y sexo, y además, \n", "# el total de supervivientes segregados por clase y sexo\n", "titanic.pivot_table(index='sex', columns='class',\n", " aggfunc={'survived': np.sum,\n", " 'fare': np.mean})" ] }, { "cell_type": "code", "execution_count": null, "id": "8aaf2eaa-4528-43a6-ab03-28477fa1b20e", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 }