{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pandas 2\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"#import seaborn as sns\n",
"#import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Carga de datos \n",
"---"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"/home/mydoctor/Documents/03.Trabajos/25.Bootcamp Cámara Comercio Bilbao (310h) 2021-11/Modulo 0 - Introducción/Pandas/Ejercicio/data2.csv\", sep=\"|\")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" Catastro | \n",
" ReferenciaCP | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 106640 | \n",
" 087 2000501120UN02UN0UN | \n",
" 087 2000501120 | \n",
" 6105422 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 00:00:00 | \n",
"
\n",
" \n",
" | 1 | \n",
" 205094 | \n",
" 033 2006900750UN040B0UN | \n",
" 033 2006900750 | \n",
" 6125687 | \n",
" N01 | \n",
" 2020-05-03 | \n",
" Servicio sin cuota | \n",
" 2020-03-19 00:00:00 | \n",
"
\n",
" \n",
" | 2 | \n",
" 21253 | \n",
" 008 0105902010UN060C0UN | \n",
" 008 0105902010 | \n",
" 6098841 | \n",
" N06 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-01-11 00:00:00 | \n",
"
\n",
" \n",
" | 3 | \n",
" 149999 | \n",
" 002 4802008896UN040C0UN | \n",
" 002 4802008896 | \n",
" 6115427 | \n",
" N06 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-02-26 00:00:00 | \n",
"
\n",
" \n",
" | 4 | \n",
" 58564 | \n",
" 010 2007902403UN03CN0UN | \n",
" 010 2007902403 | \n",
" 6103115 | \n",
" N04 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-01-25 00:00:00 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 13580 | \n",
" 191276 | \n",
" 005 4804400046UNPBUN001 | \n",
" 005 4804400046 | \n",
" 75816 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-14 00:00:00 | \n",
"
\n",
" \n",
" | 13581 | \n",
" 106294 | \n",
" 005 0103601575UN020C0UN | \n",
" 005 0103601575 | \n",
" 6109233 | \n",
" N01 | \n",
" 2020-03-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-06 00:00:00 | \n",
"
\n",
" \n",
" | 13582 | \n",
" 214734 | \n",
" 001 2004501209UNUNUNPOR | \n",
" 001 2004501209 | \n",
" 2239094 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-24 00:00:00 | \n",
"
\n",
" \n",
" | 13583 | \n",
" 175766 | \n",
" 001 4802009095UN050C0UN | \n",
" 001 4802009095 | \n",
" 6119594 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-03-07 00:00:00 | \n",
"
\n",
" \n",
" | 13584 | \n",
" 122530 | \n",
" 011 3120101590UN070C0UN | \n",
" 011 3120101590 | \n",
" 6111352 | \n",
" N06 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 00:00:00 | \n",
"
\n",
" \n",
"
\n",
"
13585 rows × 8 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 Catastro ReferenciaCP IdCliente \\\n",
"0 106640 087 2000501120UN02UN0UN 087 2000501120 6105422 \n",
"1 205094 033 2006900750UN040B0UN 033 2006900750 6125687 \n",
"2 21253 008 0105902010UN060C0UN 008 0105902010 6098841 \n",
"3 149999 002 4802008896UN040C0UN 002 4802008896 6115427 \n",
"4 58564 010 2007902403UN03CN0UN 010 2007902403 6103115 \n",
"... ... ... ... ... \n",
"13580 191276 005 4804400046UNPBUN001 005 4804400046 75816 \n",
"13581 106294 005 0103601575UN020C0UN 005 0103601575 6109233 \n",
"13582 214734 001 2004501209UNUNUNPOR 001 2004501209 2239094 \n",
"13583 175766 001 4802009095UN050C0UN 001 4802009095 6119594 \n",
"13584 122530 011 3120101590UN070C0UN 011 3120101590 6111352 \n",
"\n",
" Producto Fecha TipoProducto AltaCliente \n",
"0 N01 2020-06-03 Servicio principal 2020-02-16 00:00:00 \n",
"1 N01 2020-05-03 Servicio sin cuota 2020-03-19 00:00:00 \n",
"2 N06 2020-03-03 Servicio principal 2020-01-11 00:00:00 \n",
"3 N06 2020-05-03 Servicio principal 2020-02-26 00:00:00 \n",
"4 N04 2020-05-03 Servicio principal 2020-01-25 00:00:00 \n",
"... ... ... ... ... \n",
"13580 N06 2020-04-03 Servicio principal 2020-03-14 00:00:00 \n",
"13581 N01 2020-03-03 Servicio sin cuota 2020-02-06 00:00:00 \n",
"13582 N06 2020-04-03 Servicio principal 2020-03-24 00:00:00 \n",
"13583 N01 2020-06-03 Servicio principal 2020-03-07 00:00:00 \n",
"13584 N06 2020-06-03 Servicio principal 2020-02-16 00:00:00 \n",
"\n",
"[13585 rows x 8 columns]"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#Otra forma más elegante de cargar los datos...\n",
"\n",
"# Cambiar por carpeta donde se encuentren los datos\n",
"data_dir = '/home/mydoctor/Documents/03.Trabajos/25.Bootcamp Cámara Comercio Bilbao (310h) 2021-11/Modulo 0 - Introducción/Pandas/Ejercicio/'\n",
"\n",
"path = os.path.join(data_dir, 'data2.csv')\n",
"try:\n",
" df = pd.read_csv(path, sep=\"|\") # para cargar csv tabulados, usar sep=\"\\t\"\n",
"except Exception as e:\n",
" print(e)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Corregimos los errores\n",
"Si hay errores, comprobamos los siguiente:\n",
"* Caracter separador.\n",
"* Caracter decimal.\n",
"* Codificación."
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" Catastro | \n",
" ReferenciaCP | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 106640 | \n",
" 087 2000501120UN02UN0UN | \n",
" 087 2000501120 | \n",
" 6105422 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 00:00:00 | \n",
"
\n",
" \n",
" | 1 | \n",
" 205094 | \n",
" 033 2006900750UN040B0UN | \n",
" 033 2006900750 | \n",
" 6125687 | \n",
" N01 | \n",
" 2020-05-03 | \n",
" Servicio sin cuota | \n",
" 2020-03-19 00:00:00 | \n",
"
\n",
" \n",
" | 2 | \n",
" 21253 | \n",
" 008 0105902010UN060C0UN | \n",
" 008 0105902010 | \n",
" 6098841 | \n",
" N06 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-01-11 00:00:00 | \n",
"
\n",
" \n",
" | 3 | \n",
" 149999 | \n",
" 002 4802008896UN040C0UN | \n",
" 002 4802008896 | \n",
" 6115427 | \n",
" N06 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-02-26 00:00:00 | \n",
"
\n",
" \n",
" | 4 | \n",
" 58564 | \n",
" 010 2007902403UN03CN0UN | \n",
" 010 2007902403 | \n",
" 6103115 | \n",
" N04 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-01-25 00:00:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 Catastro ReferenciaCP IdCliente Producto \\\n",
"0 106640 087 2000501120UN02UN0UN 087 2000501120 6105422 N01 \n",
"1 205094 033 2006900750UN040B0UN 033 2006900750 6125687 N01 \n",
"2 21253 008 0105902010UN060C0UN 008 0105902010 6098841 N06 \n",
"3 149999 002 4802008896UN040C0UN 002 4802008896 6115427 N06 \n",
"4 58564 010 2007902403UN03CN0UN 010 2007902403 6103115 N04 \n",
"\n",
" Fecha TipoProducto AltaCliente \n",
"0 2020-06-03 Servicio principal 2020-02-16 00:00:00 \n",
"1 2020-05-03 Servicio sin cuota 2020-03-19 00:00:00 \n",
"2 2020-03-03 Servicio principal 2020-01-11 00:00:00 \n",
"3 2020-05-03 Servicio principal 2020-02-26 00:00:00 \n",
"4 2020-05-03 Servicio principal 2020-01-25 00:00:00 "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" Catastro | \n",
" ReferenciaCP | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
"
\n",
" \n",
" \n",
" \n",
" | 13582 | \n",
" 214734 | \n",
" 001 2004501209UNUNUNPOR | \n",
" 001 2004501209 | \n",
" 2239094 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-24 00:00:00 | \n",
"
\n",
" \n",
" | 13583 | \n",
" 175766 | \n",
" 001 4802009095UN050C0UN | \n",
" 001 4802009095 | \n",
" 6119594 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-03-07 00:00:00 | \n",
"
\n",
" \n",
" | 13584 | \n",
" 122530 | \n",
" 011 3120101590UN070C0UN | \n",
" 011 3120101590 | \n",
" 6111352 | \n",
" N06 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 00:00:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 Catastro ReferenciaCP IdCliente \\\n",
"13582 214734 001 2004501209UNUNUNPOR 001 2004501209 2239094 \n",
"13583 175766 001 4802009095UN050C0UN 001 4802009095 6119594 \n",
"13584 122530 011 3120101590UN070C0UN 011 3120101590 6111352 \n",
"\n",
" Producto Fecha TipoProducto AltaCliente \n",
"13582 N06 2020-04-03 Servicio principal 2020-03-24 00:00:00 \n",
"13583 N01 2020-06-03 Servicio principal 2020-03-07 00:00:00 \n",
"13584 N06 2020-06-03 Servicio principal 2020-02-16 00:00:00 "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_productos.tail(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exploración inicial"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 13585 entries, 0 to 13584\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Unnamed: 0 13585 non-null int64 \n",
" 1 Catastro 13573 non-null object\n",
" 2 ReferenciaCP 13585 non-null object\n",
" 3 IdCliente 13585 non-null int64 \n",
" 4 Producto 13585 non-null object\n",
" 5 Fecha 13585 non-null object\n",
" 6 TipoProducto 13585 non-null object\n",
" 7 AltaCliente 13585 non-null object\n",
"dtypes: int64(2), object(6)\n",
"memory usage: 849.2+ KB\n"
]
}
],
"source": [
"df_productos.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Corregimos los tipos de datos\n",
"Modificamos la carga de datos definiendo:\n",
"* Columnas que utilizar.\n",
"* Columnas que parsear a fecha.\n",
"* Tipo del resto de columnas.\n",
"* Eliminar columna 0"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"df['Catastro'] = df['Catastro'].astype('string')\n",
"df['IdCliente'] = df['IdCliente'].astype('string')\n",
"df['Producto'] = df['Producto'].astype('category')\n",
"df['TipoProducto'] = df['TipoProducto'].astype('category')"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 13585 entries, 0 to 13584\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Unnamed: 0 13585 non-null int64 \n",
" 1 Catastro 13573 non-null string \n",
" 2 ReferenciaCP 13585 non-null object \n",
" 3 IdCliente 13585 non-null string \n",
" 4 Producto 13585 non-null category\n",
" 5 Fecha 13585 non-null object \n",
" 6 TipoProducto 13585 non-null category\n",
" 7 AltaCliente 13585 non-null object \n",
"dtypes: category(2), int64(1), object(3), string(2)\n",
"memory usage: 663.8+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Corregimos el resto de errores\n",
"Si alguna columna no se ha modificado su tipo, puede ser porque contenga errores. Modificamos el tipo indicando que se ignoren los errores."
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"df['Fecha'] = pd.to_datetime(df['Fecha'], errors='coerce')"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 13585 entries, 0 to 13584\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Unnamed: 0 13585 non-null int64 \n",
" 1 Catastro 13573 non-null string \n",
" 2 ReferenciaCP 13585 non-null object \n",
" 3 IdCliente 13585 non-null string \n",
" 4 Producto 13585 non-null category \n",
" 5 Fecha 13585 non-null datetime64[ns]\n",
" 6 TipoProducto 13585 non-null category \n",
" 7 AltaCliente 13585 non-null object \n",
"dtypes: category(2), datetime64[ns](1), int64(1), object(2), string(2)\n",
"memory usage: 663.8+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"df = df.drop('Unnamed: 0',axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exploración y modificación\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Renombramos columnas\n",
"# Catastro renombrar a CatastroMax\n",
"# ReferenciaCP renombrar a CPReferenciado\n",
"df = df.rename(columns={'Catastro':'CatastroMax','ReferenciaCP':'CPReferenciado'})"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" CatastroMax | \n",
" CPReferenciado | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 087 2000501120UN02UN0UN | \n",
" 087 2000501120 | \n",
" 6105422 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 00:00:00 | \n",
"
\n",
" \n",
" | 1 | \n",
" 033 2006900750UN040B0UN | \n",
" 033 2006900750 | \n",
" 6125687 | \n",
" N01 | \n",
" 2020-05-03 | \n",
" Servicio sin cuota | \n",
" 2020-03-19 00:00:00 | \n",
"
\n",
" \n",
" | 2 | \n",
" 008 0105902010UN060C0UN | \n",
" 008 0105902010 | \n",
" 6098841 | \n",
" N06 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-01-11 00:00:00 | \n",
"
\n",
" \n",
" | 3 | \n",
" 002 4802008896UN040C0UN | \n",
" 002 4802008896 | \n",
" 6115427 | \n",
" N06 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-02-26 00:00:00 | \n",
"
\n",
" \n",
" | 4 | \n",
" 010 2007902403UN03CN0UN | \n",
" 010 2007902403 | \n",
" 6103115 | \n",
" N04 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-01-25 00:00:00 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 13580 | \n",
" 005 4804400046UNPBUN001 | \n",
" 005 4804400046 | \n",
" 75816 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-14 00:00:00 | \n",
"
\n",
" \n",
" | 13581 | \n",
" 005 0103601575UN020C0UN | \n",
" 005 0103601575 | \n",
" 6109233 | \n",
" N01 | \n",
" 2020-03-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-06 00:00:00 | \n",
"
\n",
" \n",
" | 13582 | \n",
" 001 2004501209UNUNUNPOR | \n",
" 001 2004501209 | \n",
" 2239094 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-24 00:00:00 | \n",
"
\n",
" \n",
" | 13583 | \n",
" 001 4802009095UN050C0UN | \n",
" 001 4802009095 | \n",
" 6119594 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-03-07 00:00:00 | \n",
"
\n",
" \n",
" | 13584 | \n",
" 011 3120101590UN070C0UN | \n",
" 011 3120101590 | \n",
" 6111352 | \n",
" N06 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 00:00:00 | \n",
"
\n",
" \n",
"
\n",
"
13585 rows × 7 columns
\n",
"
"
],
"text/plain": [
" CatastroMax CPReferenciado IdCliente Producto Fecha \\\n",
"0 087 2000501120UN02UN0UN 087 2000501120 6105422 N01 2020-06-03 \n",
"1 033 2006900750UN040B0UN 033 2006900750 6125687 N01 2020-05-03 \n",
"2 008 0105902010UN060C0UN 008 0105902010 6098841 N06 2020-03-03 \n",
"3 002 4802008896UN040C0UN 002 4802008896 6115427 N06 2020-05-03 \n",
"4 010 2007902403UN03CN0UN 010 2007902403 6103115 N04 2020-05-03 \n",
"... ... ... ... ... ... \n",
"13580 005 4804400046UNPBUN001 005 4804400046 75816 N06 2020-04-03 \n",
"13581 005 0103601575UN020C0UN 005 0103601575 6109233 N01 2020-03-03 \n",
"13582 001 2004501209UNUNUNPOR 001 2004501209 2239094 N06 2020-04-03 \n",
"13583 001 4802009095UN050C0UN 001 4802009095 6119594 N01 2020-06-03 \n",
"13584 011 3120101590UN070C0UN 011 3120101590 6111352 N06 2020-06-03 \n",
"\n",
" TipoProducto AltaCliente \n",
"0 Servicio principal 2020-02-16 00:00:00 \n",
"1 Servicio sin cuota 2020-03-19 00:00:00 \n",
"2 Servicio principal 2020-01-11 00:00:00 \n",
"3 Servicio principal 2020-02-26 00:00:00 \n",
"4 Servicio principal 2020-01-25 00:00:00 \n",
"... ... ... \n",
"13580 Servicio principal 2020-03-14 00:00:00 \n",
"13581 Servicio sin cuota 2020-02-06 00:00:00 \n",
"13582 Servicio principal 2020-03-24 00:00:00 \n",
"13583 Servicio principal 2020-03-07 00:00:00 \n",
"13584 Servicio principal 2020-02-16 00:00:00 \n",
"\n",
"[13585 rows x 7 columns]"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" CatastroMax | \n",
" CPReferenciado | \n",
" IdCliente | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
" Producto_N01 | \n",
" Producto_N03 | \n",
" Producto_N04 | \n",
" Producto_N06 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 087 2000501120UN02UN0UN | \n",
" 087 2000501120 | \n",
" 6105422 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 00:00:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 033 2006900750UN040B0UN | \n",
" 033 2006900750 | \n",
" 6125687 | \n",
" 2020-05-03 | \n",
" Servicio sin cuota | \n",
" 2020-03-19 00:00:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 008 0105902010UN060C0UN | \n",
" 008 0105902010 | \n",
" 6098841 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-01-11 00:00:00 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 3 | \n",
" 002 4802008896UN040C0UN | \n",
" 002 4802008896 | \n",
" 6115427 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-02-26 00:00:00 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 4 | \n",
" 010 2007902403UN03CN0UN | \n",
" 010 2007902403 | \n",
" 6103115 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-01-25 00:00:00 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" CatastroMax CPReferenciado IdCliente Fecha \\\n",
"0 087 2000501120UN02UN0UN 087 2000501120 6105422 2020-06-03 \n",
"1 033 2006900750UN040B0UN 033 2006900750 6125687 2020-05-03 \n",
"2 008 0105902010UN060C0UN 008 0105902010 6098841 2020-03-03 \n",
"3 002 4802008896UN040C0UN 002 4802008896 6115427 2020-05-03 \n",
"4 010 2007902403UN03CN0UN 010 2007902403 6103115 2020-05-03 \n",
"\n",
" TipoProducto AltaCliente Producto_N01 Producto_N03 \\\n",
"0 Servicio principal 2020-02-16 00:00:00 1 0 \n",
"1 Servicio sin cuota 2020-03-19 00:00:00 1 0 \n",
"2 Servicio principal 2020-01-11 00:00:00 0 0 \n",
"3 Servicio principal 2020-02-26 00:00:00 0 0 \n",
"4 Servicio principal 2020-01-25 00:00:00 0 0 \n",
"\n",
" Producto_N04 Producto_N06 \n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 1 \n",
"3 0 1 \n",
"4 1 0 "
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Generar variables dummies\n",
"df_dummies_Producto = pd.get_dummies(df, columns=['Producto'])\n",
"df_dummies_Producto.head()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 13585 entries, 0 to 13584\n",
"Data columns (total 11 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CatastroMax 13573 non-null string \n",
" 1 CPReferenciado 13585 non-null object \n",
" 2 IdCliente 13585 non-null string \n",
" 3 Fecha 13585 non-null datetime64[ns]\n",
" 4 TipoProducto 13585 non-null category \n",
" 5 AltaCliente 13585 non-null object \n",
" 6 Producto_N01 13585 non-null uint8 \n",
" 7 Producto_N03 13585 non-null uint8 \n",
" 8 Producto_N04 13585 non-null uint8 \n",
" 9 Producto_N06 13585 non-null uint8 \n",
" 10 Facturacion 13585 non-null float64 \n",
"dtypes: category(1), datetime64[ns](1), float64(1), object(2), string(2), uint8(4)\n",
"memory usage: 703.4+ KB\n"
]
}
],
"source": [
"df_dummies_Producto.info()"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"# Voy a generar una columna con supuestos datos de facturación\n",
"df_dummies_Producto ['Facturacion'] = df_dummies_Producto['IdCliente'].astype('int') / 40 * 1.3"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" CatastroMax | \n",
" CPReferenciado | \n",
" IdCliente | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
" Producto_N01 | \n",
" Producto_N03 | \n",
" Producto_N04 | \n",
" Producto_N06 | \n",
" Facturacion | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 087 2000501120UN02UN0UN | \n",
" 087 2000501120 | \n",
" 6105422 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 00:00:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 198426.2150 | \n",
"
\n",
" \n",
" | 1 | \n",
" 033 2006900750UN040B0UN | \n",
" 033 2006900750 | \n",
" 6125687 | \n",
" 2020-05-03 | \n",
" Servicio sin cuota | \n",
" 2020-03-19 00:00:00 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 199084.8275 | \n",
"
\n",
" \n",
" | 2 | \n",
" 008 0105902010UN060C0UN | \n",
" 008 0105902010 | \n",
" 6098841 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-01-11 00:00:00 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 198212.3325 | \n",
"
\n",
" \n",
" | 3 | \n",
" 002 4802008896UN040C0UN | \n",
" 002 4802008896 | \n",
" 6115427 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-02-26 00:00:00 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 198751.3775 | \n",
"
\n",
" \n",
" | 4 | \n",
" 010 2007902403UN03CN0UN | \n",
" 010 2007902403 | \n",
" 6103115 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-01-25 00:00:00 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 198351.2375 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" CatastroMax CPReferenciado IdCliente Fecha \\\n",
"0 087 2000501120UN02UN0UN 087 2000501120 6105422 2020-06-03 \n",
"1 033 2006900750UN040B0UN 033 2006900750 6125687 2020-05-03 \n",
"2 008 0105902010UN060C0UN 008 0105902010 6098841 2020-03-03 \n",
"3 002 4802008896UN040C0UN 002 4802008896 6115427 2020-05-03 \n",
"4 010 2007902403UN03CN0UN 010 2007902403 6103115 2020-05-03 \n",
"\n",
" TipoProducto AltaCliente Producto_N01 Producto_N03 \\\n",
"0 Servicio principal 2020-02-16 00:00:00 1 0 \n",
"1 Servicio sin cuota 2020-03-19 00:00:00 1 0 \n",
"2 Servicio principal 2020-01-11 00:00:00 0 0 \n",
"3 Servicio principal 2020-02-26 00:00:00 0 0 \n",
"4 Servicio principal 2020-01-25 00:00:00 0 0 \n",
"\n",
" Producto_N04 Producto_N06 Facturacion \n",
"0 0 0 198426.2150 \n",
"1 0 0 199084.8275 \n",
"2 0 1 198212.3325 \n",
"3 0 1 198751.3775 \n",
"4 1 0 198351.2375 "
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_dummies_Producto.head()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" IdCliente | \n",
" Fecha | \n",
" Producto_N01 | \n",
" Producto_N03 | \n",
" Producto_N04 | \n",
" Producto_N06 | \n",
" Facturacion | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 100787 | \n",
" 2020-05-03 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 3275.5775 | \n",
"
\n",
" \n",
" | 1 | \n",
" 101376 | \n",
" 2020-05-03 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 3294.7200 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1018161 | \n",
" 2020-05-03 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 33090.2325 | \n",
"
\n",
" \n",
" | 3 | \n",
" 103899 | \n",
" 2020-05-03 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 3376.7175 | \n",
"
\n",
" \n",
" | 4 | \n",
" 103899 | \n",
" 2020-06-03 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 3376.7175 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" IdCliente Fecha Producto_N01 Producto_N03 Producto_N04 \\\n",
"0 100787 2020-05-03 0 0 0 \n",
"1 101376 2020-05-03 0 0 0 \n",
"2 1018161 2020-05-03 0 0 0 \n",
"3 103899 2020-05-03 0 0 1 \n",
"4 103899 2020-06-03 0 0 1 \n",
"\n",
" Producto_N06 Facturacion \n",
"0 1 3275.5775 \n",
"1 1 3294.7200 \n",
"2 1 33090.2325 \n",
"3 0 3376.7175 \n",
"4 0 3376.7175 "
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_productos_cliente = df_dummies_Producto.groupby(['IdCliente', 'Fecha']).sum().reset_index()\n",
"df_productos_cliente.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Consultas a dataframes\n",
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Contrataciones del mes de marzo"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" CatastroMax | \n",
" CPReferenciado | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
"
\n",
" \n",
" \n",
" \n",
" | 7 | \n",
" 005A2007401640UN030C0UN | \n",
" 005A2007401640 | \n",
" 2305530 | \n",
" N04 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-02-12 00:00:00 | \n",
"
\n",
" \n",
" | 8 | \n",
" 027 0100202420UN020B0UN | \n",
" 027 0100202420 | \n",
" 6097637 | \n",
" N04 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-01-09 00:00:00 | \n",
"
\n",
" \n",
" | 11 | \n",
" 004 2003201842UN040A0UN | \n",
" 004 2003201842 | \n",
" 6122564 | \n",
" N04 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-18 00:00:00 | \n",
"
\n",
" \n",
" | 15 | \n",
" 002A4804300015UN02DR0UN | \n",
" 002A4804300015 | \n",
" 6102122 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-01-29 00:00:00 | \n",
"
\n",
" \n",
" | 17 | \n",
" 008 4802007880UN060C0UN | \n",
" 008 4802007880 | \n",
" 6125473 | \n",
" N01 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-20 00:00:00 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 13575 | \n",
" 005 3190300006UN03DR0UN | \n",
" 005 3190300006 | \n",
" 6097411 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-01-16 00:00:00 | \n",
"
\n",
" \n",
" | 13577 | \n",
" 004 4890200004UN020E0UN | \n",
" 004 4890200004 | \n",
" 6106187 | \n",
" N04 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-02-06 00:00:00 | \n",
"
\n",
" \n",
" | 13579 | \n",
" 012 0105902035UN04DR0UN | \n",
" 012 0105902035 | \n",
" 486645 | \n",
" N04 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-01-20 00:00:00 | \n",
"
\n",
" \n",
" | 13580 | \n",
" 005 4804400046UNPBUN001 | \n",
" 005 4804400046 | \n",
" 75816 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-14 00:00:00 | \n",
"
\n",
" \n",
" | 13582 | \n",
" 001 2004501209UNUNUNPOR | \n",
" 001 2004501209 | \n",
" 2239094 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-24 00:00:00 | \n",
"
\n",
" \n",
"
\n",
"
2955 rows × 7 columns
\n",
"
"
],
"text/plain": [
" CatastroMax CPReferenciado IdCliente Producto Fecha \\\n",
"7 005A2007401640UN030C0UN 005A2007401640 2305530 N04 2020-04-03 \n",
"8 027 0100202420UN020B0UN 027 0100202420 6097637 N04 2020-04-03 \n",
"11 004 2003201842UN040A0UN 004 2003201842 6122564 N04 2020-04-03 \n",
"15 002A4804300015UN02DR0UN 002A4804300015 6102122 N06 2020-04-03 \n",
"17 008 4802007880UN060C0UN 008 4802007880 6125473 N01 2020-04-03 \n",
"... ... ... ... ... ... \n",
"13575 005 3190300006UN03DR0UN 005 3190300006 6097411 N06 2020-04-03 \n",
"13577 004 4890200004UN020E0UN 004 4890200004 6106187 N04 2020-04-03 \n",
"13579 012 0105902035UN04DR0UN 012 0105902035 486645 N04 2020-04-03 \n",
"13580 005 4804400046UNPBUN001 005 4804400046 75816 N06 2020-04-03 \n",
"13582 001 2004501209UNUNUNPOR 001 2004501209 2239094 N06 2020-04-03 \n",
"\n",
" TipoProducto AltaCliente \n",
"7 Servicio principal 2020-02-12 00:00:00 \n",
"8 Servicio principal 2020-01-09 00:00:00 \n",
"11 Servicio principal 2020-03-18 00:00:00 \n",
"15 Servicio principal 2020-01-29 00:00:00 \n",
"17 Servicio principal 2020-03-20 00:00:00 \n",
"... ... ... \n",
"13575 Servicio principal 2020-01-16 00:00:00 \n",
"13577 Servicio principal 2020-02-06 00:00:00 \n",
"13579 Servicio principal 2020-01-20 00:00:00 \n",
"13580 Servicio principal 2020-03-14 00:00:00 \n",
"13582 Servicio principal 2020-03-24 00:00:00 \n",
"\n",
"[2955 rows x 7 columns]"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('Fecha.dt.month == 4')"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "Can only use .dt accessor with datetimelike values",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_19684/3407498302.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'AltaCliente.dt.day <= 10'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mquery\u001b[0;34m(self, expr, inplace, **kwargs)\u001b[0m\n\u001b[1;32m 4058\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"level\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"level\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4059\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"target\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4060\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4062\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36meval\u001b[0;34m(self, expr, inplace, **kwargs)\u001b[0m\n\u001b[1;32m 4189\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"resolvers\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"resolvers\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolvers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4190\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4191\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_eval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4192\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4193\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mselect_dtypes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minclude\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexclude\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/eval.py\u001b[0m in \u001b[0;36meval\u001b[0;34m(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)\u001b[0m\n\u001b[1;32m 346\u001b[0m )\n\u001b[1;32m 347\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 348\u001b[0;31m \u001b[0mparsed_expr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mExpr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparser\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 349\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 350\u001b[0m \u001b[0;31m# construct the engine and evaluate the parsed expression\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, expr, engine, parser, env, level)\u001b[0m\n\u001b[1;32m 804\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 805\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_visitor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPARSERS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mparser\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparser\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 806\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mterms\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 807\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 823\u001b[0m \u001b[0mParse\u001b[0m \u001b[0man\u001b[0m \u001b[0mexpression\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 824\u001b[0m \"\"\"\n\u001b[0;32m--> 825\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_visitor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 826\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 827\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"visit_\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mvisitor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mvisitor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_Module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit_Module\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mSyntaxError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"only a single expression is allowed\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[0mexpr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 417\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 418\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 419\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_Expr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"visit_\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mvisitor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mvisitor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_Module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit_Expr\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 419\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_Expr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 420\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 422\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_rewrite_membership_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"visit_\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mvisitor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mvisitor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_Module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit_Compare\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 716\u001b[0m \u001b[0mop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtranslate_In\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mops\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 717\u001b[0m \u001b[0mbinop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mBinOp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcomps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 718\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbinop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 719\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 720\u001b[0m \u001b[0;31m# recursive case: we have a chained comparison, a CMP b CMP c, etc.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"visit_\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mvisitor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mvisitor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_Module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit_BinOp\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 531\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_BinOp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m \u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_class\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_transform_eq_ne\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 533\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_downcast_constants\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_evaluate_binop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_class\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36m_maybe_transform_eq_ne\u001b[0;34m(self, node, left, right)\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_maybe_transform_eq_ne\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mleft\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 452\u001b[0;31m \u001b[0mleft\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mside\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"left\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 453\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mright\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[0mright\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mside\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"right\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"visit_\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mvisitor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mvisitor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_Module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit_Attribute\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 639\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mctx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLoad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 640\u001b[0m \u001b[0;31m# resolve the value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 641\u001b[0;31m \u001b[0mresolved\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 642\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 643\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolved\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"visit_\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mvisitor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mvisitor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mvisit_Module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expr.py\u001b[0m in \u001b[0;36mvisit_Attribute\u001b[0;34m(self, node, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0mresolved\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvisit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolved\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_tmp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mterm_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 5485\u001b[0m ):\n\u001b[1;32m 5486\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5489\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/accessor.py\u001b[0m in \u001b[0;36m__get__\u001b[0;34m(self, obj, cls)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;31m# we're accessing the attribute of the class, i.e., Dataset.geo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_accessor\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m \u001b[0maccessor_obj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_accessor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 182\u001b[0m \u001b[0;31m# Replace the property with the accessor object. Inspired by:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;31m# https://www.pydanny.com/cached-property.html\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/pandas/core/indexes/accessors.py\u001b[0m in \u001b[0;36m__new__\u001b[0;34m(cls, data)\u001b[0m\n\u001b[1;32m 504\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mPeriodProperties\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 505\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 506\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Can only use .dt accessor with datetimelike values\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: Can only use .dt accessor with datetimelike values"
]
}
],
"source": [
"df.query('AltaCliente.dt.day <= 10')\n",
"#No funciona pq AltaCliente no es de tipo datetime\n"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"df['AltaCliente'] = pd.to_datetime(df['AltaCliente'], errors='coerce')"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 13585 entries, 0 to 13584\n",
"Data columns (total 7 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 CatastroMax 13573 non-null string \n",
" 1 CPReferenciado 13585 non-null object \n",
" 2 IdCliente 13585 non-null string \n",
" 3 Producto 13585 non-null category \n",
" 4 Fecha 13585 non-null datetime64[ns]\n",
" 5 TipoProducto 13585 non-null category \n",
" 6 AltaCliente 13585 non-null datetime64[ns]\n",
"dtypes: category(2), datetime64[ns](2), object(1), string(2)\n",
"memory usage: 557.6+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" CatastroMax | \n",
" CPReferenciado | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
"
\n",
" \n",
" \n",
" \n",
" | 3 | \n",
" 002 4802008896UN040C0UN | \n",
" 002 4802008896 | \n",
" 6115427 | \n",
" N06 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-02-26 | \n",
"
\n",
" \n",
" | 4 | \n",
" 010 2007902403UN03CN0UN | \n",
" 010 2007902403 | \n",
" 6103115 | \n",
" N04 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-01-25 | \n",
"
\n",
" \n",
" | 5 | \n",
" 003 2090200600UN010C0UN | \n",
" 003 2090200600 | \n",
" 6118145 | \n",
" N06 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-03-01 | \n",
"
\n",
" \n",
" | 8 | \n",
" 027 0100202420UN020B0UN | \n",
" 027 0100202420 | \n",
" 6097637 | \n",
" N04 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-01-09 | \n",
"
\n",
" \n",
" | 12 | \n",
" 008 4807800250UN040C0UN | \n",
" 008 4807800250 | \n",
" 409451 | \n",
" N06 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-04-26 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 13578 | \n",
" 008 2608400309UN040A0UN | \n",
" 008 2608400309 | \n",
" 6101386 | \n",
" N06 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-02-07 | \n",
"
\n",
" \n",
" | 13579 | \n",
" 012 0105902035UN04DR0UN | \n",
" 012 0105902035 | \n",
" 486645 | \n",
" N04 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-01-20 | \n",
"
\n",
" \n",
" | 13581 | \n",
" 005 0103601575UN020C0UN | \n",
" 005 0103601575 | \n",
" 6109233 | \n",
" N01 | \n",
" 2020-03-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-06 | \n",
"
\n",
" \n",
" | 13582 | \n",
" 001 2004501209UNUNUNPOR | \n",
" 001 2004501209 | \n",
" 2239094 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-24 | \n",
"
\n",
" \n",
" | 13583 | \n",
" 001 4802009095UN050C0UN | \n",
" 001 4802009095 | \n",
" 6119594 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-03-07 | \n",
"
\n",
" \n",
"
\n",
"
9498 rows × 7 columns
\n",
"
"
],
"text/plain": [
" CatastroMax CPReferenciado IdCliente Producto Fecha \\\n",
"3 002 4802008896UN040C0UN 002 4802008896 6115427 N06 2020-05-03 \n",
"4 010 2007902403UN03CN0UN 010 2007902403 6103115 N04 2020-05-03 \n",
"5 003 2090200600UN010C0UN 003 2090200600 6118145 N06 2020-05-03 \n",
"8 027 0100202420UN020B0UN 027 0100202420 6097637 N04 2020-04-03 \n",
"12 008 4807800250UN040C0UN 008 4807800250 409451 N06 2020-05-03 \n",
"... ... ... ... ... ... \n",
"13578 008 2608400309UN040A0UN 008 2608400309 6101386 N06 2020-03-03 \n",
"13579 012 0105902035UN04DR0UN 012 0105902035 486645 N04 2020-04-03 \n",
"13581 005 0103601575UN020C0UN 005 0103601575 6109233 N01 2020-03-03 \n",
"13582 001 2004501209UNUNUNPOR 001 2004501209 2239094 N06 2020-04-03 \n",
"13583 001 4802009095UN050C0UN 001 4802009095 6119594 N01 2020-06-03 \n",
"\n",
" TipoProducto AltaCliente \n",
"3 Servicio principal 2020-02-26 \n",
"4 Servicio principal 2020-01-25 \n",
"5 Servicio principal 2020-03-01 \n",
"8 Servicio principal 2020-01-09 \n",
"12 Servicio principal 2020-04-26 \n",
"... ... ... \n",
"13578 Servicio principal 2020-02-07 \n",
"13579 Servicio principal 2020-01-20 \n",
"13581 Servicio sin cuota 2020-02-06 \n",
"13582 Servicio principal 2020-03-24 \n",
"13583 Servicio principal 2020-03-07 \n",
"\n",
"[9498 rows x 7 columns]"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('AltaCliente.dt.day <= 10 or AltaCliente.dt.day>=20')"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"### Consultas más complejas"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" CatastroMax | \n",
" CPReferenciado | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
"
\n",
" \n",
" \n",
" \n",
" | 13 | \n",
" 016 0819600232UNLOUN001 | \n",
" 016 0819600232 | \n",
" 6106541 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-02 | \n",
"
\n",
" \n",
" | 27 | \n",
" 003 4801500208UN01IZ0UN | \n",
" 003 4801500208 | \n",
" 6106500 | \n",
" N01 | \n",
" 2020-04-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-01 | \n",
"
\n",
" \n",
" | 34 | \n",
" 051 4802004630UN04IZ0UN | \n",
" 051 4802004630 | \n",
" 6119105 | \n",
" N01 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-03-07 | \n",
"
\n",
" \n",
" | 44 | \n",
" 002 2001901250UN01UN0UN | \n",
" 002 2001901250 | \n",
" 6115892 | \n",
" N01 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-08 | \n",
"
\n",
" \n",
" | 53 | \n",
" 167 0818000511E101UN003 | \n",
" 167 0818000511 | \n",
" 6116767 | \n",
" N01 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-03-01 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 13542 | \n",
" 014 0105904530UN05DR0UN | \n",
" 014 0105904530 | \n",
" 6109525 | \n",
" N01 | \n",
" 2020-04-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-07 | \n",
"
\n",
" \n",
" | 13544 | \n",
" 009 4808200107UN010A0UN | \n",
" 009 4808200107 | \n",
" 6105932 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-01 | \n",
"
\n",
" \n",
" | 13570 | \n",
" 003 2008001350UN040H0UN | \n",
" 003 2008001350 | \n",
" 6103683 | \n",
" N01 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-02-01 | \n",
"
\n",
" \n",
" | 13581 | \n",
" 005 0103601575UN020C0UN | \n",
" 005 0103601575 | \n",
" 6109233 | \n",
" N01 | \n",
" 2020-03-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-06 | \n",
"
\n",
" \n",
" | 13583 | \n",
" 001 4802009095UN050C0UN | \n",
" 001 4802009095 | \n",
" 6119594 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-03-07 | \n",
"
\n",
" \n",
"
\n",
"
1279 rows × 7 columns
\n",
"
"
],
"text/plain": [
" CatastroMax CPReferenciado IdCliente Producto Fecha \\\n",
"13 016 0819600232UNLOUN001 016 0819600232 6106541 N01 2020-06-03 \n",
"27 003 4801500208UN01IZ0UN 003 4801500208 6106500 N01 2020-04-03 \n",
"34 051 4802004630UN04IZ0UN 051 4802004630 6119105 N01 2020-05-03 \n",
"44 002 2001901250UN01UN0UN 002 2001901250 6115892 N01 2020-04-03 \n",
"53 167 0818000511E101UN003 167 0818000511 6116767 N01 2020-03-03 \n",
"... ... ... ... ... ... \n",
"13542 014 0105904530UN05DR0UN 014 0105904530 6109525 N01 2020-04-03 \n",
"13544 009 4808200107UN010A0UN 009 4808200107 6105932 N01 2020-06-03 \n",
"13570 003 2008001350UN040H0UN 003 2008001350 6103683 N01 2020-03-03 \n",
"13581 005 0103601575UN020C0UN 005 0103601575 6109233 N01 2020-03-03 \n",
"13583 001 4802009095UN050C0UN 001 4802009095 6119594 N01 2020-06-03 \n",
"\n",
" TipoProducto AltaCliente \n",
"13 Servicio principal 2020-02-02 \n",
"27 Servicio sin cuota 2020-02-01 \n",
"34 Servicio principal 2020-03-07 \n",
"44 Servicio principal 2020-03-08 \n",
"53 Servicio principal 2020-03-01 \n",
"... ... ... \n",
"13542 Servicio sin cuota 2020-02-07 \n",
"13544 Servicio principal 2020-02-01 \n",
"13570 Servicio principal 2020-02-01 \n",
"13581 Servicio sin cuota 2020-02-06 \n",
"13583 Servicio principal 2020-03-07 \n",
"\n",
"[1279 rows x 7 columns]"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"producto = 'N01'\n",
"df.query('AltaCliente.dt.day <= 10 and Producto == @producto')"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" CatastroMax | \n",
" CPReferenciado | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
"
\n",
" \n",
" \n",
" \n",
" | 13 | \n",
" 016 0819600232UNLOUN001 | \n",
" 016 0819600232 | \n",
" 6106541 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-02 | \n",
"
\n",
" \n",
" | 27 | \n",
" 003 4801500208UN01IZ0UN | \n",
" 003 4801500208 | \n",
" 6106500 | \n",
" N01 | \n",
" 2020-04-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-01 | \n",
"
\n",
" \n",
" | 33 | \n",
" 014B2512000200UN03UN004 | \n",
" 014B2512000200 | \n",
" 6110921 | \n",
" N06 | \n",
" 2020-05-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-10 | \n",
"
\n",
" \n",
" | 34 | \n",
" 051 4802004630UN04IZ0UN | \n",
" 051 4802004630 | \n",
" 6119105 | \n",
" N01 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-03-07 | \n",
"
\n",
" \n",
" | 44 | \n",
" 002 2001901250UN01UN0UN | \n",
" 002 2001901250 | \n",
" 6115892 | \n",
" N01 | \n",
" 2020-04-03 | \n",
" Servicio principal | \n",
" 2020-03-08 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 13554 | \n",
" 196 0801901015UN04UN001 | \n",
" 196 0801901015 | \n",
" 6098519 | \n",
" N06 | \n",
" 2020-04-03 | \n",
" Servicio sin cuota | \n",
" 2020-01-10 | \n",
"
\n",
" \n",
" | 13558 | \n",
" 421 0801900368UN050B0UN | \n",
" 421 0801900368 | \n",
" 6139655 | \n",
" N06 | \n",
" 2020-06-03 | \n",
" Servicio sin cuota | \n",
" 2020-05-07 | \n",
"
\n",
" \n",
" | 13570 | \n",
" 003 2008001350UN040H0UN | \n",
" 003 2008001350 | \n",
" 6103683 | \n",
" N01 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-02-01 | \n",
"
\n",
" \n",
" | 13581 | \n",
" 005 0103601575UN020C0UN | \n",
" 005 0103601575 | \n",
" 6109233 | \n",
" N01 | \n",
" 2020-03-03 | \n",
" Servicio sin cuota | \n",
" 2020-02-06 | \n",
"
\n",
" \n",
" | 13583 | \n",
" 001 4802009095UN050C0UN | \n",
" 001 4802009095 | \n",
" 6119594 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-03-07 | \n",
"
\n",
" \n",
"
\n",
"
1598 rows × 7 columns
\n",
"
"
],
"text/plain": [
" CatastroMax CPReferenciado IdCliente Producto Fecha \\\n",
"13 016 0819600232UNLOUN001 016 0819600232 6106541 N01 2020-06-03 \n",
"27 003 4801500208UN01IZ0UN 003 4801500208 6106500 N01 2020-04-03 \n",
"33 014B2512000200UN03UN004 014B2512000200 6110921 N06 2020-05-03 \n",
"34 051 4802004630UN04IZ0UN 051 4802004630 6119105 N01 2020-05-03 \n",
"44 002 2001901250UN01UN0UN 002 2001901250 6115892 N01 2020-04-03 \n",
"... ... ... ... ... ... \n",
"13554 196 0801901015UN04UN001 196 0801901015 6098519 N06 2020-04-03 \n",
"13558 421 0801900368UN050B0UN 421 0801900368 6139655 N06 2020-06-03 \n",
"13570 003 2008001350UN040H0UN 003 2008001350 6103683 N01 2020-03-03 \n",
"13581 005 0103601575UN020C0UN 005 0103601575 6109233 N01 2020-03-03 \n",
"13583 001 4802009095UN050C0UN 001 4802009095 6119594 N01 2020-06-03 \n",
"\n",
" TipoProducto AltaCliente \n",
"13 Servicio principal 2020-02-02 \n",
"27 Servicio sin cuota 2020-02-01 \n",
"33 Servicio sin cuota 2020-02-10 \n",
"34 Servicio principal 2020-03-07 \n",
"44 Servicio principal 2020-03-08 \n",
"... ... ... \n",
"13554 Servicio sin cuota 2020-01-10 \n",
"13558 Servicio sin cuota 2020-05-07 \n",
"13570 Servicio principal 2020-02-01 \n",
"13581 Servicio sin cuota 2020-02-06 \n",
"13583 Servicio principal 2020-03-07 \n",
"\n",
"[1598 rows x 7 columns]"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"producto = 'N01'\n",
"tipoprod = 'Servicio sin cuota'\n",
"df.query('AltaCliente.dt.day <= 10 and (Producto == @producto or TipoProducto == @tipoprod)')"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" CatastroMax | \n",
" CPReferenciado | \n",
" IdCliente | \n",
" Producto | \n",
" Fecha | \n",
" TipoProducto | \n",
" AltaCliente | \n",
" Provincia | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 087 2000501120UN02UN0UN | \n",
" 087 2000501120 | \n",
" 6105422 | \n",
" N01 | \n",
" 2020-06-03 | \n",
" Servicio principal | \n",
" 2020-02-16 | \n",
" 20 | \n",
"
\n",
" \n",
" | 1 | \n",
" 033 2006900750UN040B0UN | \n",
" 033 2006900750 | \n",
" 6125687 | \n",
" N01 | \n",
" 2020-05-03 | \n",
" Servicio sin cuota | \n",
" 2020-03-19 | \n",
" 20 | \n",
"
\n",
" \n",
" | 2 | \n",
" 008 0105902010UN060C0UN | \n",
" 008 0105902010 | \n",
" 6098841 | \n",
" N06 | \n",
" 2020-03-03 | \n",
" Servicio principal | \n",
" 2020-01-11 | \n",
" 01 | \n",
"
\n",
" \n",
" | 3 | \n",
" 002 4802008896UN040C0UN | \n",
" 002 4802008896 | \n",
" 6115427 | \n",
" N06 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-02-26 | \n",
" 48 | \n",
"
\n",
" \n",
" | 4 | \n",
" 010 2007902403UN03CN0UN | \n",
" 010 2007902403 | \n",
" 6103115 | \n",
" N04 | \n",
" 2020-05-03 | \n",
" Servicio principal | \n",
" 2020-01-25 | \n",
" 20 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" CatastroMax CPReferenciado IdCliente Producto Fecha \\\n",
"0 087 2000501120UN02UN0UN 087 2000501120 6105422 N01 2020-06-03 \n",
"1 033 2006900750UN040B0UN 033 2006900750 6125687 N01 2020-05-03 \n",
"2 008 0105902010UN060C0UN 008 0105902010 6098841 N06 2020-03-03 \n",
"3 002 4802008896UN040C0UN 002 4802008896 6115427 N06 2020-05-03 \n",
"4 010 2007902403UN03CN0UN 010 2007902403 6103115 N04 2020-05-03 \n",
"\n",
" TipoProducto AltaCliente Provincia \n",
"0 Servicio principal 2020-02-16 20 \n",
"1 Servicio sin cuota 2020-03-19 20 \n",
"2 Servicio principal 2020-01-11 01 \n",
"3 Servicio principal 2020-02-26 48 \n",
"4 Servicio principal 2020-01-25 20 "
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Extraemos del CatastroMax el código de área\n",
"# Para ello eliminamos primero todos los espacios y posteriormente extraemos el texto de las posiciones 3 a 4\n",
"df['Provincia'] = df['CatastroMax'].str.replace(\" \",\"\")\n",
"df['Provincia'] = df['Provincia'].str.slice(3,5)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Clientes cuya antiguedad sea anterior al 2010"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Facturación media del mes de abril del 2020"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Facturación maxima y minima para Producto N06 de los clientes que no pertenezcan a Bizkaia"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ¿Influye la antigüedad en el gasto medio? Usa df_productos_cliente ya que incluye una columna de Facturación"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}