{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "from scipy.odr import *\n", "from scipy.stats import *\n", "import numpy as np\n", "import pandas as pd\n", "import os\n", "import time\n", "import matplotlib.pyplot as plt\n", "from multiprocessing import Pool" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def cast_to_int(row):\n", " try:\n", " return np.array([a if float(a) >= 0 else 0 for a in row[2:-1]], dtype=np.uint8)\n", " except Exception as e:\n", " return None\n", " \n", "def load_csv(file):\n", " temp_df = pd.read_csv(file, header=None, names = [\"UserID\", \"Age\", \"Gender\"], delimiter=\";\")\n", " return temp_df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 298 ms, sys: 443 ms, total: 741 ms\n", "Wall time: 937 ms\n" ] } ], "source": [ "%%time\n", "pool = Pool(os.cpu_count() - 2)\n", "data_files = [\"DataStudyCollection/%s\" % file for file in os.listdir(\"DataStudyCollection\") if file.endswith(\".csv\") and \"userData\" in file]\n", "df_lst = pool.map(load_csv, data_files)\n", "dfAll = pd.concat(df_lst)\n", "dfAll = dfAll.sort_values(\"UserID\")\n", "dfAll = dfAll.reset_index(drop=True)\n", "pool.close()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "24.166666666666668" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfAll.Age.mean()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.4245742398014511" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfAll.Age.std()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "21" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfAll.Age.min()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "26" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfAll.Age.max()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
UserIDAgeGender
0123male
1224male
2325male
3425male
4526male
5623male
6721female
7824male
8924male
91024male
101125female
111226male
121322female
131424male
141524male
151626female
161726male
171823male
\n", "
" ], "text/plain": [ " UserID Age Gender\n", "0 1 23 male\n", "1 2 24 male\n", "2 3 25 male\n", "3 4 25 male\n", "4 5 26 male\n", "5 6 23 male\n", "6 7 21 female\n", "7 8 24 male\n", "8 9 24 male\n", "9 10 24 male\n", "10 11 25 female\n", "11 12 26 male\n", "12 13 22 female\n", "13 14 24 male\n", "14 15 24 male\n", "15 16 26 female\n", "16 17 26 male\n", "17 18 23 male" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfAll" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }