{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learning Practice 2 for the University of Tulsa's QM-7063 Data Mining Course\n",
    "# Dimension Reduction\n",
    "# Professor: Dr. Abdulrashid, Spring 2023\n",
    "# Noah L. Schrick - 1492657\n",
    "\n",
    "import heapq\n",
    "from collections import defaultdict\n",
    "\n",
    "import pandas as pd\n",
    "import matplotlib.pylab as plt\n",
    "from mlxtend.frequent_patterns import apriori\n",
    "from mlxtend.frequent_patterns import association_rules\n",
    "\n",
    "from surprise import Dataset, Reader, KNNBasic\n",
    "from surprise.model_selection import train_test_split\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Problem 14.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Problem 14.3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Read in Course Topics data\n",
    "courses_df = pd.read_csv('Coursetopics.csv')\n",
    "\n",
    "# surprise needs a long (user id, item id, rating) table. Coursetopics.csv has no\n",
    "# userID/itemID/rating columns (selecting them raised a KeyError), so build them here.\n",
    "# Assumes each row is one student and each column one course holding a 0/1 flag -- TODO confirm.\n",
    "# The row index becomes the user id and the column header becomes the item id.\n",
    "ratings = (\n",
    "    courses_df\n",
    "    .rename_axis('userID')\n",
    "    .reset_index()\n",
    "    .melt(id_vars='userID', var_name='itemID', value_name='rating')\n",
    ")\n",
    "\n",
    "# Fail early if the file does not match the 0/1 rating scale declared for the Reader below\n",
    "assert ratings['rating'].between(0, 1).all(), 'ratings outside the declared (0, 1) scale'\n",
    "\n",
    "# The columns must correspond to user id, item id and ratings (in that order)\n",
    "reader = Reader(rating_scale=(0, 1))\n",
    "data = Dataset.load_from_df(ratings[['userID', 'itemID', 'rating']], reader)\n",
    "trainset = data.build_full_trainset()\n",
    "sim_options = {'name': 'cosine', 'user_based': False}  # compute cosine similarities between items\n",
    "algo = KNNBasic(sim_options=sim_options)\n",
    "algo.fit(trainset)\n",
    "\n",
    "# Predict for a (user, item) pair taken from the data itself so both raw ids are known to\n",
    "# the trainset and r_ui lies inside the rating scale. This is an in-sample sanity check only.\n",
    "sample = ratings.iloc[0]\n",
    "pred = algo.predict(sample['userID'], sample['itemID'], r_ui=sample['rating'], verbose=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Problem 14.4"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "vscode": {
   "interpreter": {
    "hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}