diff --git a/Project_Description.docx b/Project_Description.docx index eba1be8..4cfb91b 100644 Binary files a/Project_Description.docx and b/Project_Description.docx differ diff --git a/Report/Schrick-Noah_QM-7093_Final.aux b/Report/Schrick-Noah_QM-7093_Final.aux index 9b6ec3e..9768fd9 100644 --- a/Report/Schrick-Noah_QM-7093_Final.aux +++ b/Report/Schrick-Noah_QM-7093_Final.aux @@ -27,6 +27,10 @@ \newlabel{fig:after_i_drop}{{3}{3}{Collection Sample After Removing Invoice and Invoice Date}{figure.3}{}} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Collection Sample After Removing Empty CustomerID fields }}{3}{figure.4}\protected@file@percent } \newlabel{fig:after_ci_drop}{{4}{3}{Collection Sample After Removing Empty CustomerID fields}{figure.4}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Queries}{3}{subsection.2.2}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {3}Metadata}{3}{section.3}\protected@file@percent } -\gdef \@abspage@last{3} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Queries}{4}{subsection.2.2}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Records with HighDemand}}{5}{figure.5}\protected@file@percent } +\newlabel{fig:high-demand}{{5}{5}{Records with HighDemand}{figure.5}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Records with Price Greater than Four}}{6}{figure.6}\protected@file@percent } +\newlabel{fig:gt_four}{{6}{6}{Records with Price Greater than Four}{figure.6}{}} +\@writefile{toc}{\contentsline {section}{\numberline {3}Metadata}{7}{section.3}\protected@file@percent } +\gdef \@abspage@last{7} diff --git a/Report/Schrick-Noah_QM-7093_Final.log b/Report/Schrick-Noah_QM-7093_Final.log index d42fc58..af8213f 100644 --- a/Report/Schrick-Noah_QM-7093_Final.log +++ b/Report/Schrick-Noah_QM-7093_Final.log @@ -1,8 +1,8 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 
2022/Arch Linux) (preloaded format=pdflatex 2022.11.8) 5 DEC 2022 16:35 +This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.11.8) 6 DEC 2022 17:13 entering extended mode restricted \write18 enabled. %&-line parsing enabled. -**Schrick-Noah_QM-7093_Final.tex +**Schrick-Noah_QM-7093_Final (./Schrick-Noah_QM-7093_Final.tex LaTeX2e <2021-11-15> patch level 1 L3 programming layer <2022-04-10> @@ -297,83 +297,123 @@ Overfull \hbox (27.74675pt too wide) in paragraph at lines 44--44 e[] --file[] Project_Data.csv[] [] -<../images/mongoimport.png, id=41, 1177.39874pt x 162.6075pt> +[2] +<../images/mongoimport.png, id=49, 1177.39874pt x 162.6075pt> File: ../images/mongoimport.png Graphic file (type png) -Package pdftex.def Info: ../images/mongoimport.png used on input line 48. +Package pdftex.def Info: ../images/mongoimport.png used on input line 50. (pdftex.def) Requested size: 345.0pt x 47.64624pt. -LaTeX Warning: `!h' float specifier changed to `!ht'. - - -Overfull \hbox (27.74675pt too wide) in paragraph at lines 58--58 +Overfull \hbox (27.74675pt too wide) in paragraph at lines 61--61 []\OT1/cmtt/m/n/10 db.Project_Data.updateMany({},[] {$unset:[] {[] "Invoice":[] "",[] "InvoiceDate":[] [] -[2] -<../images/prior_invoice_drop.png, id=51, 1926.19624pt x 151.56625pt> +<../images/prior_invoice_drop.png, id=53, 1926.19624pt x 151.56625pt> File: ../images/prior_invoice_drop.png Graphic file (type png) Package pdftex.def Info: ../images/prior_invoice_drop.png used on input line 6 -2. +7. (pdftex.def) Requested size: 345.0pt x 27.1466pt. -<../images/after_invoice_removal.png, id=52, 1753.55125pt x 110.4125pt> +<../images/after_invoice_removal.png, id=54, 1753.55125pt x 110.4125pt> File: ../images/after_invoice_removal.png Graphic file (type png) Package pdftex.def Info: ../images/after_invoice_removal.png used on input lin -e 70. +e 75. (pdftex.def) Requested size: 345.0pt x 21.72156pt. 
-Overfull \hbox (32.9967pt too wide) in paragraph at lines 81--81 +Overfull \hbox (32.9967pt too wide) in paragraph at lines 86--86 []\OT1/cmtt/m/n/10 db.Project_Data.updateMany({"CustomerID"[] :[] ""},[] {[] $u nset[] :[] {"CustomerID"[] [] -<../images/empty_removal.png, id=53, 1531.7225pt x 106.3975pt> +<../images/empty_removal.png, id=55, 1531.7225pt x 106.3975pt> File: ../images/empty_removal.png Graphic file (type png) -Package pdftex.def Info: ../images/empty_removal.png used on input line 85. +Package pdftex.def Info: ../images/empty_removal.png used on input line 90. (pdftex.def) Requested size: 345.0pt x 23.96437pt. [3 pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) ha s been already used, duplicate ignored ...shipout:D \box_use:N \l_shipout_box \__shipout_drop_firstpage_... -l.99 \end{document} - +l.98 \begin{spverbatim} + pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) ha s been already used, duplicate ignored ...shipout:D \box_use:N \l_shipout_box \__shipout_drop_firstpage_... -l.99 \end{document} - +l.98 \begin{spverbatim} + pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) ha s been already used, duplicate ignored ...shipout:D \box_use:N \l_shipout_box \__shipout_drop_firstpage_... -l.99 \end{document} - +l.98 \begin{spverbatim} + pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) ha s been already used, duplicate ignored ...shipout:D \box_use:N \l_shipout_box \__shipout_drop_firstpage_... 
-l.99 \end{document} - <../images/mongoimport.png> <../images/prior_invoice_drop.p -ng> <../images/after_invoice_removal.png> <../images/empty_removal.png>] -(./Schrick-Noah_QM-7093_Final.aux) +l.98 \begin{spverbatim} + <../images/mongoimport.png> <../images/prior_invoice_dr +op.png> <../images/after_invoice_removal.png> <../images/empty_removal.png>] +<../images/high_demand.png, id=69, 727.71875pt x 912.40875pt> +File: ../images/high_demand.png Graphic file (type png) + +Package pdftex.def Info: ../images/high_demand.png used on input line 130. +(pdftex.def) Requested size: 345.0pt x 432.56332pt. + + +LaTeX Warning: `!h' float specifier changed to `!ht'. + +<../images/gt_four.png, id=70, 422.57875pt x 895.345pt> +File: ../images/gt_four.png Graphic file (type png) + +Package pdftex.def Info: ../images/gt_four.png used on input line 145. +(pdftex.def) Requested size: 211.28885pt x 447.6714pt. + +LaTeX Warning: `!h' float specifier changed to `!ht'. + +[4] [5 +pdfTeX warning (ext4): destination with the same identifier (name{figure.5}) ha +s been already used, duplicate ignored + ...shipout:D \box_use:N \l_shipout_box + \__shipout_drop_firstpage_... +l.151 \clearpage + <../images/high_demand.png>] [6 +pdfTeX warning (ext4): destination with the same identifier (name{figure.6}) ha +s been already used, duplicate ignored + ...shipout:D \box_use:N \l_shipout_box + \__shipout_drop_firstpage_... +l.151 \clearpage + <../images/gt_four.png>] +Overfull \hbox (22.4968pt too wide) in paragraph at lines 177--177 +[][] [] [] [] \OT1/cmtt/m/n/10 {"key":[] "Author",[] "value":[] {"LName":[] "Sc +hrick",[] "FName":[] "Noah"}},[] + [] + + +Overfull \hbox (48.74657pt too wide) in paragraph at lines 177--177 +[][] [] [] [] \OT1/cmtt/m/n/10 {"key":[] "RevisionTimestamp",[] "value":[] ISOD +ate("2022-12-08T10:01:00Z")},[] + [] + +[7 + +] (./Schrick-Noah_QM-7093_Final.aux) Package rerunfilecheck Info: File `Schrick-Noah_QM-7093_Final.out' has not chan ged. 
(rerunfilecheck) Checksum: 7F26178B815D0544D2E6EBEAE170E4EE;542. ) Here is how much of TeX's memory you used: - 8314 strings out of 478238 - 132476 string characters out of 5850456 - 424143 words of memory out of 5000000 - 26444 multiletter control sequences out of 15000+600000 + 8338 strings out of 478238 + 132984 string characters out of 5850456 + 426246 words of memory out of 5000000 + 26460 multiletter control sequences out of 15000+600000 474017 words of font info for 44 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 - 60i,6n,63p,648b,466s stack positions out of 5000i,500n,10000p,200000b,80000s + 60i,7n,63p,644b,360s stack positions out of 5000i,500n,10000p,200000b,80000s {/usr/share/texmf-dist/fonts/enc/dvips/cm-super/cm-super-ts1.enc} -Output written on Schrick-Noah_QM-7093_Final.pdf (3 pages, 207165 bytes). +Output written on Schrick-Noah_QM-7093_Final.pdf (7 pages, 345874 bytes). PDF statistics: - 103 PDF objects out of 1000 (max. 8388607) - 75 compressed objects within 1 object stream - 14 named destinations out of 1000 (max. 500000) - 61 words of extra memory for PDF output out of 10000 (max. 10000000) + 131 PDF objects out of 1000 (max. 8388607) + 95 compressed objects within 1 object stream + 20 named destinations out of 1000 (max. 500000) + 71 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/Report/Schrick-Noah_QM-7093_Final.pdf b/Report/Schrick-Noah_QM-7093_Final.pdf index a86bb8b..5a330f0 100644 Binary files a/Report/Schrick-Noah_QM-7093_Final.pdf and b/Report/Schrick-Noah_QM-7093_Final.pdf differ diff --git a/Report/Schrick-Noah_QM-7093_Final.tex b/Report/Schrick-Noah_QM-7093_Final.tex index 3fb3aff..14b7140 100644 --- a/Report/Schrick-Noah_QM-7093_Final.tex +++ b/Report/Schrick-Noah_QM-7093_Final.tex @@ -7,7 +7,7 @@ \usepackage{float} \usepackage{indentfirst} \setlength{\parskip}{\baselineskip}% -\setlength{\belowcaptionskip}{5pt} +\setlength{\belowcaptionskip}{8pt} \title{QM 7093: Enterprise Data Systems: NoSQL with MongoDB} @@ -43,7 +43,9 @@ To reduce the amount of manual insertions and minimize the risk of human inserti mongoimport --db QM_7093_Final --headerline --file Project_Data.csv --type csv \end{spverbatim} -\begin{figure}[h!] +In real-world or large-dataset applications, this is not an ideal approach. Inserting data only to subsequently remove it is inefficient. Pre-processing the data to selectively remove unwanted records before insertion would be a better solution. + +\begin{figure}[!h!] \centering \includegraphics[width=\linewidth]{"../images/mongoimport.png"} \vspace*{-6mm} @@ -51,12 +53,15 @@ To reduce the amount of manual insertions and minimize the risk of human inserti \label{fig:import} \end{figure} + Removing Invoice and InvoiceDate from the Project\_Data collection can be performed with: \begin{spverbatim} db.Project_Data.updateMany({}, {$unset: { "Invoice": "", "InvoiceDate": ""}} ) \end{spverbatim} +An image of a sample record prior to removing Invoice and InvoiceDate can be seen in Figure \ref{fig:prior_i_drop}, and an image of the same record after removing the fields can be seen in Figure \ref{fig:after_i_drop}. + \begin{figure}[h!]
\centering \includegraphics[width=\linewidth]{"../images/prior_invoice_drop.png"} @@ -88,7 +93,8 @@ db.Project_Data.updateMany({"CustomerID" : ""}, { $unset : {"CustomerID" : 1 } } \label{fig:after_ci_drop} \end{figure} -Adding "HighDemand": +Adding the HighDemand column can be performed with an aggregate function. If the condition is met, the true value adds "Yes" to the value of the newly added field. \$\$REMOVE is a built-in MongoDB system variable that suppresses a field, so if the condition is not met, the field is not added to the record. By default, aggregates only read and do not write. Adding \$out tells mongo to write the results. In this case, we have told mongo to write back to the Project\_Data collection. + \begin{spverbatim} db.Project_Data.aggregate([ { @@ -115,11 +121,59 @@ db.Project_Data.aggregate([ db.Project_Data.count({"HighDemand": "Yes"}) \end{spverbatim} +Answer: 8. + +A partial image of the records with column HighDemand can be seen in Figure \ref{fig:high-demand}. + +\begin{figure}[h!] + \centering + \includegraphics[width=\linewidth]{"../images/high_demand.png"} + \vspace*{-6mm} + \caption{Records with HighDemand} + \label{fig:high-demand} +\end{figure} + \textbf{Question 2:} Display the records with price more than 4 (4 excluded) \begin{spverbatim} db.Project_Data.find({"Price": {"$gt": 4}}).pretty() \end{spverbatim} +Total number of records with price greater than four: 6. + +\begin{figure}[h!]
+ \centering + \includegraphics[scale=0.5]{"../images/gt_four.png"} + \vspace*{-3mm} + \caption{Records with Price Greater than Four} + \label{fig:gt_four} +\end{figure} + +\clearpage \section{Metadata} +\begin{spverbatim} +{ + "metadata": [ + {"key": "InvoiceReceipt", "value": "IN-C123456.pdf"}, + {"key": "File size", "value": 32764}, + {"key": "MIME type", "value": "application/pdf"}, + {"key": "CancellationStatus", "value": "true"}, + {"key": "Author", "value": {"LName": "Schrick", "FName": "Noah"}}, + {"key": "Security", "value": "false"}, + {"key": "Fonts", "value": "Calibri"}, + {"key": "URL", "value": ""}, + {"key": "RevisionTimestamp", "value": ISODate("2022-12-08T10:01:00Z")}, + {"key": "ItemCategory", "value": "furniture"}, + {"key": "ItemWeight", "value": "20"}, + {"key": "CustomerStanding", "value": "good"}, + {"key": "PaymentMethod", "value": "Bank"}, + {"key": "CreditCardVendor", "value": ""}, + {"key": "Origin", "value": "USA"}, + {"key": "Expedited", "value": "false"}, + {"key": "HoldStatus", "value": ""}, + {"key": "Wholesaler", "value": "true"} + ] +} +\end{spverbatim} + \end{document} \ No newline at end of file diff --git a/Report/Schrick-Noah_QM-7093_Final.toc b/Report/Schrick-Noah_QM-7093_Final.toc index bede882..f6b008c 100644 --- a/Report/Schrick-Noah_QM-7093_Final.toc +++ b/Report/Schrick-Noah_QM-7093_Final.toc @@ -1,5 +1,5 @@ \contentsline {section}{\numberline {1}MongoDB}{2}{section.1}% \contentsline {section}{\numberline {2}Insertions and Queries}{2}{section.2}% \contentsline {subsection}{\numberline {2.1}Inserting Data}{2}{subsection.2.1}% -\contentsline {subsection}{\numberline {2.2}Queries}{3}{subsection.2.2}% -\contentsline {section}{\numberline {3}Metadata}{3}{section.3}% +\contentsline {subsection}{\numberline {2.2}Queries}{4}{subsection.2.2}% +\contentsline {section}{\numberline {3}Metadata}{7}{section.3}%