Finalizing Report

This commit is contained in:
Noah L. Schrick 2022-12-06 17:14:06 -06:00
parent da34e2b6b5
commit cc2fb2c6aa
6 changed files with 142 additions and 44 deletions

Binary file not shown.

View File

@ -27,6 +27,10 @@
\newlabel{fig:after_i_drop}{{3}{3}{Collection Sample After Removing Invoice and Invoice Date}{figure.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Collection Sample After Removing Empty CustomerID fields }}{3}{figure.4}\protected@file@percent }
\newlabel{fig:after_ci_drop}{{4}{3}{Collection Sample After Removing Empty CustomerID fields}{figure.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Queries}{3}{subsection.2.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Metadata}{3}{section.3}\protected@file@percent }
\gdef \@abspage@last{3}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Queries}{4}{subsection.2.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Records with HighDemand}}{5}{figure.5}\protected@file@percent }
\newlabel{fig:high-demand}{{5}{5}{Records with HighDemand}{figure.5}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Records with Price Greater than Four}}{6}{figure.6}\protected@file@percent }
\newlabel{fig:gt_four}{{6}{6}{Records with Price Greater than Four}{figure.6}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Metadata}{7}{section.3}\protected@file@percent }
\gdef \@abspage@last{7}

View File

@ -1,8 +1,8 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.11.8) 5 DEC 2022 16:35
This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.11.8) 6 DEC 2022 17:13
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
**Schrick-Noah_QM-7093_Final.tex
**Schrick-Noah_QM-7093_Final
(./Schrick-Noah_QM-7093_Final.tex
LaTeX2e <2021-11-15> patch level 1
L3 programming layer <2022-04-10>
@ -297,83 +297,123 @@ Overfull \hbox (27.74675pt too wide) in paragraph at lines 44--44
e[] --file[] Project_Data.csv[]
[]
<../images/mongoimport.png, id=41, 1177.39874pt x 162.6075pt>
[2]
<../images/mongoimport.png, id=49, 1177.39874pt x 162.6075pt>
File: ../images/mongoimport.png Graphic file (type png)
<use ../images/mongoimport.png>
Package pdftex.def Info: ../images/mongoimport.png used on input line 48.
Package pdftex.def Info: ../images/mongoimport.png used on input line 50.
(pdftex.def) Requested size: 345.0pt x 47.64624pt.
LaTeX Warning: `!h' float specifier changed to `!ht'.
Overfull \hbox (27.74675pt too wide) in paragraph at lines 58--58
Overfull \hbox (27.74675pt too wide) in paragraph at lines 61--61
[]\OT1/cmtt/m/n/10 db.Project_Data.updateMany({},[] {$unset:[] {[] "Invoice":[]
"",[] "InvoiceDate":[]
[]
[2]
<../images/prior_invoice_drop.png, id=51, 1926.19624pt x 151.56625pt>
<../images/prior_invoice_drop.png, id=53, 1926.19624pt x 151.56625pt>
File: ../images/prior_invoice_drop.png Graphic file (type png)
<use ../images/prior_invoice_drop.png>
Package pdftex.def Info: ../images/prior_invoice_drop.png used on input line 6
2.
7.
(pdftex.def) Requested size: 345.0pt x 27.1466pt.
<../images/after_invoice_removal.png, id=52, 1753.55125pt x 110.4125pt>
<../images/after_invoice_removal.png, id=54, 1753.55125pt x 110.4125pt>
File: ../images/after_invoice_removal.png Graphic file (type png)
<use ../images/after_invoice_removal.png>
Package pdftex.def Info: ../images/after_invoice_removal.png used on input lin
e 70.
e 75.
(pdftex.def) Requested size: 345.0pt x 21.72156pt.
Overfull \hbox (32.9967pt too wide) in paragraph at lines 81--81
Overfull \hbox (32.9967pt too wide) in paragraph at lines 86--86
[]\OT1/cmtt/m/n/10 db.Project_Data.updateMany({"CustomerID"[] :[] ""},[] {[] $u
nset[] :[] {"CustomerID"[]
[]
<../images/empty_removal.png, id=53, 1531.7225pt x 106.3975pt>
<../images/empty_removal.png, id=55, 1531.7225pt x 106.3975pt>
File: ../images/empty_removal.png Graphic file (type png)
<use ../images/empty_removal.png>
Package pdftex.def Info: ../images/empty_removal.png used on input line 85.
Package pdftex.def Info: ../images/empty_removal.png used on input line 90.
(pdftex.def) Requested size: 345.0pt x 23.96437pt.
[3
pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) ha
s been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.99 \end{document}
l.98 \begin{spverbatim}
pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) ha
s been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.99 \end{document}
l.98 \begin{spverbatim}
pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) ha
s been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.99 \end{document}
l.98 \begin{spverbatim}
pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) ha
s been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.99 \end{document}
<../images/mongoimport.png> <../images/prior_invoice_drop.p
ng> <../images/after_invoice_removal.png> <../images/empty_removal.png>]
(./Schrick-Noah_QM-7093_Final.aux)
l.98 \begin{spverbatim}
<../images/mongoimport.png> <../images/prior_invoice_dr
op.png> <../images/after_invoice_removal.png> <../images/empty_removal.png>]
<../images/high_demand.png, id=69, 727.71875pt x 912.40875pt>
File: ../images/high_demand.png Graphic file (type png)
<use ../images/high_demand.png>
Package pdftex.def Info: ../images/high_demand.png used on input line 130.
(pdftex.def) Requested size: 345.0pt x 432.56332pt.
LaTeX Warning: `!h' float specifier changed to `!ht'.
<../images/gt_four.png, id=70, 422.57875pt x 895.345pt>
File: ../images/gt_four.png Graphic file (type png)
<use ../images/gt_four.png>
Package pdftex.def Info: ../images/gt_four.png used on input line 145.
(pdftex.def) Requested size: 211.28885pt x 447.6714pt.
LaTeX Warning: `!h' float specifier changed to `!ht'.
[4] [5
pdfTeX warning (ext4): destination with the same identifier (name{figure.5}) ha
s been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.151 \clearpage
<../images/high_demand.png>] [6
pdfTeX warning (ext4): destination with the same identifier (name{figure.6}) ha
s been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.151 \clearpage
<../images/gt_four.png>]
Overfull \hbox (22.4968pt too wide) in paragraph at lines 177--177
[][] [] [] [] \OT1/cmtt/m/n/10 {"key":[] "Author",[] "value":[] {"LName":[] "Sc
hrick",[] "FName":[] "Noah"}},[]
[]
Overfull \hbox (48.74657pt too wide) in paragraph at lines 177--177
[][] [] [] [] \OT1/cmtt/m/n/10 {"key":[] "RevisionTimestamp",[] "value":[] ISOD
ate("2022-12-08T10:01:00Z")},[]
[]
[7
] (./Schrick-Noah_QM-7093_Final.aux)
Package rerunfilecheck Info: File `Schrick-Noah_QM-7093_Final.out' has not chan
ged.
(rerunfilecheck) Checksum: 7F26178B815D0544D2E6EBEAE170E4EE;542.
)
Here is how much of TeX's memory you used:
8314 strings out of 478238
132476 string characters out of 5850456
424143 words of memory out of 5000000
26444 multiletter control sequences out of 15000+600000
8338 strings out of 478238
132984 string characters out of 5850456
426246 words of memory out of 5000000
26460 multiletter control sequences out of 15000+600000
474017 words of font info for 44 fonts, out of 8000000 for 9000
1141 hyphenation exceptions out of 8191
60i,6n,63p,648b,466s stack positions out of 5000i,500n,10000p,200000b,80000s
60i,7n,63p,644b,360s stack positions out of 5000i,500n,10000p,200000b,80000s
{/usr/share/texmf-dist/fonts/enc/dvips/cm-super/cm-super-ts1.enc}</usr/share/
texmf-dist/fonts/type1/public/amsfonts/cm/cmbx10.pfb></usr/share/texmf-dist/fon
ts/type1/public/amsfonts/cm/cmbx12.pfb></usr/share/texmf-dist/fonts/type1/publi
@ -381,10 +421,10 @@ c/amsfonts/cm/cmr10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/c
mr12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr17.pfb></usr/
share/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt10.pfb></usr/share/texmf-di
st/fonts/type1/public/cm-super/sfrm1000.pfb>
Output written on Schrick-Noah_QM-7093_Final.pdf (3 pages, 207165 bytes).
Output written on Schrick-Noah_QM-7093_Final.pdf (7 pages, 345874 bytes).
PDF statistics:
103 PDF objects out of 1000 (max. 8388607)
75 compressed objects within 1 object stream
14 named destinations out of 1000 (max. 500000)
61 words of extra memory for PDF output out of 10000 (max. 10000000)
131 PDF objects out of 1000 (max. 8388607)
95 compressed objects within 1 object stream
20 named destinations out of 1000 (max. 500000)
71 words of extra memory for PDF output out of 10000 (max. 10000000)

View File

@ -7,7 +7,7 @@
\usepackage{float}
\usepackage{indentfirst}
\setlength{\parskip}{\baselineskip}%
\setlength{\belowcaptionskip}{5pt}
\setlength{\belowcaptionskip}{8pt}
\title{QM 7093: Enterprise Data Systems: NoSQL with MongoDB}
@ -43,7 +43,9 @@ To reduce the amount of manual insertions and minimize the risk of human inserti
mongoimport --db QM_7093_Final --headerline --file Project_Data.csv --type csv
\end{spverbatim}
\begin{figure}[h!]
In real-world or large-dataset applications, this is not an ideal approach: inserting data only to subsequently remove it is inefficient. Pre-processing the data to selectively remove unwanted records before insertion would be a better solution.
\begin{figure}[!ht]
\centering
\includegraphics[width=\linewidth]{"../images/mongoimport.png"}
\vspace*{-6mm}
@ -51,12 +53,15 @@ To reduce the amount of manual insertions and minimize the risk of human inserti
\label{fig:import}
\end{figure}
Removing Invoice and InvoiceDate from the Project\_Data collection can be performed with:
\begin{spverbatim}
db.Project_Data.updateMany({}, {$unset: { "Invoice": "", "InvoiceDate": ""}} )
\end{spverbatim}
A sample record prior to removing Invoice and InvoiceDate can be seen in Figure~\ref{fig:prior_i_drop}, and the same record after removing the fields can be seen in Figure~\ref{fig:after_i_drop}.
\begin{figure}[h!]
\centering
\includegraphics[width=\linewidth]{"../images/prior_invoice_drop.png"}
@ -88,7 +93,8 @@ db.Project_Data.updateMany({"CustomerID" : ""}, { $unset : {"CustomerID" : 1 } }
\label{fig:after_ci_drop}
\end{figure}
Adding "HighDemand":
Adding the HighDemand field can be performed with an aggregation pipeline. If the condition is met, the value ``Yes'' is assigned to the newly added field. If the condition is not met, \$\$REMOVE, a built-in MongoDB system variable that excludes a field from the output, is used instead, so the field is not added to the record. By default, aggregations only read and do not write. Adding \$out tells MongoDB to write the results; in this case, the results are written back to the Project\_Data collection.
\begin{spverbatim}
db.Project_Data.aggregate([
{
@ -115,11 +121,59 @@ db.Project_Data.aggregate([
db.Project_Data.count({"HighDemand": "Yes"})
\end{spverbatim}
Answer: 8.
A partial image of the records with the HighDemand field can be seen in Figure~\ref{fig:high-demand}.
\begin{figure}[h!]
\centering
\includegraphics[width=\linewidth]{"../images/high_demand.png"}
\vspace*{-6mm}
\caption{Records with HighDemand}
\label{fig:high-demand}
\end{figure}
\textbf{Question 2:} Display the records with price more than 4 (4 excluded)
\begin{spverbatim}
db.Project_Data.find({"Price": {"$gt": 4}}).pretty()
\end{spverbatim}
Total number of records with price greater than four: 6.
\begin{figure}[h!]
\centering
\includegraphics[scale=0.5]{"../images/gt_four.png"}
\vspace*{-3mm}
\caption{Records with Price Greater than Four}
\label{fig:gt_four}
\end{figure}
\clearpage
\section{Metadata}
\begin{spverbatim}
{
"metadata": [
{"key": "InvoiceReceipt", "value": "IN-C123456.pdf"},
{"key": "File size", "value": 32764},
{"key": "MIME type", "value": "application/pdf"},
{"key": "CancellationStatus", "value": "true"},
{"key": "Author", "value": {"LName": "Schrick", "FName": "Noah"}},
{"key": "Security", "value": "false"},
{"key": "Fonts", "value": "Calibri"},
{"key": "URL", "value": ""},
{"key": "RevisionTimestamp", "value": ISODate("2022-12-08T10:01:00Z")},
{"key": "ItemCategory", "value": "furniture"},
{"key": "ItemWeight", "value": "20"},
{"key": "CustomerStanding", "value": "good"},
{"key": "PaymentMethod", "value": "Bank"},
{"key": "CreditCardVendor", "value": ""},
{"key": "Origin", "value": "USA"},
{"key": "Expedited", "value": "false"},
{"key": "HoldStatus", "value": ""},
{"key": "Wholesaler", "value": "true"}
]
}
\end{spverbatim}
\end{document}

View File

@ -1,5 +1,5 @@
\contentsline {section}{\numberline {1}MongoDB}{2}{section.1}%
\contentsline {section}{\numberline {2}Insertions and Queries}{2}{section.2}%
\contentsline {subsection}{\numberline {2.1}Inserting Data}{2}{subsection.2.1}%
\contentsline {subsection}{\numberline {2.2}Queries}{3}{subsection.2.2}%
\contentsline {section}{\numberline {3}Metadata}{3}{section.3}%
\contentsline {subsection}{\numberline {2.2}Queries}{4}{subsection.2.2}%
\contentsline {section}{\numberline {3}Metadata}{7}{section.3}%