diff --git a/Project_Data.xlsx b/Project_Data.xlsx deleted file mode 100644 index 3653aab..0000000 Binary files a/Project_Data.xlsx and /dev/null differ diff --git a/Schrick-Noah_QM-7093_Final.aux b/Report/Schrick-Noah_QM-7093_Final.aux similarity index 52% rename from Schrick-Noah_QM-7093_Final.aux rename to Report/Schrick-Noah_QM-7093_Final.aux index 42201d4..9b6ec3e 100644 --- a/Schrick-Noah_QM-7093_Final.aux +++ b/Report/Schrick-Noah_QM-7093_Final.aux @@ -19,6 +19,14 @@ \@writefile{toc}{\contentsline {section}{\numberline {1}MongoDB}{2}{section.1}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {2}Insertions and Queries}{2}{section.2}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Inserting Data}{2}{subsection.2.1}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Queries}{2}{subsection.2.2}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Part 1.a: Importing from CSV}}{3}{figure.1}\protected@file@percent } +\newlabel{fig:import}{{1}{3}{Part 1.a: Importing from CSV}{figure.1}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Collection Sample Prior to Removing Invoice and Invoice Date }}{3}{figure.2}\protected@file@percent } +\newlabel{fig:prior_i_drop}{{2}{3}{Collection Sample Prior to Removing Invoice and Invoice Date}{figure.2}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Collection Sample After Removing Invoice and Invoice Date }}{3}{figure.3}\protected@file@percent } +\newlabel{fig:after_i_drop}{{3}{3}{Collection Sample After Removing Invoice and Invoice Date}{figure.3}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Collection Sample After Removing Empty CustomerID fields }}{3}{figure.4}\protected@file@percent } +\newlabel{fig:after_ci_drop}{{4}{3}{Collection Sample After Removing Empty CustomerID fields}{figure.4}{}} 
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Queries}{3}{subsection.2.2}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {3}Metadata}{3}{section.3}\protected@file@percent } \gdef \@abspage@last{3} diff --git a/Schrick-Noah_QM-7093_Final.log b/Report/Schrick-Noah_QM-7093_Final.log similarity index 69% rename from Schrick-Noah_QM-7093_Final.log rename to Report/Schrick-Noah_QM-7093_Final.log index 5f04c7f..d42fc58 100644 --- a/Schrick-Noah_QM-7093_Final.log +++ b/Report/Schrick-Noah_QM-7093_Final.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.11.8) 5 DEC 2022 14:52 +This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.11.8) 5 DEC 2022 16:35 entering extended mode restricted \write18 enabled. %&-line parsing enabled. @@ -178,6 +178,9 @@ Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2 ) \Hy@SectionHShift=\skip49 ) +(/usr/share/texmf-dist/tex/latex/spverbatim/spverbatim.sty +Package: spverbatim 2009/08/10 v1.0 Verbatim with breakable spaces +) (/usr/share/texmf-dist/tex/latex/base/inputenc.sty Package: inputenc 2021/02/14 v1.3d Input encoding file \inpenc@prehook=\toks17 @@ -202,24 +205,24 @@ File: l3backend-pdftex.def 2022-04-14 L3 backend support: PDF output (pdfTeX) (./Schrick-Noah_QM-7093_Final.aux) \openout1 = `Schrick-Noah_QM-7093_Final.aux'. -LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. 
-LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 17. +LaTeX Font Info: ... okay on input line 17. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 17. +LaTeX Font Info: ... okay on input line 17. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 17. +LaTeX Font Info: ... okay on input line 17. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 17. +LaTeX Font Info: ... okay on input line 17. +LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 17. +LaTeX Font Info: ... okay on input line 17. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 17. +LaTeX Font Info: ... okay on input line 17. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 17. +LaTeX Font Info: ... okay on input line 17. +LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 17. +LaTeX Font Info: ... okay on input line 17. +LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 17. +LaTeX Font Info: ... okay on input line 17. (/usr/share/texmf-dist/tex/context/base/mkii/supp-pdf.mkii [Loading MPS to PDF converter (version 2006.09.02).] @@ -243,7 +246,7 @@ Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4 File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv e )) -Package hyperref Info: Link coloring OFF on input line 14. 
+Package hyperref Info: Link coloring OFF on input line 17. (/usr/share/texmf-dist/tex/latex/hyperref/nameref.sty Package: nameref 2021-04-02 v2.47 Cross-referencing by name of section @@ -256,20 +259,20 @@ Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO) ) \c@section@level=\count277 ) -LaTeX Info: Redefining \ref on input line 14. -LaTeX Info: Redefining \pageref on input line 14. -LaTeX Info: Redefining \nameref on input line 14. +LaTeX Info: Redefining \ref on input line 17. +LaTeX Info: Redefining \pageref on input line 17. +LaTeX Info: Redefining \nameref on input line 17. (./Schrick-Noah_QM-7093_Final.out) (./Schrick-Noah_QM-7093_Final.out) \@outlinefile=\write3 \openout3 = `Schrick-Noah_QM-7093_Final.out'. LaTeX Font Info: External font `cmex10' loaded for size -(Font) <12> on input line 17. +(Font) <12> on input line 20. LaTeX Font Info: External font `cmex10' loaded for size -(Font) <8> on input line 17. +(Font) <8> on input line 20. LaTeX Font Info: External font `cmex10' loaded for size -(Font) <6> on input line 17. +(Font) <6> on input line 20. (./Schrick-Noah_QM-7093_Final.toc LaTeX Font Info: External font `cmex10' loaded for size @@ -283,24 +286,94 @@ LaTeX Font Info: External font `cmex10' loaded for size [1 {/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] -Overfull \hbox (17.74684pt too wide) in paragraph at lines 21--23 +Overfull \hbox (17.74684pt too wide) in paragraph at lines 24--26 \OT1/cmtt/m/n/10 db-[]engines . com / en / ranking ? utm _ source = xp & utm _ medium = blog & utm _ campaign = [] -[2] [3] (./Schrick-Noah_QM-7093_Final.aux) + +Overfull \hbox (27.74675pt too wide) in paragraph at lines 44--44 +[][] [] [] [] \OT1/cmtt/m/n/10 mongoimport[] --db[] QM_7093_Final[] --headerlin +e[] --file[] Project_Data.csv[] + [] + +<../images/mongoimport.png, id=41, 1177.39874pt x 162.6075pt> +File: ../images/mongoimport.png Graphic file (type png) + +Package pdftex.def Info: ../images/mongoimport.png used on input line 48. 
+(pdftex.def) Requested size: 345.0pt x 47.64624pt. + +LaTeX Warning: `!h' float specifier changed to `!ht'. + + +Overfull \hbox (27.74675pt too wide) in paragraph at lines 58--58 +[]\OT1/cmtt/m/n/10 db.Project_Data.updateMany({},[] {$unset:[] {[] "Invoice":[] + "",[] "InvoiceDate":[] + [] + +[2] +<../images/prior_invoice_drop.png, id=51, 1926.19624pt x 151.56625pt> +File: ../images/prior_invoice_drop.png Graphic file (type png) + +Package pdftex.def Info: ../images/prior_invoice_drop.png used on input line 6 +2. +(pdftex.def) Requested size: 345.0pt x 27.1466pt. +<../images/after_invoice_removal.png, id=52, 1753.55125pt x 110.4125pt> +File: ../images/after_invoice_removal.png Graphic file (type png) + +Package pdftex.def Info: ../images/after_invoice_removal.png used on input lin +e 70. +(pdftex.def) Requested size: 345.0pt x 21.72156pt. + +Overfull \hbox (32.9967pt too wide) in paragraph at lines 81--81 +[]\OT1/cmtt/m/n/10 db.Project_Data.updateMany({"CustomerID"[] :[] ""},[] {[] $u +nset[] :[] {"CustomerID"[] + [] + +<../images/empty_removal.png, id=53, 1531.7225pt x 106.3975pt> +File: ../images/empty_removal.png Graphic file (type png) + +Package pdftex.def Info: ../images/empty_removal.png used on input line 85. +(pdftex.def) Requested size: 345.0pt x 23.96437pt. +[3 +pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) ha +s been already used, duplicate ignored + ...shipout:D \box_use:N \l_shipout_box + \__shipout_drop_firstpage_... +l.99 \end{document} + +pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) ha +s been already used, duplicate ignored + ...shipout:D \box_use:N \l_shipout_box + \__shipout_drop_firstpage_... +l.99 \end{document} + +pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) ha +s been already used, duplicate ignored + ...shipout:D \box_use:N \l_shipout_box + \__shipout_drop_firstpage_... 
+l.99 \end{document} + +pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) ha +s been already used, duplicate ignored + ...shipout:D \box_use:N \l_shipout_box + \__shipout_drop_firstpage_... +l.99 \end{document} + <../images/mongoimport.png> <../images/prior_invoice_drop.p +ng> <../images/after_invoice_removal.png> <../images/empty_removal.png>] +(./Schrick-Noah_QM-7093_Final.aux) Package rerunfilecheck Info: File `Schrick-Noah_QM-7093_Final.out' has not chan ged. (rerunfilecheck) Checksum: 7F26178B815D0544D2E6EBEAE170E4EE;542. ) Here is how much of TeX's memory you used: - 8242 strings out of 478238 - 130450 string characters out of 5850456 - 422708 words of memory out of 5000000 - 26386 multiletter control sequences out of 15000+600000 + 8314 strings out of 478238 + 132476 string characters out of 5850456 + 424143 words of memory out of 5000000 + 26444 multiletter control sequences out of 15000+600000 474017 words of font info for 44 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 - 60i,6n,63p,648b,320s stack positions out of 5000i,500n,10000p,200000b,80000s + 60i,6n,63p,648b,466s stack positions out of 5000i,500n,10000p,200000b,80000s {/usr/share/texmf-dist/fonts/enc/dvips/cm-super/cm-super-ts1.enc} -Output written on Schrick-Noah_QM-7093_Final.pdf (3 pages, 95186 bytes). +Output written on Schrick-Noah_QM-7093_Final.pdf (3 pages, 207165 bytes). PDF statistics: - 89 PDF objects out of 1000 (max. 8388607) - 69 compressed objects within 1 object stream - 10 named destinations out of 1000 (max. 500000) - 41 words of extra memory for PDF output out of 10000 (max. 10000000) + 103 PDF objects out of 1000 (max. 8388607) + 75 compressed objects within 1 object stream + 14 named destinations out of 1000 (max. 500000) + 61 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/Schrick-Noah_QM-7093_Final.out b/Report/Schrick-Noah_QM-7093_Final.out similarity index 100% rename from Schrick-Noah_QM-7093_Final.out rename to Report/Schrick-Noah_QM-7093_Final.out diff --git a/Report/Schrick-Noah_QM-7093_Final.pdf b/Report/Schrick-Noah_QM-7093_Final.pdf new file mode 100644 index 0000000..a86bb8b Binary files /dev/null and b/Report/Schrick-Noah_QM-7093_Final.pdf differ diff --git a/Schrick-Noah_QM-7093_Final.tex b/Report/Schrick-Noah_QM-7093_Final.tex similarity index 52% rename from Schrick-Noah_QM-7093_Final.tex rename to Report/Schrick-Noah_QM-7093_Final.tex index bfa90c4..71aae8c 100644 --- a/Schrick-Noah_QM-7093_Final.tex +++ b/Report/Schrick-Noah_QM-7093_Final.tex @@ -1,11 +1,14 @@ \documentclass{article} \usepackage{graphicx} \usepackage{hyperref} -\graphicspath{ {./images/} } +\usepackage{spverbatim} +\graphicspath{ {../images/} } \usepackage[utf8]{inputenc} \usepackage{float} \usepackage{indentfirst} \setlength{\parskip}{\baselineskip}% +\setlength{\belowcaptionskip}{5pt} + \title{QM 7093: Enterprise Data Systems: NoSQL with MongoDB} \author{Noah L. Schrick} @@ -34,6 +37,58 @@ Insert all records from the provided datasheet with the following properties: } \end{itemize} +To reduce the number of manual insertions and minimize the risk of human insertion error, the xlsx datasheet was converted to a csv. Each text cell (``Description'', for example) was encapsulated in quotes before the conversion. The delimiter used was a comma (``,''). Saving the data in a csv format allows for an easy insertion by MongoDB using mongoimport. + +\begin{spverbatim} + mongoimport --db QM_7093_Final --headerline --file Project_Data.csv --type csv +\end{spverbatim} + +\begin{figure}[h!] 
+ \centering + \includegraphics[width=\linewidth]{"../images/mongoimport.png"} + \vspace*{-6mm} + \caption{Part 1.a: Importing from CSV} + \label{fig:import} +\end{figure} + +Removing Invoice and InvoiceDate from the Project\_Data collection can be performed with: + +\begin{spverbatim} +db.Project_Data.updateMany({}, {$unset: { "Invoice": "", "InvoiceDate": ""}} ) +\end{spverbatim} + +\begin{figure}[h!] + \centering + \includegraphics[width=\linewidth]{"../images/prior_invoice_drop.png"} + \vspace*{-6mm} + \caption{Collection Sample Prior to Removing Invoice and Invoice Date } + \label{fig:prior_i_drop} +\end{figure} + +\begin{figure}[h!] + \centering + \includegraphics[width=\linewidth]{"../images/after_invoice_removal.png"} + \vspace*{-6mm} + \caption{Collection Sample After Removing Invoice and Invoice Date } + \label{fig:after_i_drop} +\end{figure} + + + +Removing the CustomerID field when empty can be performed with: +\begin{spverbatim} +db.Project_Data.updateMany({"CustomerID" : ""}, { $unset : {"CustomerID" : 1 } } ) +\end{spverbatim} + +\begin{figure}[h!] + \centering + \includegraphics[width=\linewidth]{"../images/empty_removal.png"} + \vspace*{-6mm} + \caption{Collection Sample After Removing Empty CustomerID fields } + \label{fig:after_ci_drop} +\end{figure} + + \subsection{Queries} \textbf{Question 1:} How many records have the column “HighDemand”? 
(Must have a code to answer this, one way to answer this is to have a code that displays all the records except those with the column HighDemand and then subtract the number from total number of records) diff --git a/Schrick-Noah_QM-7093_Final.toc b/Report/Schrick-Noah_QM-7093_Final.toc similarity index 82% rename from Schrick-Noah_QM-7093_Final.toc rename to Report/Schrick-Noah_QM-7093_Final.toc index bfd1884..bede882 100644 --- a/Schrick-Noah_QM-7093_Final.toc +++ b/Report/Schrick-Noah_QM-7093_Final.toc @@ -1,5 +1,5 @@ \contentsline {section}{\numberline {1}MongoDB}{2}{section.1}% \contentsline {section}{\numberline {2}Insertions and Queries}{2}{section.2}% \contentsline {subsection}{\numberline {2.1}Inserting Data}{2}{subsection.2.1}% -\contentsline {subsection}{\numberline {2.2}Queries}{2}{subsection.2.2}% +\contentsline {subsection}{\numberline {2.2}Queries}{3}{subsection.2.2}% \contentsline {section}{\numberline {3}Metadata}{3}{section.3}% diff --git a/Schrick-Noah_QM-7093_Final.pdf b/Schrick-Noah_QM-7093_Final.pdf deleted file mode 100644 index 1690277..0000000 Binary files a/Schrick-Noah_QM-7093_Final.pdf and /dev/null differ diff --git a/data/.~lock.Project_Data.csv# b/data/.~lock.Project_Data.csv# new file mode 100644 index 0000000..f5e30e6 --- /dev/null +++ b/data/.~lock.Project_Data.csv# @@ -0,0 +1 @@ +,noah,NovaArchSys,05.12.2022 15:54,file:///home/noah/.config/libreoffice/4; \ No newline at end of file diff --git a/data/Project_Data.csv b/data/Project_Data.csv new file mode 100644 index 0000000..1a76720 --- /dev/null +++ b/data/Project_Data.csv @@ -0,0 +1,21 @@ +"Invoice","StockCode","Description","Quantity","InvoiceDate","Price","CustomerID","Country" +536365,22752,"SET 7 BABUSHKA NESTING BOXES",2,12/1/2010 8:26,7.65,17850,"United Kingdom" +536365,21730,"GLASS STAR FROSTED T-LIGHT HOLDER",6,12/1/2010 8:26,4.25,17850,"United Kingdom" +536366,22633,"HAND WARMER UNION JACK",6,12/1/2010 8:28,1.85,17850,"United Kingdom" +536366,22632,"HAND WARMER RED 
POLKA DOT",6,12/1/2010 8:28,1.85,17850,"United Kingdom" +536368,22913,"RED COAT RACK PARIS FASHION",3,12/1/2010 8:34,4.95,13047,"United Kingdom" +536370,22726,"ALARM CLOCK BAKELIKE GREEN",12,12/1/2010 8:45,3.75,12583,"France" +536370,21724,"PANDA AND BUNNIES STICKER SHEET",12,12/1/2010 8:45,0.85,12583,"France" +536370,21883,"STARS GIFT TAPE ",24,12/1/2010 8:45,0.65,12583,"France" +536370,10002,"INFLATABLE POLITICAL GLOBE ",48,12/1/2010 8:45,0.85,12583,"France" +536370,21035,"SET/2 RED RETROSPOT TEA TOWELS ",18,12/1/2010 8:45,2.95,12583,"France" +536389,22726,"ALARM CLOCK BAKELIKE GREEN",4,12/1/2010 10:03,3.75,12431,"Australia" +536389,22727,"ALARM CLOCK BAKELIKE RED ",4,12/1/2010 10:03,3.75,12431,"Australia" +536389,22192,"BLUE DINER WALL CLOCK",2,12/1/2010 10:03,8.5,12431,"Australia" +536389,22191,"IVORY DINER WALL CLOCK",2,12/1/2010 10:03,8.5,12431,"Australia" +536389,22195,"LARGE HEART MEASURING SPOONS",24,12/1/2010 10:03,1.65,12431,"Australia" +565927,23324,"RUSTIC STRAWBERRY JAM POT LARGE ",12,9/8/2011 10:08,2.08,,"EIRE" +565927,23325,"RUSTIC STRAWBERRY JAM POT SMALL",12,9/8/2011 10:08,1.65,,"EIRE" +565927,22848,"BREAD BIN DINER STYLE PINK",2,9/8/2011 10:08,16.95,,"EIRE" +565927,23299,"FOOD COVER WITH BEADS SET 2 ",6,9/8/2011 10:08,3.75,,"EIRE" +565927,21465,"PINK FLOWER CROCHET FOOD COVER",6,9/8/2011 10:08,3.75,,"EIRE" diff --git a/data/Project_Data.xlsx b/data/Project_Data.xlsx new file mode 100644 index 0000000..27c0452 Binary files /dev/null and b/data/Project_Data.xlsx differ diff --git a/images/after_invoice_removal.png b/images/after_invoice_removal.png new file mode 100644 index 0000000..03745ed Binary files /dev/null and b/images/after_invoice_removal.png differ diff --git a/images/empty_removal.png b/images/empty_removal.png new file mode 100644 index 0000000..8760302 Binary files /dev/null and b/images/empty_removal.png differ diff --git a/images/mongoimport.png b/images/mongoimport.png new file mode 100644 index 0000000..f7eb20f Binary files 
/dev/null and b/images/mongoimport.png differ diff --git a/images/prior_invoice_drop.png b/images/prior_invoice_drop.png new file mode 100644 index 0000000..2fce3ec Binary files /dev/null and b/images/prior_invoice_drop.png differ diff --git a/texput.log b/texput.log new file mode 100644 index 0000000..c722a20 --- /dev/null +++ b/texput.log @@ -0,0 +1,21 @@ +This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.11.8) 5 DEC 2022 16:06 +entering extended mode + restricted \write18 enabled. + %&-line parsing enabled. +** + +! Emergency stop. +<*> + +End of file on the terminal! + + +Here is how much of TeX's memory you used: + 3 strings out of 478238 + 133 string characters out of 5850456 + 289994 words of memory out of 5000000 + 18344 multiletter control sequences out of 15000+600000 + 469259 words of font info for 28 fonts, out of 8000000 for 9000 + 1141 hyphenation exceptions out of 8191 + 0i,0n,0p,1b,6s stack positions out of 5000i,500n,10000p,200000b,80000s +! ==> Fatal error occurred, no output PDF file produced!