writingmethod
This commit is contained in:
4
report/.vscode/settings.json
vendored
4
report/.vscode/settings.json
vendored
@@ -28,6 +28,10 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
||||||
|
"ltex.language": "en-GB",
|
||||||
|
"ltex.enabled": [
|
||||||
|
"latex"
|
||||||
|
],
|
||||||
"latex-workshop.latex.clean.fileTypes": [
|
"latex-workshop.latex.clean.fileTypes": [
|
||||||
"*.aux",
|
"*.aux",
|
||||||
"*.bbl",
|
"*.bbl",
|
||||||
|
|||||||
@@ -5,7 +5,8 @@
|
|||||||
\usepackage[T1]{fontenc}
|
\usepackage[T1]{fontenc}
|
||||||
\usepackage[english]{babel}
|
\usepackage[english]{babel}
|
||||||
\usepackage{lmodern}
|
\usepackage{lmodern}
|
||||||
|
\usepackage{enumitem}
|
||||||
|
\usepackage{multicol}
|
||||||
\usepackage[a4paper,margin=2.5cm]{geometry}
|
\usepackage[a4paper,margin=2.5cm]{geometry}
|
||||||
\usepackage{setspace}
|
\usepackage{setspace}
|
||||||
\onehalfspacing
|
\onehalfspacing
|
||||||
@@ -61,20 +62,12 @@
|
|||||||
\setcounter{page}{1}
|
\setcounter{page}{1}
|
||||||
|
|
||||||
\input{sections/01_introduction.tex}
|
\input{sections/01_introduction.tex}
|
||||||
|
\input{sections/01A_theory.tex}
|
||||||
\input{sections/02_method.tex}
|
\input{sections/02_method.tex}
|
||||||
|
|
||||||
\input{sections/03_results.tex}
|
\input{sections/03_results.tex}
|
||||||
%\begin{figure}[h]
|
|
||||||
% \centering
|
|
||||||
% \includegraphics[width=\textwidth]{figures/Figure1.png}
|
|
||||||
% \caption{Total video game sales by genre in North America (millions of units).}
|
|
||||||
%\label{fig:Figure1}
|
|
||||||
%\end{figure}
|
|
||||||
\input{sections/04_discussion.tex}
|
\input{sections/04_discussion.tex}
|
||||||
\input{sections/05_conclusion.tex}
|
\input{sections/05_conclusion.tex}
|
||||||
|
|
||||||
\clearpage
|
|
||||||
\cite{noauthor_video_nodate}
|
|
||||||
\clearpage
|
\clearpage
|
||||||
\printbibliography[title={References}]
|
\printbibliography[title={References}]
|
||||||
\end{document}
|
\end{document}
|
||||||
@@ -1,9 +1,72 @@
|
|||||||
|
|
||||||
@online{noauthor_video_nodate,
|
|
||||||
title = {Video Game Sales Dataset Updated -Extra Feat},
|
@online{TOR,
|
||||||
url = {https://www.kaggle.com/datasets/ibriiee/video-games-sales-dataset-2022-updated-extra-feat},
|
title = {About Tor Browser},
|
||||||
abstract = {Uncover the Gaming Industry Trends with the Most Comprehensive Sales Data},
|
url = {https://support.torproject.org/tor-browser/getting-started/about-tor-browser/},
|
||||||
urldate = {2026-04-21},
|
abstract = {Tor Browser is a privacy-focused web browser that routes your traffic through the Tor network, hiding your real {IP} address, preventing tracking, and protecting you against surveillance and censorship. Tor Browser uses the Tor network to protect your privacy and anonymity.},
|
||||||
|
titleaddon = {Support},
|
||||||
|
author = {{Tor Project, Inc.}},
|
||||||
|
urldate = {2026-05-15},
|
||||||
langid = {english},
|
langid = {english},
|
||||||
file = {Snapshot:/home/tvh/snap/zotero-snap/common/Zotero/storage/C5LJ5QMG/video-games-sales-dataset-2022-updated-extra-feat.html:text/html},
|
file = {Snapshot:/home/tvh/snap/zotero-snap/common/Zotero/storage/R5P9688K/about-tor-browser.html:text/html},
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{HAR,
|
||||||
|
title = {Network request list — Firefox Source Docs documentation},
|
||||||
|
url = {https://firefox-source-docs.mozilla.org/devtools-user/network_monitor/request_list/index.html},
|
||||||
|
urldate = {2026-05-15},
|
||||||
|
file = {Network request list — Firefox Source Docs documentation:/home/tvh/snap/zotero-snap/common/Zotero/storage/P7S338MU/index.html:text/html},
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{Playwright,
|
||||||
|
title = {Installation {\textbar} Playwright Python},
|
||||||
|
url = {https://playwright.dev/python/docs/intro},
|
||||||
|
abstract = {Introduction},
|
||||||
|
urldate = {2026-05-15},
|
||||||
|
langid = {english},
|
||||||
|
file = {Snapshot:/home/tvh/snap/zotero-snap/common/Zotero/storage/M3HT6FNN/intro.html:text/html},
|
||||||
|
}
|
||||||
|
|
||||||
|
@online{VENV,
|
||||||
|
title = {12. Virtual Environments and Packages},
|
||||||
|
url = {https://docs.python.org/3/tutorial/venv.html},
|
||||||
|
abstract = {Introduction: Python applications will often use packages and modules that don’t come as part of the standard library. Applications will sometimes need a specific version of a library, because the ...},
|
||||||
|
titleaddon = {Python documentation},
|
||||||
|
urldate = {2026-05-15},
|
||||||
|
langid = {english},
|
||||||
|
file = {Snapshot:/home/tvh/snap/zotero-snap/common/Zotero/storage/QEN5QM2A/venv.html:text/html},
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{PDF,
|
||||||
|
title = {Book: Module 7. Lessons and Tasks},
|
||||||
|
author = {{Noroff}},
|
||||||
|
langid = {english},
|
||||||
|
file = {PDF:/home/tvh/snap/zotero-snap/common/Zotero/storage/RVWQE24L/Heggland - Book Module 7. Lessons and Tasks.pdf:application/pdf},
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{heggland_book_nodate-1,
|
||||||
|
title = {Book: Module 4. Lessons and Tasks},
|
||||||
|
author = {{Noroff}},
|
||||||
|
langid = {english},
|
||||||
|
file = {PDF:/home/tvh/snap/zotero-snap/common/Zotero/storage/YC4C99HY/Heggland - Book Module 4. Lessons and Tasks.pdf:application/pdf},
|
||||||
|
}
|
||||||
|
|
||||||
|
@article{heggland_book_nodate-2,
|
||||||
|
title = {Book: Module 2. Lessons and Tasks},
|
||||||
|
author = {{Noroff}},
|
||||||
|
langid = {english},
|
||||||
|
file = {PDF:/home/tvh/snap/zotero-snap/common/Zotero/storage/ZUATB293/Heggland - Book Module 2. Lessons and Tasks.pdf:application/pdf},
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{noroff_modules,
|
||||||
|
author = {{Noroff}},
|
||||||
|
title = {Modules 2, 4 and 7: Lessons and Tasks},
|
||||||
|
year = {2026},
|
||||||
|
note = {Internal course material used in the Data Analytics programme},
|
||||||
|
langid = {english},
|
||||||
|
file = {
|
||||||
|
PDF:/path/module2.pdf:application/pdf;
|
||||||
|
PDF:/path/module4.pdf:application/pdf;
|
||||||
|
PDF:/path/module7.pdf:application/pdf
|
||||||
|
}
|
||||||
}
|
}
|
||||||
1
report/scripts/capture_search_har.py
Symbolic link
1
report/scripts/capture_search_har.py
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../.noroff-env/har_capture/capture_search_har.py
|
||||||
21
report/sections/01A_theory.tex
Normal file
21
report/sections/01A_theory.tex
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
\section{Theory\label{sec:theor}}
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{EDA -- Exploratory Data Analysis}
|
||||||
|
|
||||||
|
\subsubsection{Data preprocessing and cleaning}
|
||||||
|
\subsubsection{Data reliability and consistency}
|
||||||
|
\subsubsection{Data visualisation principles}
|
||||||
|
|
||||||
|
\subsection{Web traffic}
|
||||||
|
|
||||||
|
% HAR files
|
||||||
|
|
||||||
|
\subsubsection{HTTP requests and responses}
|
||||||
|
|
||||||
|
%
|
||||||
|
|
||||||
|
\subsubsection{Cookies and tracking parameters}
|
||||||
|
|
||||||
|
|
||||||
|
\subsubsection{Search engines and privacy}
|
||||||
@@ -1,5 +1,8 @@
|
|||||||
\section{Method\label{sec:metho}}
|
\section{Method\label{sec:metho}}
|
||||||
|
|
||||||
|
%This section describes the methodology used throughout the research process. Some technical concepts and terminology referenced in this section are further explained in the Theory section and later discussed in the Discussion section.
|
||||||
|
|
||||||
|
This section describes the methodology used in this research. Any technical concepts and terminology referenced in this section are further explained in Section~\ref{sec:theor} and discussed in Section~\ref{sec:discu}.
|
||||||
|
|
||||||
\subsection{Research design\label{sec:metho:research_design}}
|
\subsection{Research design\label{sec:metho:research_design}}
|
||||||
% Stikkord:
|
% Stikkord:
|
||||||
@@ -8,6 +11,42 @@
|
|||||||
% same searches across search engines
|
% same searches across search engines
|
||||||
% comparison between search engines, browsers, and network modes
|
% comparison between search engines, browsers, and network modes
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%This research is design using tools to simulate human interaction with simple web searches. Each search is design to be anomynous, with no browser histories and cookies before each search was done. For each web search the browser history is cleaned and cookies removed. Browser profile used has no login data, so the web queries can not connect the search to any real person.
|
||||||
|
%4 Search Engines are used for this prosess, once in each webbrowser, Firefox and Chromium. Which means each web query is don 8 times. For this work, several queries are create to widen the data collection. The web quires are following:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
This research is designed using tools to simulate human interaction with simple web searches. Each search is designed to be anonymous, with no browser history or cookies stored before the search is performed. Before each search, the browser history is cleared and cookies are removed. The browser profiles used contain no login data, preventing the web queries from being connected to any real person.
|
||||||
|
|
||||||
|
Four search engines are used in this process (Brave, Bing, DuckDuckGo and Google), once in each web browser: Firefox and Chromium. This means that each web query is performed eight times. For this work, several queries are created to widen the data collection. The web queries are as follows:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{multicols}{2}
|
||||||
|
\begin{itemize}[noitemsep, topsep=0pt]
|
||||||
|
\item weather oslo
|
||||||
|
\item migraine symptoms
|
||||||
|
\item vitamin d deficiency
|
||||||
|
\item running shoes
|
||||||
|
\item coffee grinder
|
||||||
|
\item best laptop for students
|
||||||
|
\item electric car charging
|
||||||
|
\item cheap flights to london
|
||||||
|
\item home insurance
|
||||||
|
\item python list tutorial
|
||||||
|
\item banana bread recipe
|
||||||
|
\item news norway
|
||||||
|
\end{itemize}
|
||||||
|
\end{multicols}
|
||||||
|
|
||||||
|
Data collection is performed using either a Tor proxy to help hide the identity of the person performing the web searches, or a normal network connection where web traffic may be used to identify the user.
|
||||||
|
%ata collected is filtered using either Tor proxy to hide the identity of the person premforing websearch, and not using any proxy, where any web traffic can identify you traffic.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Test environment\label{sec:metho:test_environment}}
|
\subsection{Test environment\label{sec:metho:test_environment}}
|
||||||
% Stikkord:
|
% Stikkord:
|
||||||
% operating system / controlled environment
|
% operating system / controlled environment
|
||||||
@@ -18,7 +57,31 @@
|
|||||||
% cookies allowed
|
% cookies allowed
|
||||||
% same wait condition and timeout
|
% same wait condition and timeout
|
||||||
|
|
||||||
\subsection{Search engines and search queries\label{sec:metho:search_engines}}
|
|
||||||
|
%When you tap ctrl + shift + C
|
||||||
|
When pressing \texttt{Ctrl + Shift + C} and clicking on \texttt{Network}, a log of network traffic is shown. This window is kept open and the web history is cleared before performing a web search manually. This process gives a clean, anonymous log of the web traffic from only one web query, as shown in Figure~\ref{fig:metho:manually_har}. To the right of \texttt{"No throttling"} is a settings icon. Clicking on that button gives the options shown in Figure~\ref{fig:metho:export_har}.
|
||||||
|
Each query could be performed manually, or the process of collecting data could be automated. For this research, the process of collecting first-hand raw data was automated. The tool used to automate the web queries is Python with Playwright~\parencite{Playwright}. Playwright is installed in a Python virtual environment~\parencite{VENV}. The automation script is \texttt{scripts/capture\_search\_har.py}.
|
||||||
|
|
||||||
|
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[
|
||||||
|
width=\linewidth,
|
||||||
|
]{figures/png/09_importing_har_manually.png}
|
||||||
|
\caption{Network traffic from a simple web search.}
|
||||||
|
\label{fig:metho:manually_har}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\includegraphics[
|
||||||
|
width=0.27\linewidth,
|
||||||
|
]{figures/png/10_har_options.png}
|
||||||
|
\caption{Options for downloading HAR files.}
|
||||||
|
\label{fig:metho:export_har}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
%\subsection{Search engines and search queries\label{sec:metho:search_engines}}
|
||||||
% Stikkord:
|
% Stikkord:
|
||||||
% Google
|
% Google
|
||||||
% Bing
|
% Bing
|
||||||
@@ -27,7 +90,7 @@
|
|||||||
% list of search queries
|
% list of search queries
|
||||||
% same query used across all engines
|
% same query used across all engines
|
||||||
|
|
||||||
\subsection{Variables and measurements\label{sec:metho:Variables_measurements}}
|
%\subsection{Variables and measurements\label{sec:metho:Variables_measurements}}
|
||||||
% Stikkord:
|
% Stikkord:
|
||||||
% requests_total
|
% requests_total
|
||||||
% unique_domains
|
% unique_domains
|
||||||
|
|||||||
Reference in New Issue
Block a user