This commit is contained in:
2026-05-11 13:17:25 +02:00
commit db0309eedc
14 changed files with 3563 additions and 0 deletions

40
.gitignore vendored Normal file
View File

@@ -0,0 +1,40 @@
# LaTeX build files
*.aux
*.log
*.out
*.toc
*.fls
*.fdb_latexmk
*.synctex.gz
# PDF (optional)
*.pdf
# Temporary
*.blg
*.bbl
*.lof
*.lot
*.bbl-SAVE-ERROR
# Editor: ignore local VS Code state, but keep the shared settings file.
# (The former pattern "!.vscode/settings.json.bcf" could never take effect:
# the "*.bcf" rule right below re-ignores anything it matches.)
.vscode/*
!.vscode/settings.json
*.bcf
*.run.xml
work/*/
# Ignore raw/generated files
*.har
# Ignore all work output
work/*
# But keep shell scripts
!work/*.sh
.noroff-env

78
report/.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,78 @@
{
"latex-workshop.latex.autoBuild.run": "onFileChange",
"latex-workshop.view.pdf.viewer": "tab",
"latex-workshop.synctex.afterBuild.enabled": true,
"latex-workshop.message.error.show": false,
"latex-workshop.message.warning.show": false,
"latex-workshop.latex.outDir": ".",
"latex-workshop.latex.tools": [
{
"name": "latexmk",
"command": "latexmk",
"args": [
"-pdf",
"-interaction=nonstopmode",
"-synctex=1",
"-f",
"%DOC%"
]
}
],
"latex-workshop.latex.recipes": [
{
"name": "latexmk",
"tools": ["latexmk"]
}
],
"latex-workshop.latex.clean.fileTypes": [
"*.aux",
"*.bbl",
"*.bcf",
"*.blg",
"*.fdb_latexmk",
"*.fls",
"*.lof",
"*.log",
"*.lot",
"*.out",
"*.run.xml",
"*.synctex.gz",
"*.toc"
],
"files.watcherExclude": {
"**/*.aux": true,
"**/*.bbl": true,
"**/*.bcf": true,
"**/*.blg": true,
"**/*.fdb_latexmk": true,
"**/*.fls": true,
"**/*.lof": true,
"**/*.log": true,
"**/*.lot": true,
"**/*.out": true,
"**/*.run.xml": true,
"**/*.synctex.gz": true,
"**/*.toc": true
},
"search.exclude": {
"**/*.aux": true,
"**/*.bbl": true,
"**/*.bcf": true,
"**/*.blg": true,
"**/*.fdb_latexmk": true,
"**/*.fls": true,
"**/*.lof": true,
"**/*.log": true,
"**/*.lot": true,
"**/*.out": true,
"**/*.run.xml": true,
"**/*.synctex.gz": true,
"**/*.toc": true
}
}

80
report/main.tex Normal file
View File

@@ -0,0 +1,80 @@
% !TeX root = main.tex
% Main file of the report: loads packages, sets up hyperlinks, listings and
% the bibliography, then pulls in the individual sections from sections/.
\documentclass[12pt,a4paper]{article}

% --- Encoding, language and fonts ---
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage{lmodern}

% --- Page layout ---
\usepackage[a4paper,margin=2.5cm]{geometry}
\usepackage{setspace}
\onehalfspacing

% --- Figures, tables, maths, links, code listings ---
\usepackage{graphicx}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{hyperref}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{lipsum}
% cleveref is loaded after hyperref on purpose.
\usepackage{cleveref}

% --- Bibliography (biblatex with the biber backend, author-year style) ---
\usepackage[backend=biber,style=authoryear]{biblatex}
\addbibresource{references.bib}

% Coloured links instead of boxes: black for internal links, blue for URLs
% and citations.
\hypersetup{
  colorlinks=true,
  linkcolor=black,
  urlcolor=blue,
  citecolor=blue
}

% Default appearance of code listings.
\lstset{
  basicstyle=\ttfamily\small,
  breaklines=true,
  frame=single,
  numbers=left,
  numberstyle=\tiny,
  keywordstyle=\color{blue},
  commentstyle=\color{gray},
  stringstyle=\color{teal}
}

\title{Analysis of Cookie Activity in Web Search Traffic}
\author{Tord-Vincent Heggland}
\date{\today}

\begin{document}

% Front matter (title, abstract, table of contents) uses roman page numbers.
\pagenumbering{roman}
\maketitle
\begin{abstract}
\label{abs:abstr}
\centering
% Placeholder text until the real abstract is written.
\lipsum[1]
\end{abstract}
\clearpage
\tableofcontents
\clearpage

% Main matter restarts the page count with arabic numerals.
\pagenumbering{arabic}
\setcounter{page}{1}
\input{sections/introduction.tex}
\input{sections/method.tex}
\input{sections/results.tex}
%\begin{figure}[h]
% \centering
% \includegraphics[width=\textwidth]{figures/Figure1.png}
% \caption{Total video game sales by genre in North America (millions of units).}
%\label{fig:Figure1}
%\end{figure}
\input{sections/discussion.tex}
\input{sections/conclusion.tex}
\clearpage
% Keep the entry in the bibliography without printing an in-text citation on
% a page of its own (a bare \cite here produced a page holding only the
% citation).
% NOTE(review): this entry (a video game sales dataset) looks like a leftover
% placeholder unrelated to the report's topic -- confirm and replace once real
% sources are cited in the sections.
\nocite{noauthor_video_nodate}
\printbibliography[title={References}]
\end{document}

9
report/references.bib Normal file
View File

@@ -0,0 +1,9 @@
@online{noauthor_video_nodate,
title = {Video Game Sales Dataset Updated -Extra Feat},
url = {https://www.kaggle.com/datasets/ibriiee/video-games-sales-dataset-2022-updated-extra-feat},
abstract = {Uncover the Gaming Industry Trends with the Most Comprehensive Sales Data},
urldate = {2026-04-21},
langid = {english},
file = {Snapshot:/home/tvh/snap/zotero-snap/common/Zotero/storage/C5LJ5QMG/video-games-sales-dataset-2022-updated-extra-feat.html:text/html},
}

View File

@@ -0,0 +1,8 @@
\section{Conclusion\label{sec:concl}}
% \subsection{Summary}
% \subsection{Hypothesis evaluation}
% \subsection{Future work}

View File

@@ -0,0 +1,10 @@
\section{Discussion\label{sec:discu}}
% \subsection{Interpretation of findings}
% \subsection{Privacy implications}
% \subsection{Reliability and limitations}
% \subsection{Ethical considerations}

View File

@@ -0,0 +1,8 @@
\section{Introduction\label{sec:intro}}
\subsection{Background\label{sec:intro:background}}
\subsection{Problem statement\label{sec:intro:statement}}
\subsection{Research objectives\label{sec:intro:research}}
\subsection{Hypotheses\label{sec:intro:hypotheses}}

View File

@@ -0,0 +1,71 @@
% Method section skeleton: every subsection currently holds only a keyword
% outline (as comments) of what the finished text should cover.
\section{Method\label{sec:metho}}
\subsection{Research design\label{sec:metho:research_design}}
% Keywords:
% observational study
% browser-based network measurements
% same searches across search engines
% comparison between search engines, browsers, and network modes
\subsection{Test environment\label{sec:metho:test_environment}}
% Keywords:
% operating system / controlled environment
% Playwright
% Chromium and Firefox
% normal network and Tor proxy
% clean browser context
% cookies allowed
% same wait condition and timeout
\subsection{Search engines and search queries\label{sec:metho:search_engines}}
% Keywords:
% Google
% Bing
% DuckDuckGo
% Brave Search
% list of search queries
% same query used across all engines
\subsection{Variables and measurements\label{sec:metho:Variables_measurements}}
% Keywords:
% requests_total
% unique_domains
% third_party_requests
% request_cookies_total
% response_cookies_total
% query_params_total
% post_requests_total
% tracking_hint_requests
% transferred_kb_approx
% page_load_ms
% HTTP status groups
\subsection{Data collection\label{sec:metho:data_collection}}
% Keywords:
% HAR files
% one HAR file per search engine/query/browser/network mode
% capture_search_har script
% headed browser
% wait-until load
% timeout 60000 ms
% Tor via SOCKS proxy where applicable
\subsection{Data processing\label{sec:metho:data_processing}}
% Keywords:
% HAR files converted to CSV
% har_entries.csv: one row per HAR entry/request
% har_summary.csv: one row per HAR file
% Power Query used to combine summary files
% folder names used to identify browser/network mode
\subsection{Limitations of the method\label{sec:metho:limitations}}
% Keywords:
% HAR shows observable browser-side traffic only
% cannot prove server-side storage
% Playwright may differ from manual browsing
% Tor may change website behaviour
% cookie consent state affects results
% tracking_hint is keyword-based, not proof of tracking

View File

@@ -0,0 +1,18 @@
\section{Results\label{sec:resul}}
% \subsection{Dataset overview}
% \subsection{Exploratory Data Analysis}
% \subsection{Cookie activity by search engine}
% \subsection{Third-party request analysis}
% \subsection{Tracking-related domains}
% \subsection{Temporal patterns}
% \subsection{Outliers and anomalies}
% \subsection{Summary of findings}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,9 @@
har_filename,search_engine,query_text,requests_total,unique_domains,third_party_requests,request_cookies_total,response_cookies_total,query_params_total,post_requests_total,tracking_hint_requests,transferred_kb_approx,page_load_ms,status_2xx,status_3xx,status_4xx,status_5xx
20260507_143612_bing_migraine_symptoms.har,Unknown,migraine symptoms,465,6,0,4922,57,282,11,6,3218.75,1671.0,458,4,1,0
20260507_143612_brave_migraine_symptoms.har,Unknown,migraine symptoms,172,3,0,0,0,7,9,3,1965.89,5181.12,171,0,0,0
20260507_143612_duckduckgo_migraine_symptoms.har,DuckDuckGo,migraine symptoms,73,4,0,0,0,930,31,32,2122.14,1543.0,73,0,0,0
20260507_143612_google_migraine_symptoms.har,Google,migraine symptoms,13,3,6,19,5,21,0,1,4039.12,1278.0,11,1,1,0
20260507_144223_bing_migraine_symptoms.har,Unknown,migraine symptoms,483,6,0,4816,48,274,15,4,3298.3,6589.0,470,3,2,0
20260507_144223_brave_migraine_symptoms.har,Unknown,migraine symptoms,197,3,0,0,0,7,9,3,1877.96,5905.22,194,0,3,0
20260507_144223_duckduckgo_migraine_symptoms.har,DuckDuckGo,migraine symptoms,72,4,0,0,0,984,30,31,2109.12,3062.0,72,0,0,0
20260507_144223_google_migraine_symptoms.har,Google,migraine symptoms,7,2,3,9,3,13,0,0,1317.34,2353.61,4,1,1,0
1 har_filename search_engine query_text requests_total unique_domains third_party_requests request_cookies_total response_cookies_total query_params_total post_requests_total tracking_hint_requests transferred_kb_approx page_load_ms status_2xx status_3xx status_4xx status_5xx
2 20260507_143612_bing_migraine_symptoms.har Unknown migraine symptoms 465 6 0 4922 57 282 11 6 3218.75 1671.0 458 4 1 0
3 20260507_143612_brave_migraine_symptoms.har Unknown migraine symptoms 172 3 0 0 0 7 9 3 1965.89 5181.12 171 0 0 0
4 20260507_143612_duckduckgo_migraine_symptoms.har DuckDuckGo migraine symptoms 73 4 0 0 0 930 31 32 2122.14 1543.0 73 0 0 0
5 20260507_143612_google_migraine_symptoms.har Google migraine symptoms 13 3 6 19 5 21 0 1 4039.12 1278.0 11 1 1 0
6 20260507_144223_bing_migraine_symptoms.har Unknown migraine symptoms 483 6 0 4816 48 274 15 4 3298.3 6589.0 470 3 2 0
7 20260507_144223_brave_migraine_symptoms.har Unknown migraine symptoms 197 3 0 0 0 7 9 3 1877.96 5905.22 194 0 3 0
8 20260507_144223_duckduckgo_migraine_symptoms.har DuckDuckGo migraine symptoms 72 4 0 0 0 984 30 31 2109.12 3062.0 72 0 0 0
9 20260507_144223_google_migraine_symptoms.har Google migraine symptoms 7 2 3 9 3 13 0 0 1317.34 2353.61 4 1 1 0

View File

@@ -0,0 +1,9 @@
har_filename,search_engine,query_text,requests_total,unique_domains,third_party_requests,request_cookies_total,response_cookies_total,query_params_total,post_requests_total,tracking_hint_requests,transferred_kb_approx,page_load_ms,status_2xx,status_3xx,status_4xx,status_5xx
20260507_164651_bing_weather_oslo.har,Unknown,weather oslo,458,6,0,4224,40,273,13,4,2892.66,5031.0,446,3,2,0
20260507_164651_brave_weather_oslo.har,Unknown,weather oslo,96,2,0,0,0,1,0,0,806.42,1731.0,95,0,1,0
20260507_164651_duckduckgo_weather_oslo.har,DuckDuckGo,weather oslo,76,4,0,0,0,981,29,30,2411.64,2986.0,76,0,0,0
20260507_164651_google_weather_oslo.har,Google,weather oslo,12,3,6,15,3,19,0,2,3990.94,9253.0,10,1,1,0
20260507_164805_bing_weather_oslo.har,Unknown,weather oslo,614,5,0,6856,71,350,8,8,3299.72,1639.0,608,4,0,0
20260507_164805_brave_weather_oslo.har,Unknown,weather oslo,154,3,0,0,0,1,9,3,1214.27,1094.0,154,0,0,0
20260507_164805_duckduckgo_weather_oslo.har,DuckDuckGo,weather oslo,82,4,0,0,0,925,35,30,2449.43,1289.0,82,0,0,0
20260507_164805_google_weather_oslo.har,Google,weather oslo,2,1,0,4,5,3,0,0,49.69,496.65,1,1,0,0
1 har_filename search_engine query_text requests_total unique_domains third_party_requests request_cookies_total response_cookies_total query_params_total post_requests_total tracking_hint_requests transferred_kb_approx page_load_ms status_2xx status_3xx status_4xx status_5xx
2 20260507_164651_bing_weather_oslo.har Unknown weather oslo 458 6 0 4224 40 273 13 4 2892.66 5031.0 446 3 2 0
3 20260507_164651_brave_weather_oslo.har Unknown weather oslo 96 2 0 0 0 1 0 0 806.42 1731.0 95 0 1 0
4 20260507_164651_duckduckgo_weather_oslo.har DuckDuckGo weather oslo 76 4 0 0 0 981 29 30 2411.64 2986.0 76 0 0 0
5 20260507_164651_google_weather_oslo.har Google weather oslo 12 3 6 15 3 19 0 2 3990.94 9253.0 10 1 1 0
6 20260507_164805_bing_weather_oslo.har Unknown weather oslo 614 5 0 6856 71 350 8 8 3299.72 1639.0 608 4 0 0
7 20260507_164805_brave_weather_oslo.har Unknown weather oslo 154 3 0 0 0 1 9 3 1214.27 1094.0 154 0 0 0
8 20260507_164805_duckduckgo_weather_oslo.har DuckDuckGo weather oslo 82 4 0 0 0 925 35 30 2449.43 1289.0 82 0 0 0
9 20260507_164805_google_weather_oslo.har Google weather oslo 2 1 0 4 5 3 0 0 49.69 496.65 1 1 0 0

55
work/many_search.sh Executable file
View File

@@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Runs every query in QUERIES through capture_search_har four times:
# Chromium and Firefox, each over the normal network and through the
# SOCKS5 proxy at 127.0.0.1:9050 (the tor_* output directories).
# capture_search_har is an external command and must be available on PATH.
# Any failing capture aborts the whole run (set -e).
set -euo pipefail

QUERIES=(
  "weather oslo"
  "migraine symptoms"
  "vitamin d deficiency"
  "running shoes"
  "coffee grinder"
  "best laptop for students"
  "electric car charging"
  "cheap flights to london"
  "home insurance"
  "python list tutorial"
  "banana bread recipe"
  "news norway"
)

# run_capture QUERY BROWSER OUTPUT_DIR [EXTRA_ARGS...]
# Performs one headed capture with the shared wait condition and timeout.
# EXTRA_ARGS (e.g. the proxy option) are appended after the common options.
run_capture() {
  local search_text=$1
  local browser_name=$2
  local target_dir=$3
  shift 3

  capture_search_har \
    --query "$search_text" \
    --browser "$browser_name" \
    --wait-until load \
    --headed \
    --output-dir "$target_dir" \
    --timeout-ms 60000 \
    "$@"
}

for query in "${QUERIES[@]}"; do
  echo "Running query: $query"

  # Same order as before: normal/tor Chromium first, then tor/normal Firefox.
  run_capture "$query" chromium normal_chromium
  run_capture "$query" chromium tor_chromium --proxy socks5://127.0.0.1:9050
  run_capture "$query" firefox tor_firefox --proxy socks5://127.0.0.1:9050
  run_capture "$query" firefox normal_firefox
done