first
This commit is contained in:
40
.gitignore
vendored
Normal file
40
.gitignore
vendored
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# LaTeX build files
|
||||||
|
*.aux
|
||||||
|
*.log
|
||||||
|
*.out
|
||||||
|
*.toc
|
||||||
|
*.fls
|
||||||
|
*.fdb_latexmk
|
||||||
|
*.synctex.gz
|
||||||
|
|
||||||
|
# PDF (optional)
|
||||||
|
*.pdf
|
||||||
|
|
||||||
|
# Temporary
|
||||||
|
*.blg
|
||||||
|
*.bbl
|
||||||
|
*.lof
|
||||||
|
*.lot
|
||||||
|
*.bbl-SAVE-ERROR
|
||||||
|
|
||||||
|
# Editor
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/settings.json
|
||||||
|
|
||||||
|
*.bcf
|
||||||
|
*.run.xml
|
||||||
|
work/*/
|
||||||
|
|
||||||
|
# Ignore raw/generated files
|
||||||
|
*.har
|
||||||
|
|
||||||
|
|
||||||
|
# Ignore all work output
|
||||||
|
work/*
|
||||||
|
|
||||||
|
|
||||||
|
# But keep shell scripts
|
||||||
|
!work/*.sh
|
||||||
|
|
||||||
|
|
||||||
|
.noroff-env
|
||||||
78
report/.vscode/settings.json
vendored
Normal file
78
report/.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
{
|
||||||
|
"latex-workshop.latex.autoBuild.run": "onFileChange",
|
||||||
|
"latex-workshop.view.pdf.viewer": "tab",
|
||||||
|
"latex-workshop.synctex.afterBuild.enabled": true,
|
||||||
|
"latex-workshop.message.error.show": false,
|
||||||
|
"latex-workshop.message.warning.show": false,
|
||||||
|
|
||||||
|
"latex-workshop.latex.outDir": ".",
|
||||||
|
|
||||||
|
"latex-workshop.latex.tools": [
|
||||||
|
{
|
||||||
|
"name": "latexmk",
|
||||||
|
"command": "latexmk",
|
||||||
|
"args": [
|
||||||
|
"-pdf",
|
||||||
|
"-interaction=nonstopmode",
|
||||||
|
"-synctex=1",
|
||||||
|
"-f",
|
||||||
|
"%DOC%"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
|
||||||
|
"latex-workshop.latex.recipes": [
|
||||||
|
{
|
||||||
|
"name": "latexmk",
|
||||||
|
"tools": ["latexmk"]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
|
||||||
|
"latex-workshop.latex.clean.fileTypes": [
|
||||||
|
"*.aux",
|
||||||
|
"*.bbl",
|
||||||
|
"*.bcf",
|
||||||
|
"*.blg",
|
||||||
|
"*.fdb_latexmk",
|
||||||
|
"*.fls",
|
||||||
|
"*.lof",
|
||||||
|
"*.log",
|
||||||
|
"*.lot",
|
||||||
|
"*.out",
|
||||||
|
"*.run.xml",
|
||||||
|
"*.synctex.gz",
|
||||||
|
"*.toc"
|
||||||
|
],
|
||||||
|
|
||||||
|
"files.watcherExclude": {
|
||||||
|
"**/*.aux": true,
|
||||||
|
"**/*.bbl": true,
|
||||||
|
"**/*.bcf": true,
|
||||||
|
"**/*.blg": true,
|
||||||
|
"**/*.fdb_latexmk": true,
|
||||||
|
"**/*.fls": true,
|
||||||
|
"**/*.lof": true,
|
||||||
|
"**/*.log": true,
|
||||||
|
"**/*.lot": true,
|
||||||
|
"**/*.out": true,
|
||||||
|
"**/*.run.xml": true,
|
||||||
|
"**/*.synctex.gz": true,
|
||||||
|
"**/*.toc": true
|
||||||
|
},
|
||||||
|
|
||||||
|
"search.exclude": {
|
||||||
|
"**/*.aux": true,
|
||||||
|
"**/*.bbl": true,
|
||||||
|
"**/*.bcf": true,
|
||||||
|
"**/*.blg": true,
|
||||||
|
"**/*.fdb_latexmk": true,
|
||||||
|
"**/*.fls": true,
|
||||||
|
"**/*.lof": true,
|
||||||
|
"**/*.log": true,
|
||||||
|
"**/*.lot": true,
|
||||||
|
"**/*.out": true,
|
||||||
|
"**/*.run.xml": true,
|
||||||
|
"**/*.synctex.gz": true,
|
||||||
|
"**/*.toc": true
|
||||||
|
}
|
||||||
|
}
|
||||||
80
report/main.tex
Normal file
80
report/main.tex
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
% !TeX root = main.tex
|
||||||
|
\documentclass[12pt,a4paper]{article}
|
||||||
|
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
\usepackage[T1]{fontenc}
|
||||||
|
\usepackage[english]{babel}
|
||||||
|
\usepackage{lmodern}
|
||||||
|
|
||||||
|
\usepackage[a4paper,margin=2.5cm]{geometry}
|
||||||
|
\usepackage{setspace}
|
||||||
|
\onehalfspacing
|
||||||
|
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{booktabs}
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{hyperref}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\usepackage{listings}
|
||||||
|
\usepackage{lipsum}
|
||||||
|
|
||||||
|
\usepackage{cleveref}
|
||||||
|
|
||||||
|
\usepackage[backend=biber,style=authoryear]{biblatex}
|
||||||
|
\addbibresource{references.bib}
|
||||||
|
|
||||||
|
\hypersetup{
|
||||||
|
colorlinks=true,
|
||||||
|
linkcolor=black,
|
||||||
|
urlcolor=blue,
|
||||||
|
citecolor=blue
|
||||||
|
}
|
||||||
|
|
||||||
|
\lstset{
|
||||||
|
basicstyle=\ttfamily\small,
|
||||||
|
breaklines=true,
|
||||||
|
frame=single,
|
||||||
|
numbers=left,
|
||||||
|
numberstyle=\tiny,
|
||||||
|
keywordstyle=\color{blue},
|
||||||
|
commentstyle=\color{gray},
|
||||||
|
stringstyle=\color{teal}
|
||||||
|
}
|
||||||
|
|
||||||
|
\title{Analysis of Cookie Activity in Web Search Traffic}
|
||||||
|
\author{Tord-Vincent Heggland}
|
||||||
|
\date{\today}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
\pagenumbering{roman}
|
||||||
|
\maketitle
|
||||||
|
\begin{abstract}
|
||||||
|
\label{abs:abstr}
|
||||||
|
\centering
|
||||||
|
\lipsum[1]
|
||||||
|
\end{abstract}
|
||||||
|
\clearpage
|
||||||
|
\tableofcontents
|
||||||
|
\clearpage
|
||||||
|
\pagenumbering{arabic}
|
||||||
|
|
||||||
|
\setcounter{page}{1}
|
||||||
|
|
||||||
|
\input{sections/introduction.tex}
|
||||||
|
\input{sections/method.tex}
|
||||||
|
|
||||||
|
\input{sections/results.tex}
|
||||||
|
%\begin{figure}[h]
|
||||||
|
% \centering
|
||||||
|
% \includegraphics[width=\textwidth]{figures/Figure1.png}
|
||||||
|
% \caption{Total video game sales by genre in North America (millions of units).}
|
||||||
|
%\label{fig:Figure1}
|
||||||
|
%\end{figure}
|
||||||
|
\input{sections/discussion.tex}
|
||||||
|
\input{sections/conclusion.tex}
|
||||||
|
|
||||||
|
\clearpage
|
||||||
|
\cite{noauthor_video_nodate}
|
||||||
|
\clearpage
|
||||||
|
\printbibliography[title={References}]
|
||||||
|
\end{document}
|
||||||
9
report/references.bib
Normal file
9
report/references.bib
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
|
||||||
|
@online{noauthor_video_nodate,
|
||||||
|
title = {Video Game Sales Dataset Updated -Extra Feat},
|
||||||
|
url = {https://www.kaggle.com/datasets/ibriiee/video-games-sales-dataset-2022-updated-extra-feat},
|
||||||
|
abstract = {Uncover the Gaming Industry Trends with the Most Comprehensive Sales Data},
|
||||||
|
urldate = {2026-04-21},
|
||||||
|
langid = {english},
|
||||||
|
file = {Snapshot:/home/tvh/snap/zotero-snap/common/Zotero/storage/C5LJ5QMG/video-games-sales-dataset-2022-updated-extra-feat.html:text/html},
|
||||||
|
}
|
||||||
8
report/sections/conclusion.tex
Normal file
8
report/sections/conclusion.tex
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
\section{Conclusion\label{sec:concl}}
|
||||||
|
|
||||||
|
|
||||||
|
% \subsection{Summary}
|
||||||
|
|
||||||
|
% \subsection{Hypothesis evaluation}
|
||||||
|
|
||||||
|
% \subsection{Future work}
|
||||||
10
report/sections/discussion.tex
Normal file
10
report/sections/discussion.tex
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
\section{Discussion\label{sec:discu}}
|
||||||
|
|
||||||
|
|
||||||
|
% \subsection{Interpretation of findings}
|
||||||
|
|
||||||
|
% \subsection{Privacy implications}
|
||||||
|
|
||||||
|
% \subsection{Reliability and limitations}
|
||||||
|
|
||||||
|
% \subsection{Ethical considerations}
|
||||||
8
report/sections/introduction.tex
Normal file
8
report/sections/introduction.tex
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
\section{Introduction\label{sec:intro}}
|
||||||
|
\subsection{Background\label{sec:intro:background}}
|
||||||
|
|
||||||
|
\subsection{Problem statement\label{sec:intro:statement}}
|
||||||
|
|
||||||
|
\subsection{Research objectives\label{sec:intro:research}}
|
||||||
|
|
||||||
|
\subsection{Hypotheses\label{sec:intro:hypotheses}}
|
||||||
71
report/sections/method.tex
Normal file
71
report/sections/method.tex
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
\section{Method\label{sec:metho}}
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Research design\label{sec:metho:research_design}}
|
||||||
|
% Stikkord:
|
||||||
|
% observational study
|
||||||
|
% browser-based network measurements
|
||||||
|
% same searches across search engines
|
||||||
|
% comparison between search engines, browsers, and network modes
|
||||||
|
|
||||||
|
\subsection{Test environment\label{sec:metho:test_environment}}
|
||||||
|
% Stikkord:
|
||||||
|
% operating system / controlled environment
|
||||||
|
% Playwright
|
||||||
|
% Chromium and Firefox
|
||||||
|
% normal network and Tor proxy
|
||||||
|
% clean browser context
|
||||||
|
% cookies allowed
|
||||||
|
% same wait condition and timeout
|
||||||
|
|
||||||
|
\subsection{Search engines and search queries\label{sec:metho:search_engines}}
|
||||||
|
% Stikkord:
|
||||||
|
% Google
|
||||||
|
% Bing
|
||||||
|
% DuckDuckGo
|
||||||
|
% Brave Search
|
||||||
|
% list of search queries
|
||||||
|
% same query used across all engines
|
||||||
|
|
||||||
|
\subsection{Variables and measurements\label{sec:metho:Variables_measurements}}
|
||||||
|
% Stikkord:
|
||||||
|
% requests_total
|
||||||
|
% unique_domains
|
||||||
|
% third_party_requests
|
||||||
|
% request_cookies_total
|
||||||
|
% response_cookies_total
|
||||||
|
% query_params_total
|
||||||
|
% post_requests_total
|
||||||
|
% tracking_hint_requests
|
||||||
|
% transferred_kb_approx
|
||||||
|
% page_load_ms
|
||||||
|
% HTTP status groups
|
||||||
|
|
||||||
|
\subsection{Data collection\label{sec:metho:data_collection}}
|
||||||
|
% Stikkord:
|
||||||
|
% HAR files
|
||||||
|
% one HAR file per search engine/query/browser/network mode
|
||||||
|
% capture_search_har script
|
||||||
|
% headed browser
|
||||||
|
% wait-until load
|
||||||
|
% timeout 60000 ms
|
||||||
|
% Tor via SOCKS proxy where applicable
|
||||||
|
|
||||||
|
\subsection{Data processing\label{sec:metho:data_processing}}
|
||||||
|
% Stikkord:
|
||||||
|
% HAR files converted to CSV
|
||||||
|
% har_entries.csv: one row per HAR entry/request
|
||||||
|
% har_summary.csv: one row per HAR file
|
||||||
|
% Power Query used to combine summary files
|
||||||
|
% folder names used to identify browser/network mode
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Limitations of the method\label{sec:metho:limitations}}
|
||||||
|
% Stikkord:
|
||||||
|
% HAR shows observable browser-side traffic only
|
||||||
|
% cannot prove server-side storage
|
||||||
|
% Playwright may differ from manual browsing
|
||||||
|
% Tor may change website behaviour
|
||||||
|
% cookie consent state affects results
|
||||||
|
% tracking_hint is keyword-based, not proof of tracking
|
||||||
18
report/sections/results.tex
Normal file
18
report/sections/results.tex
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
\section{Results\label{sec:resul}}
|
||||||
|
|
||||||
|
|
||||||
|
% \subsection{Dataset overview}
|
||||||
|
|
||||||
|
% \subsection{Exploratory Data Analysis}
|
||||||
|
|
||||||
|
% \subsection{Cookie activity by search engine}
|
||||||
|
|
||||||
|
% \subsection{Third-party request analysis}
|
||||||
|
|
||||||
|
% \subsection{Tracking-related domains}
|
||||||
|
|
||||||
|
% \subsection{Temporal patterns}
|
||||||
|
|
||||||
|
% \subsection{Outliers and anomalies}
|
||||||
|
|
||||||
|
% \subsection{Summary of findings}
|
||||||
1685
results/entries_osloweather.csv
Normal file
1685
results/entries_osloweather.csv
Normal file
File diff suppressed because it is too large
Load Diff
1483
results/har_entries_tor_test.csv
Normal file
1483
results/har_entries_tor_test.csv
Normal file
File diff suppressed because it is too large
Load Diff
9
results/har_summary_tor_test.csv
Normal file
9
results/har_summary_tor_test.csv
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
har_filename,search_engine,query_text,requests_total,unique_domains,third_party_requests,request_cookies_total,response_cookies_total,query_params_total,post_requests_total,tracking_hint_requests,transferred_kb_approx,page_load_ms,status_2xx,status_3xx,status_4xx,status_5xx
|
||||||
|
20260507_143612_bing_migraine_symptoms.har,Unknown,migraine symptoms,465,6,0,4922,57,282,11,6,3218.75,1671.0,458,4,1,0
|
||||||
|
20260507_143612_brave_migraine_symptoms.har,Unknown,migraine symptoms,172,3,0,0,0,7,9,3,1965.89,5181.12,171,0,0,0
|
||||||
|
20260507_143612_duckduckgo_migraine_symptoms.har,DuckDuckGo,migraine symptoms,73,4,0,0,0,930,31,32,2122.14,1543.0,73,0,0,0
|
||||||
|
20260507_143612_google_migraine_symptoms.har,Google,migraine symptoms,13,3,6,19,5,21,0,1,4039.12,1278.0,11,1,1,0
|
||||||
|
20260507_144223_bing_migraine_symptoms.har,Unknown,migraine symptoms,483,6,0,4816,48,274,15,4,3298.3,6589.0,470,3,2,0
|
||||||
|
20260507_144223_brave_migraine_symptoms.har,Unknown,migraine symptoms,197,3,0,0,0,7,9,3,1877.96,5905.22,194,0,3,0
|
||||||
|
20260507_144223_duckduckgo_migraine_symptoms.har,DuckDuckGo,migraine symptoms,72,4,0,0,0,984,30,31,2109.12,3062.0,72,0,0,0
|
||||||
|
20260507_144223_google_migraine_symptoms.har,Google,migraine symptoms,7,2,3,9,3,13,0,0,1317.34,2353.61,4,1,1,0
|
||||||
|
9
results/summary_osloweather.csv
Normal file
9
results/summary_osloweather.csv
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
har_filename,search_engine,query_text,requests_total,unique_domains,third_party_requests,request_cookies_total,response_cookies_total,query_params_total,post_requests_total,tracking_hint_requests,transferred_kb_approx,page_load_ms,status_2xx,status_3xx,status_4xx,status_5xx
|
||||||
|
20260507_164651_bing_weather_oslo.har,Unknown,weather oslo,458,6,0,4224,40,273,13,4,2892.66,5031.0,446,3,2,0
|
||||||
|
20260507_164651_brave_weather_oslo.har,Unknown,weather oslo,96,2,0,0,0,1,0,0,806.42,1731.0,95,0,1,0
|
||||||
|
20260507_164651_duckduckgo_weather_oslo.har,DuckDuckGo,weather oslo,76,4,0,0,0,981,29,30,2411.64,2986.0,76,0,0,0
|
||||||
|
20260507_164651_google_weather_oslo.har,Google,weather oslo,12,3,6,15,3,19,0,2,3990.94,9253.0,10,1,1,0
|
||||||
|
20260507_164805_bing_weather_oslo.har,Unknown,weather oslo,614,5,0,6856,71,350,8,8,3299.72,1639.0,608,4,0,0
|
||||||
|
20260507_164805_brave_weather_oslo.har,Unknown,weather oslo,154,3,0,0,0,1,9,3,1214.27,1094.0,154,0,0,0
|
||||||
|
20260507_164805_duckduckgo_weather_oslo.har,DuckDuckGo,weather oslo,82,4,0,0,0,925,35,30,2449.43,1289.0,82,0,0,0
|
||||||
|
20260507_164805_google_weather_oslo.har,Google,weather oslo,2,1,0,4,5,3,0,0,49.69,496.65,1,1,0,0
|
||||||
|
55
work/many_search.sh
Executable file
55
work/many_search.sh
Executable file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Capture HAR files for a fixed list of search queries.
#
# Each query is captured four times, in this order:
#   1. chromium, no proxy          -> --output-dir normal_chromium
#   2. chromium, via SOCKS5 proxy  -> --output-dir tor_chromium
#   3. firefox,  via SOCKS5 proxy  -> --output-dir tor_firefox
#   4. firefox,  no proxy          -> --output-dir normal_firefox
#
# Usage: many_search.sh   (takes no arguments)
#
# Requires the external command `capture_search_har` on PATH.
# NOTE(review): the output directories are passed as relative names;
# presumably the tool resolves them against the current working
# directory - confirm before running from outside work/.
#
# The run is fail-fast: under `set -e` the first capture that exits
# non-zero aborts the remaining queries.

set -euo pipefail

# Flags shared by every capture; kept in one place so the four variants
# cannot drift apart.
readonly WAIT_UNTIL="load"
readonly TIMEOUT_MS=60000
# Proxy used for the tor_* captures.
readonly TOR_PROXY="socks5://127.0.0.1:9050"

readonly QUERIES=(
  "weather oslo"
  "migraine symptoms"
  "vitamin d deficiency"
  "running shoes"
  "coffee grinder"
  "best laptop for students"
  "electric car charging"
  "cheap flights to london"
  "home insurance"
  "python list tutorial"
  "banana bread recipe"
  "news norway"
)

#######################################
# Run one capture with the shared flags.
# Globals:   WAIT_UNTIL, TIMEOUT_MS (read)
# Arguments: $1 - query text
#            $2 - browser name passed to --browser
#            $3 - directory passed to --output-dir
#            $4... - extra flags appended verbatim (e.g. --proxy URL)
# Returns:   exit status of capture_search_har
#######################################
run_capture() {
  local query=$1
  local browser=$2
  local output_dir=$3
  shift 3

  capture_search_har \
    --query "$query" \
    --browser "$browser" \
    --wait-until "$WAIT_UNTIL" \
    --headed \
    --output-dir "$output_dir" \
    --timeout-ms "$TIMEOUT_MS" \
    "$@"
}

main() {
  # Fail early with a readable message instead of a bare
  # "command not found" in the middle of the first query.
  # 127 is the status the shell itself uses for a missing command.
  if ! command -v capture_search_har >/dev/null 2>&1; then
    printf '%s\n' "error: capture_search_har not found on PATH" >&2
    exit 127
  fi

  local query
  for query in "${QUERIES[@]}"; do
    printf 'Running query: %s\n' "$query"

    run_capture "$query" chromium normal_chromium
    run_capture "$query" chromium tor_chromium --proxy "$TOR_PROXY"
    run_capture "$query" firefox tor_firefox --proxy "$TOR_PROXY"
    run_capture "$query" firefox normal_firefox
  done
}

main "$@"
|
||||||
Reference in New Issue
Block a user