first
This commit is contained in:
40
.gitignore
vendored
Normal file
40
.gitignore
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
# LaTeX build files
|
||||
*.aux
|
||||
*.log
|
||||
*.out
|
||||
*.toc
|
||||
*.fls
|
||||
*.fdb_latexmk
|
||||
*.synctex.gz
|
||||
|
||||
# PDF (optional)
|
||||
*.pdf
|
||||
|
||||
# Temporary
|
||||
*.blg
|
||||
*.bbl
|
||||
*.lof
|
||||
*.lot
|
||||
*.bbl-SAVE-ERROR
|
||||
|
||||
# Editor
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
|
||||
*.bcf
|
||||
*.run.xml
|
||||
work/*/
|
||||
|
||||
# Ignore raw/generated files
|
||||
*.har
|
||||
|
||||
|
||||
# Ignore all work output
|
||||
work/*
|
||||
|
||||
|
||||
# But keep shell scripts
|
||||
!work/*.sh
|
||||
|
||||
|
||||
.noroff-env
|
||||
78
report/.vscode/settings.json
vendored
Normal file
78
report/.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
{
|
||||
"latex-workshop.latex.autoBuild.run": "onFileChange",
|
||||
"latex-workshop.view.pdf.viewer": "tab",
|
||||
"latex-workshop.synctex.afterBuild.enabled": true,
|
||||
"latex-workshop.message.error.show": false,
|
||||
"latex-workshop.message.warning.show": false,
|
||||
|
||||
"latex-workshop.latex.outDir": ".",
|
||||
|
||||
"latex-workshop.latex.tools": [
|
||||
{
|
||||
"name": "latexmk",
|
||||
"command": "latexmk",
|
||||
"args": [
|
||||
"-pdf",
|
||||
"-interaction=nonstopmode",
|
||||
"-synctex=1",
|
||||
"-f",
|
||||
"%DOC%"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
"latex-workshop.latex.recipes": [
|
||||
{
|
||||
"name": "latexmk",
|
||||
"tools": ["latexmk"]
|
||||
}
|
||||
],
|
||||
|
||||
"latex-workshop.latex.clean.fileTypes": [
|
||||
"*.aux",
|
||||
"*.bbl",
|
||||
"*.bcf",
|
||||
"*.blg",
|
||||
"*.fdb_latexmk",
|
||||
"*.fls",
|
||||
"*.lof",
|
||||
"*.log",
|
||||
"*.lot",
|
||||
"*.out",
|
||||
"*.run.xml",
|
||||
"*.synctex.gz",
|
||||
"*.toc"
|
||||
],
|
||||
|
||||
"files.watcherExclude": {
|
||||
"**/*.aux": true,
|
||||
"**/*.bbl": true,
|
||||
"**/*.bcf": true,
|
||||
"**/*.blg": true,
|
||||
"**/*.fdb_latexmk": true,
|
||||
"**/*.fls": true,
|
||||
"**/*.lof": true,
|
||||
"**/*.log": true,
|
||||
"**/*.lot": true,
|
||||
"**/*.out": true,
|
||||
"**/*.run.xml": true,
|
||||
"**/*.synctex.gz": true,
|
||||
"**/*.toc": true
|
||||
},
|
||||
|
||||
"search.exclude": {
|
||||
"**/*.aux": true,
|
||||
"**/*.bbl": true,
|
||||
"**/*.bcf": true,
|
||||
"**/*.blg": true,
|
||||
"**/*.fdb_latexmk": true,
|
||||
"**/*.fls": true,
|
||||
"**/*.lof": true,
|
||||
"**/*.log": true,
|
||||
"**/*.lot": true,
|
||||
"**/*.out": true,
|
||||
"**/*.run.xml": true,
|
||||
"**/*.synctex.gz": true,
|
||||
"**/*.toc": true
|
||||
}
|
||||
}
|
||||
80
report/main.tex
Normal file
80
report/main.tex
Normal file
@@ -0,0 +1,80 @@
|
||||
% !TeX root = main.tex
|
||||
\documentclass[12pt,a4paper]{article}
|
||||
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[english]{babel}
|
||||
\usepackage{lmodern}
|
||||
|
||||
\usepackage[a4paper,margin=2.5cm]{geometry}
|
||||
\usepackage{setspace}
|
||||
\onehalfspacing
|
||||
|
||||
\usepackage{graphicx}
|
||||
\usepackage{booktabs}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{xcolor}
|
||||
\usepackage{listings}
|
||||
\usepackage{lipsum}
|
||||
|
||||
\usepackage{cleveref}
|
||||
|
||||
\usepackage[backend=biber,style=authoryear]{biblatex}
|
||||
\addbibresource{references.bib}
|
||||
|
||||
\hypersetup{
|
||||
colorlinks=true,
|
||||
linkcolor=black,
|
||||
urlcolor=blue,
|
||||
citecolor=blue
|
||||
}
|
||||
|
||||
\lstset{
|
||||
basicstyle=\ttfamily\small,
|
||||
breaklines=true,
|
||||
frame=single,
|
||||
numbers=left,
|
||||
numberstyle=\tiny,
|
||||
keywordstyle=\color{blue},
|
||||
commentstyle=\color{gray},
|
||||
stringstyle=\color{teal}
|
||||
}
|
||||
|
||||
\title{Analysis of Cookie Activity in Web Search Traffic}
|
||||
\author{Tord-Vincent Heggland}
|
||||
\date{\today}
|
||||
|
||||
\begin{document}
|
||||
\pagenumbering{roman}
|
||||
\maketitle
|
||||
\begin{abstract}
|
||||
\label{abs:abstr}
|
||||
\centering
|
||||
\lipsum[1]
|
||||
\end{abstract}
|
||||
\clearpage
|
||||
\tableofcontents
|
||||
\clearpage
|
||||
\pagenumbering{arabic}
|
||||
|
||||
\setcounter{page}{1}
|
||||
|
||||
\input{sections/introduction.tex}
|
||||
\input{sections/method.tex}
|
||||
|
||||
\input{sections/results.tex}
|
||||
%\begin{figure}[h]
|
||||
% \centering
|
||||
% \includegraphics[width=\textwidth]{figures/Figure1.png}
|
||||
% \caption{Total video game sales by genre in North America (millions of units).}
|
||||
%\label{fig:Figure1}
|
||||
%\end{figure}
|
||||
\input{sections/discussion.tex}
|
||||
\input{sections/conclusion.tex}
|
||||
|
||||
\clearpage
|
||||
\cite{noauthor_video_nodate}
|
||||
\clearpage
|
||||
\printbibliography[title={References}]
|
||||
\end{document}
|
||||
9
report/references.bib
Normal file
9
report/references.bib
Normal file
@@ -0,0 +1,9 @@
|
||||
|
||||
@online{noauthor_video_nodate,
|
||||
title = {Video Game Sales Dataset Updated -Extra Feat},
|
||||
url = {https://www.kaggle.com/datasets/ibriiee/video-games-sales-dataset-2022-updated-extra-feat},
|
||||
abstract = {Uncover the Gaming Industry Trends with the Most Comprehensive Sales Data},
|
||||
urldate = {2026-04-21},
|
||||
langid = {english},
|
||||
file = {Snapshot:/home/tvh/snap/zotero-snap/common/Zotero/storage/C5LJ5QMG/video-games-sales-dataset-2022-updated-extra-feat.html:text/html},
|
||||
}
|
||||
8
report/sections/conclusion.tex
Normal file
8
report/sections/conclusion.tex
Normal file
@@ -0,0 +1,8 @@
|
||||
\section{Conclusion\label{sec:concl}}
|
||||
|
||||
|
||||
% \subsection{Summary}
|
||||
|
||||
% \subsection{Hypothesis evaluation}
|
||||
|
||||
% \subsection{Future work}
|
||||
10
report/sections/discussion.tex
Normal file
10
report/sections/discussion.tex
Normal file
@@ -0,0 +1,10 @@
|
||||
\section{Discussion\label{sec:discu}}
|
||||
|
||||
|
||||
% \subsection{Interpretation of findings}
|
||||
|
||||
% \subsection{Privacy implications}
|
||||
|
||||
% \subsection{Reliability and limitations}
|
||||
|
||||
% \subsection{Ethical considerations}
|
||||
8
report/sections/introduction.tex
Normal file
8
report/sections/introduction.tex
Normal file
@@ -0,0 +1,8 @@
|
||||
\section{Introduction\label{sec:intro}}
|
||||
\subsection{Background\label{sec:intro:background}}
|
||||
|
||||
\subsection{Problem statement\label{sec:intro:statement}}
|
||||
|
||||
\subsection{Research objectives\label{sec:intro:research}}
|
||||
|
||||
\subsection{Hypotheses\label{sec:intro:hypotheses}}
|
||||
71
report/sections/method.tex
Normal file
71
report/sections/method.tex
Normal file
@@ -0,0 +1,71 @@
|
||||
\section{Method\label{sec:metho}}
|
||||
|
||||
|
||||
\subsection{Research design\label{sec:metho:research_design}}
|
||||
% Key points:
|
||||
% observational study
|
||||
% browser-based network measurements
|
||||
% same searches across search engines
|
||||
% comparison between search engines, browsers, and network modes
|
||||
|
||||
\subsection{Test environment\label{sec:metho:test_environment}}
|
||||
% Key points:
|
||||
% operating system / controlled environment
|
||||
% Playwright
|
||||
% Chromium and Firefox
|
||||
% normal network and Tor proxy
|
||||
% clean browser context
|
||||
% cookies allowed
|
||||
% same wait condition and timeout
|
||||
|
||||
\subsection{Search engines and search queries\label{sec:metho:search_engines}}
|
||||
% Key points:
|
||||
% Google
|
||||
% Bing
|
||||
% DuckDuckGo
|
||||
% Brave Search
|
||||
% list of search queries
|
||||
% same query used across all engines
|
||||
|
||||
\subsection{Variables and measurements\label{sec:metho:Variables_measurements}}
|
||||
% Key points:
|
||||
% requests_total
|
||||
% unique_domains
|
||||
% third_party_requests
|
||||
% request_cookies_total
|
||||
% response_cookies_total
|
||||
% query_params_total
|
||||
% post_requests_total
|
||||
% tracking_hint_requests
|
||||
% transferred_kb_approx
|
||||
% page_load_ms
|
||||
% HTTP status groups
|
||||
|
||||
\subsection{Data collection\label{sec:metho:data_collection}}
|
||||
% Key points:
|
||||
% HAR files
|
||||
% one HAR file per search engine/query/browser/network mode
|
||||
% capture_search_har script
|
||||
% headed browser
|
||||
% wait-until load
|
||||
% timeout 60000 ms
|
||||
% Tor via SOCKS proxy where applicable
|
||||
|
||||
\subsection{Data processing\label{sec:metho:data_processing}}
|
||||
% Key points:
|
||||
% HAR files converted to CSV
|
||||
% har_entries.csv: one row per HAR entry/request
|
||||
% har_summary.csv: one row per HAR file
|
||||
% Power Query used to combine summary files
|
||||
% folder names used to identify browser/network mode
|
||||
|
||||
|
||||
|
||||
\subsection{Limitations of the method\label{sec:metho:limitations}}
|
||||
% Key points:
|
||||
% HAR shows observable browser-side traffic only
|
||||
% cannot prove server-side storage
|
||||
% Playwright may differ from manual browsing
|
||||
% Tor may change website behaviour
|
||||
% cookie consent state affects results
|
||||
% tracking_hint is keyword-based, not proof of tracking
|
||||
18
report/sections/results.tex
Normal file
18
report/sections/results.tex
Normal file
@@ -0,0 +1,18 @@
|
||||
\section{Results\label{sec:resul}}
|
||||
|
||||
|
||||
% \subsection{Dataset overview}
|
||||
|
||||
% \subsection{Exploratory Data Analysis}
|
||||
|
||||
% \subsection{Cookie activity by search engine}
|
||||
|
||||
% \subsection{Third-party request analysis}
|
||||
|
||||
% \subsection{Tracking-related domains}
|
||||
|
||||
% \subsection{Temporal patterns}
|
||||
|
||||
% \subsection{Outliers and anomalies}
|
||||
|
||||
% \subsection{Summary of findings}
|
||||
1685
results/entries_osloweather.csv
Normal file
1685
results/entries_osloweather.csv
Normal file
File diff suppressed because it is too large
Load Diff
1483
results/har_entries_tor_test.csv
Normal file
1483
results/har_entries_tor_test.csv
Normal file
File diff suppressed because it is too large
Load Diff
9
results/har_summary_tor_test.csv
Normal file
9
results/har_summary_tor_test.csv
Normal file
@@ -0,0 +1,9 @@
|
||||
har_filename,search_engine,query_text,requests_total,unique_domains,third_party_requests,request_cookies_total,response_cookies_total,query_params_total,post_requests_total,tracking_hint_requests,transferred_kb_approx,page_load_ms,status_2xx,status_3xx,status_4xx,status_5xx
|
||||
20260507_143612_bing_migraine_symptoms.har,Unknown,migraine symptoms,465,6,0,4922,57,282,11,6,3218.75,1671.0,458,4,1,0
|
||||
20260507_143612_brave_migraine_symptoms.har,Unknown,migraine symptoms,172,3,0,0,0,7,9,3,1965.89,5181.12,171,0,0,0
|
||||
20260507_143612_duckduckgo_migraine_symptoms.har,DuckDuckGo,migraine symptoms,73,4,0,0,0,930,31,32,2122.14,1543.0,73,0,0,0
|
||||
20260507_143612_google_migraine_symptoms.har,Google,migraine symptoms,13,3,6,19,5,21,0,1,4039.12,1278.0,11,1,1,0
|
||||
20260507_144223_bing_migraine_symptoms.har,Unknown,migraine symptoms,483,6,0,4816,48,274,15,4,3298.3,6589.0,470,3,2,0
|
||||
20260507_144223_brave_migraine_symptoms.har,Unknown,migraine symptoms,197,3,0,0,0,7,9,3,1877.96,5905.22,194,0,3,0
|
||||
20260507_144223_duckduckgo_migraine_symptoms.har,DuckDuckGo,migraine symptoms,72,4,0,0,0,984,30,31,2109.12,3062.0,72,0,0,0
|
||||
20260507_144223_google_migraine_symptoms.har,Google,migraine symptoms,7,2,3,9,3,13,0,0,1317.34,2353.61,4,1,1,0
|
||||
|
9
results/summary_osloweather.csv
Normal file
9
results/summary_osloweather.csv
Normal file
@@ -0,0 +1,9 @@
|
||||
har_filename,search_engine,query_text,requests_total,unique_domains,third_party_requests,request_cookies_total,response_cookies_total,query_params_total,post_requests_total,tracking_hint_requests,transferred_kb_approx,page_load_ms,status_2xx,status_3xx,status_4xx,status_5xx
|
||||
20260507_164651_bing_weather_oslo.har,Unknown,weather oslo,458,6,0,4224,40,273,13,4,2892.66,5031.0,446,3,2,0
|
||||
20260507_164651_brave_weather_oslo.har,Unknown,weather oslo,96,2,0,0,0,1,0,0,806.42,1731.0,95,0,1,0
|
||||
20260507_164651_duckduckgo_weather_oslo.har,DuckDuckGo,weather oslo,76,4,0,0,0,981,29,30,2411.64,2986.0,76,0,0,0
|
||||
20260507_164651_google_weather_oslo.har,Google,weather oslo,12,3,6,15,3,19,0,2,3990.94,9253.0,10,1,1,0
|
||||
20260507_164805_bing_weather_oslo.har,Unknown,weather oslo,614,5,0,6856,71,350,8,8,3299.72,1639.0,608,4,0,0
|
||||
20260507_164805_brave_weather_oslo.har,Unknown,weather oslo,154,3,0,0,0,1,9,3,1214.27,1094.0,154,0,0,0
|
||||
20260507_164805_duckduckgo_weather_oslo.har,DuckDuckGo,weather oslo,82,4,0,0,0,925,35,30,2449.43,1289.0,82,0,0,0
|
||||
20260507_164805_google_weather_oslo.har,Google,weather oslo,2,1,0,4,5,3,0,0,49.69,496.65,1,1,0,0
|
||||
|
55
work/many_search.sh
Executable file
55
work/many_search.sh
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env bash
#
# many_search.sh - batch-capture HAR files for a fixed list of search queries.
#
# Every query is captured once per browser/network combination, in this
# order: normal_chromium, tor_chromium, tor_firefox, normal_firefox.
# The "tor" runs are sent through the SOCKS5 proxy named in TOR_PROXY.
#
# Requires: capture_search_har on PATH.
# NOTE(review): --output-dir is passed as a bare directory name; presumably
# capture_search_har resolves it against the current working directory, so
# run this script from the directory that should hold the results - confirm.
#
# Exit status: 0 when every capture succeeded. Otherwise the run stops at the
# first failing capture and exits with that capture's status (127 when
# capture_search_har is not installed).
set -euo pipefail

# Per-capture timeout handed to capture_search_har via --timeout-ms.
readonly TIMEOUT_MS=60000
# SOCKS5 endpoint used for the "tor" network mode.
readonly TOR_PROXY='socks5://127.0.0.1:9050'

# Search queries; each one is run in every entry of CONFIGS.
readonly -a QUERIES=(
  "weather oslo"
  "migraine symptoms"
  "vitamin d deficiency"
  "running shoes"
  "coffee grinder"
  "best laptop for students"
  "electric car charging"
  "cheap flights to london"
  "home insurance"
  "python list tutorial"
  "banana bread recipe"
  "news norway"
)

# "<browser> <network mode>" pairs, listed in execution order.
readonly -a CONFIGS=(
  "chromium normal"
  "chromium tor"
  "firefox tor"
  "firefox normal"
)

#######################################
# Run one capture_search_har invocation.
# Globals:   TIMEOUT_MS (read), TOR_PROXY (read)
# Arguments: $1 - search query
#            $2 - browser name (chromium | firefox)
#            $3 - network mode (normal | tor)
# Outputs:   whatever capture_search_har prints; results are written to the
#            output directory "<mode>_<browser>"
# Returns:   exit status of capture_search_har
#######################################
run_capture() {
  local query=$1
  local browser=$2
  local mode=$3

  # Build argv as an array so queries containing spaces stay one argument.
  local -a args=(
    --query "$query"
    --browser "$browser"
    --wait-until load
    --headed
    --output-dir "${mode}_${browser}"
    --timeout-ms "$TIMEOUT_MS"
  )
  # Only the tor mode is routed through the proxy.
  if [[ "$mode" == "tor" ]]; then
    args+=(--proxy "$TOR_PROXY")
  fi

  capture_search_har "${args[@]}"
}

#######################################
# Run every query in every browser/network configuration.
# Globals:   QUERIES (read), CONFIGS (read)
# Arguments: none
# Outputs:   progress line per query on stdout; diagnostics on stderr
# Returns:   does not return on failure (exits with the failing status)
#######################################
main() {
  # Fail up front with a clear message instead of midway through the batch.
  if ! command -v capture_search_har >/dev/null; then
    printf 'error: capture_search_har not found on PATH\n' >&2
    exit 127
  fi

  local query config browser mode status
  for query in "${QUERIES[@]}"; do
    printf 'Running query: %s\n' "$query"
    for config in "${CONFIGS[@]}"; do
      read -r browser mode <<<"$config"
      # Keep the original fail-fast behaviour, but say which run broke.
      run_capture "$query" "$browser" "$mode" || {
        status=$?
        printf 'error: capture failed (query=%s browser=%s mode=%s, status %d)\n' \
          "$query" "$browser" "$mode" "$status" >&2
        exit "$status"
      }
    done
  done
}

main "$@"
|
||||
Reference in New Issue
Block a user