Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,9 @@ S3_ACCESS_KEY_ID=your_s3_access_key_id_here
S3_SECRET_ACCESS_KEY=your_s3_secret_access_key_here
S3_BUCKET=your_s3_bucket_name_here
S3_ENDPOINT=https://<ACCOUNT_ID>.r2.cloudflarestorage.com

# ClickHouse database
CLICKHOUSE_URL=http://clickhouse:8123
CLICKHOUSE_DATABASE=hackatime_development
CLICKHOUSE_USERNAME=default
CLICKHOUSE_PASSWORD=
Comment thread
skyfallwastaken marked this conversation as resolved.
3 changes: 3 additions & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ HCA_CLIENT_ID=<hca_client_id>
HCA_CLIENT_SECRET=<hca_client_secret>
```

ClickHouse is automatically started by Docker Compose alongside Postgres — no extra setup needed.

Start the containers:

```sh
Expand All @@ -51,6 +53,7 @@ We'll now setup the database. In your container shell, run the following:

```bash
app# bin/rails db:create db:schema:load db:seed
app# bin/rails db:migrate:clickhouse
```

Run the Vite build with SSR (server-side-rendering):
Expand Down
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ gem "propshaft"
gem "sqlite3", ">= 2.1"
# Use PostgreSQL as the database for Wakatime
gem "pg"
# Use ClickHouse for analytics
gem "clickhouse-activerecord"
# Use the Puma web server [https://github.com/puma/puma]
gem "puma", ">= 5.0"
# Use JavaScript with ESM import maps [https://github.com/rails/importmap-rails]
Expand Down
102 changes: 42 additions & 60 deletions app/controllers/api/admin/v1/admin_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,71 +51,77 @@ def visualization_quantized
quantized_query = <<-SQL
WITH base_heartbeats AS (
SELECT
"time",
time,
lineno,
cursorpos,
date_trunc('day', to_timestamp("time")) as day_start
toDate(toDateTime(toUInt32(time))) as day_start
FROM heartbeats
WHERE user_id = ?
AND "time" >= ? AND "time" <= ?
AND time >= ? AND time <= ?
AND (lineno IS NOT NULL OR cursorpos IS NOT NULL)
Comment thread
skyfallwastaken marked this conversation as resolved.
Outdated
LIMIT 1000000
),
daily_stats AS (
SELECT
*,
GREATEST(1, MAX(lineno) OVER (PARTITION BY day_start)) as max_lineno,
GREATEST(1, MAX(cursorpos) OVER (PARTITION BY day_start)) as max_cursorpos
greatest(1, max(lineno) OVER (PARTITION BY day_start)) as max_lineno,
greatest(1, max(cursorpos) OVER (PARTITION BY day_start)) as max_cursorpos
FROM base_heartbeats
),
quantized_heartbeats AS (
SELECT
*,
ROUND(2 + (("time" - extract(epoch from day_start)) / 86400) * (396)) as qx,
ROUND(2 + (1 - CAST(lineno AS decimal) / max_lineno) * (96)) as qy_lineno,
ROUND(2 + (1 - CAST(cursorpos AS decimal) / max_cursorpos) * (96)) as qy_cursorpos
round(2 + ((time - toUInt32(toDateTime(day_start))) / 86400) * (396)) as qx,
round(2 + (1 - CAST(lineno AS Float64) / max_lineno) * (96)) as qy_lineno,
round(2 + (1 - CAST(cursorpos AS Float64) / max_cursorpos) * (96)) as qy_cursorpos
FROM daily_stats
)
SELECT "time", lineno, cursorpos
SELECT time, lineno, cursorpos
FROM (
SELECT DISTINCT ON (day_start, qx, qy_lineno) "time", lineno, cursorpos
SELECT
any(time) AS time,
any(lineno) AS lineno,
any(cursorpos) AS cursorpos
FROM quantized_heartbeats
WHERE lineno IS NOT NULL
ORDER BY day_start, qx, qy_lineno, "time" ASC
GROUP BY day_start, qx, qy_lineno
) AS lineno_pixels
UNION
SELECT "time", lineno, cursorpos
UNION ALL
SELECT time, lineno, cursorpos
FROM (
SELECT DISTINCT ON (day_start, qx, qy_cursorpos) "time", lineno, cursorpos
SELECT
any(time) AS time,
any(lineno) AS lineno,
any(cursorpos) AS cursorpos
FROM quantized_heartbeats
WHERE cursorpos IS NOT NULL
ORDER BY day_start, qx, qy_cursorpos, "time" ASC
GROUP BY day_start, qx, qy_cursorpos
) AS cursorpos_pixels
ORDER BY "time" ASC
ORDER BY time ASC
SQL

daily_totals_query = <<-SQL
WITH heartbeats_with_gaps AS (
SELECT
date_trunc('day', to_timestamp("time"))::date as day,
"time" - LAG("time", 1, "time") OVER (PARTITION BY date_trunc('day', to_timestamp("time")) ORDER BY "time") as gap
toDate(toDateTime(toUInt32(time))) as day,
time - lagInFrame(time, 1, time) OVER (PARTITION BY toDate(toDateTime(toUInt32(time))) ORDER BY time) as gap
FROM heartbeats
WHERE user_id = ? AND time >= ? AND time <= ?
)
SELECT
day,
SUM(LEAST(gap, 120)) as total_seconds
SUM(least(gap, 120)) as total_seconds
FROM heartbeats_with_gaps
WHERE gap IS NOT NULL
GROUP BY day
SQL

quantized_result = ActiveRecord::Base.connection.execute(
ActiveRecord::Base.sanitize_sql([ quantized_query, user.id, start_epoch, end_epoch ])
quantized_result = Heartbeat.connection.select_all(
Heartbeat.sanitize_sql([ quantized_query, user.id, start_epoch, end_epoch ])
)

daily_totals_result = ActiveRecord::Base.connection.execute(
ActiveRecord::Base.sanitize_sql([ daily_totals_query, user.id, start_epoch, end_epoch ])
daily_totals_result = Heartbeat.connection.select_all(
Heartbeat.sanitize_sql([ daily_totals_query, user.id, start_epoch, end_epoch ])
)

daily_totals = daily_totals_result.each_with_object({}) do |row, hash|
Expand Down Expand Up @@ -197,8 +203,8 @@ def alt_candidates
LIMIT 5000
SQL

result = ActiveRecord::Base.connection.exec_query(
ActiveRecord::Base.sanitize_sql([ query, cutoff, cutoff ])
result = Heartbeat.connection.select_all(
Heartbeat.sanitize_sql([ query, cutoff, cutoff ])
)

render json: { candidates: result.to_a }
Expand All @@ -210,44 +216,20 @@ def shared_machines

query = <<-SQL
SELECT
sms.machine,
sms.machine_frequency,
ARRAY_AGG(DISTINCT u.id) AS user_ids
FROM
(
SELECT
machine,
COUNT(user_id) AS machine_frequency,
ARRAY_AGG(user_id) AS user_ids
FROM
(
SELECT DISTINCT
machine,
user_id
FROM
heartbeats
WHERE
machine IS NOT NULL
AND time > ?
) AS UserMachines
GROUP BY
machine
HAVING
COUNT(user_id) > 1
) AS sms,
LATERAL UNNEST(sms.user_ids) AS user_id_from_array
JOIN
users AS u ON u.id = user_id_from_array
GROUP BY
sms.machine,
sms.machine_frequency
ORDER BY
sms.machine_frequency DESC
machine,
uniq(user_id) AS machine_frequency,
groupArray(DISTINCT user_id) AS user_ids
Comment thread
skyfallwastaken marked this conversation as resolved.
Outdated
FROM heartbeats
WHERE machine != '' AND machine IS NOT NULL
AND time > ?
GROUP BY machine
HAVING uniq(user_id) > 1
ORDER BY machine_frequency DESC
LIMIT 5000
SQL

result = ActiveRecord::Base.connection.exec_query(
ActiveRecord::Base.sanitize_sql([ query, cutoff ])
result = Heartbeat.connection.select_all(
Heartbeat.sanitize_sql([ query, cutoff ])
)

render json: { machines: result.to_a }
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/api/hackatime/v1/hackatime_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def handle_heartbeat(heartbeat_array)
}).slice(*Heartbeat.column_names.map(&:to_sym))
# ^^ They say safety laws are written in blood. Well, so is this line!
# Basically this filters out columns that aren't in our DB (the biggest one being raw_data)
new_heartbeat = Heartbeat.find_or_create_by(attrs)
new_heartbeat = Heartbeat.create(attrs)
Comment thread
skyfallwastaken marked this conversation as resolved.
Outdated

queue_project_mapping(heartbeat[:project])
results << [ new_heartbeat.attributes, 201 ]
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/api/v1/authenticated/hours_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def index
end_date = params[:end_date]&.to_date || Date.current

total_seconds = current_user.heartbeats
.where(created_at: start_date.beginning_of_day..end_date.end_of_day)
.where(time: start_date.beginning_of_day.to_f..end_date.end_of_day.to_f)
.duration_seconds

render json: {
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/concerns/api/admin/v1/user_utilities.rb
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def user_info
total_coding_time: valid.duration_seconds || 0,
languages_used: valid.distinct.pluck(:language).compact.count,
projects_worked_on: valid.distinct.pluck(:project).compact.count,
days_active: valid.distinct.count("DATE(to_timestamp(CASE WHEN time > 1000000000000 THEN time / 1000 ELSE time END))")
days_active: Heartbeat.connection.select_value("SELECT uniq(toDate(toDateTime(toUInt32(time)))) FROM (#{valid.to_sql}) AS hb").to_i
}
}
}
Expand Down
34 changes: 25 additions & 9 deletions app/jobs/cache/active_projects_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,30 @@ def cache_expiration
end

def calculate
# Get recent heartbeats with matching project_repo_mappings in a single SQL query
ProjectRepoMapping.active
.joins("INNER JOIN heartbeats ON heartbeats.project = project_repo_mappings.project_name AND heartbeats.user_id = project_repo_mappings.user_id")
.joins("INNER JOIN users ON users.id = heartbeats.user_id")
.where("heartbeats.source_type = ?", Heartbeat.source_types[:direct_entry])
.where("heartbeats.time > ?", 5.minutes.ago.to_f)
.select("DISTINCT ON (heartbeats.user_id) project_repo_mappings.*, heartbeats.user_id")
.order("heartbeats.user_id, heartbeats.time DESC")
.index_by(&:user_id)
# Query recent heartbeats from ClickHouse
recent_hbs = Heartbeat.where(source_type: Heartbeat.source_types[:direct_entry])
.where("time > ?", 5.minutes.ago.to_f)
.order(time: :desc)
.to_a

# Deduplicate by user_id (most recent heartbeat per user)
latest_by_user = recent_hbs.group_by(&:user_id).transform_values(&:first)

return {} if latest_by_user.empty?

# Find matching project_repo_mappings from Postgres
user_ids = latest_by_user.keys

mappings = ProjectRepoMapping.active
.where(user_id: user_ids)
.to_a

result = {}
latest_by_user.each do |user_id, hb|
mapping = mappings.find { |m| m.user_id == user_id && m.project_name == hb.project }
result[user_id] = mapping if mapping
end

result
end
end
25 changes: 14 additions & 11 deletions app/jobs/cache/active_users_graph_data_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,24 @@ class Cache::ActiveUsersGraphDataJob < Cache::ActivityJob

  # Builds the hourly active-user series for the last 24 hours: an Array of
  # { hour:, users:, height: } hashes, where height is that hour's user count
  # expressed as a rounded percentage of the busiest hour's count.
  #
  # NOTE(review): this body is listed without +/- diff markers and contains two
  # forms of most statements (an ActiveRecord relation form and a raw
  # select_all form). As listed the hash literal below is not valid Ruby (no
  # comma after the first `height:` entry) — confirm which lines are live.
  def calculate
    # over the last 24 hours, count the number of people who were active each hour
    hours = Heartbeat.coding_only
                     .with_valid_timestamps
                     .where("time > ?", 24.hours.ago.to_f)
                     .where("time < ?", Time.current.to_f)
                     .select("(EXTRACT(EPOCH FROM to_timestamp(time))::bigint / 3600 * 3600) as hour, COUNT(DISTINCT user_id) as count")
                     .group("hour")
                     .order("hour DESC")
    # Raw-SQL form: the scoped relation is rendered with to_sql and embedded as
    # a subquery, then bucketed into 3600-second hours.
    # NOTE(review): ClickHouse documents uniq() as an approximate distinct
    # count, unlike COUNT(DISTINCT ...) above — confirm that is acceptable here.
    connection = Heartbeat.connection
    hours = connection.select_all(<<~SQL
      SELECT
        toInt64(toUInt32(time) / 3600) * 3600 AS hour,
        uniq(user_id) AS count
      FROM (#{Heartbeat.coding_only.with_valid_timestamps.where("time > ?", 24.hours.ago.to_f).where("time < ?", Time.current.to_f).to_sql}) AS hb
      GROUP BY hour
      ORDER BY hour DESC
    SQL
    )

    # `|| 1` supplies a divisor when there are no rows (max of an empty list is nil).
    top_hour_count = hours.max_by(&:count)&.count || 1
    top_hour_count = hours.map { |h| h["count"].to_i }.max || 1

    # The mapped Array is the method's return value.
    hours = hours.map do |h|
      {
        hour: Time.at(h.hour),
        users: h.count,
        height: (h.count.to_f / top_hour_count * 100).round
        hour: Time.at(h["hour"].to_i),
        users: h["count"].to_i,
        height: (h["count"].to_f / top_hour_count * 100).round
      }
    end
  end
Expand Down
7 changes: 3 additions & 4 deletions app/jobs/cache/currently_hacking_count_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@ def cache_expiration
end

def calculate
count = Heartbeat.joins(:user)
.where(source_type: :direct_entry)
count = Heartbeat.where(source_type: :direct_entry)
.coding_only
.where("time > ?", 5.minutes.ago.to_f)
.select("DISTINCT user_id")
.count
.distinct
.count(:user_id)

{ count: count }
end
Expand Down
29 changes: 18 additions & 11 deletions app/jobs/cache/currently_hacking_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,24 @@ def cache_expiration
end

def calculate
# Get most recent heartbeats and users in a single query
recent_heartbeats = Heartbeat.joins(:user)
.where(source_type: :direct_entry)
.coding_only
.where("time > ?", 5.minutes.ago.to_f)
.select("DISTINCT ON (user_id) user_id, project, time, users.*")
.order("user_id, time DESC")
.includes(user: [ :project_repo_mappings, :email_addresses ])
.index_by(&:user_id)

users = recent_heartbeats.values.map(&:user)
# Query ClickHouse for recent heartbeats (no cross-DB join)
raw_heartbeats = Heartbeat.where(source_type: :direct_entry)
.coding_only
.where("time > ?", 5.minutes.ago.to_f)
.order(time: :desc)
.to_a

# Deduplicate by user_id (keep most recent)
recent_heartbeats = raw_heartbeats.group_by(&:user_id)
.transform_values(&:first)

# Load users from Postgres
user_ids = recent_heartbeats.keys
users_by_id = User.where(id: user_ids)
.includes(:project_repo_mappings, :email_addresses)
.index_by(&:id)

users = user_ids.filter_map { |uid| users_by_id[uid] }

active_projects = {}
users.each do |user|
Expand Down
12 changes: 9 additions & 3 deletions app/jobs/cache/home_stats_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@ class Cache::HomeStatsJob < Cache::ActivityJob
private

  # Returns the home-page totals as { users_tracked:, seconds_tracked: }.
  #
  # NOTE(review): this body is listed without +/- diff markers. It shows both a
  # per-user `Heartbeat.group(:user_id).duration_seconds` form and a single
  # aggregate query against heartbeat_user_daily_summary, so each hash key
  # appears twice; it also contains two review-UI lines ("Comment thread" …)
  # that are not Ruby. Confirm against the real file which lines are live.
  def calculate
    seconds_by_user = Heartbeat.group(:user_id).duration_seconds
    # One aggregate row: distinct users and summed duration_s across the summary
    # table; coalesce(..., 0) supplies 0 if the sum comes back NULL.
    result = HeartbeatUserDailySummary.connection.select_one(<<~SQL)
      SELECT
        uniq(user_id) AS users_tracked,
        toInt64(coalesce(sum(duration_s), 0)) AS seconds_tracked
      FROM heartbeat_user_daily_summary FINAL
    SQL
Comment thread
skyfallwastaken marked this conversation as resolved.

    {
      users_tracked: seconds_by_user.size,
      seconds_tracked: seconds_by_user.values.sum
      users_tracked: result["users_tracked"].to_i,
      seconds_tracked: result["seconds_tracked"].to_i
    }
  end
end
10 changes: 6 additions & 4 deletions app/jobs/leaderboard_update_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@ def build_leaderboard(date, period, force_update = false)
range = LeaderboardDateRange.calculate(date, period)

ActiveRecord::Base.transaction do
# Build the base heartbeat query
# Get eligible user IDs from Postgres (can't cross-DB join)
eligible_user_ids = User.where.not(github_uid: nil)
.where.not(trust_level: User.trust_levels[:red])
.pluck(:id)

heartbeat_query = Heartbeat.where(time: range)
.with_valid_timestamps
.joins(:user)
.coding_only
.where.not(users: { github_uid: nil })
.where.not(users: { trust_level: User.trust_levels[:red] })
.where(user_id: eligible_user_ids)

data = heartbeat_query.group(:user_id).duration_seconds
.filter { |_, seconds| seconds > 60 }
Expand Down
Loading
Loading