Employees

These are homework tasks on employees database.

Clone and init the database (requires a local PostgreSQL instance):

  1. psql -U postgres -c 'CREATE DATABASE employees;'
  2. git clone https://github.com/vrajmohan/pgsql-sample-data.git
  3. psql -U postgres -d employees -f pgsql-sample-data/employee/employees.dump

Execute a PRQL query:

  1. cd prql-compiler
  2. cargo run compile examples/employees/average-title-salary.prql | psql -U postgres -d employees

Task 1

rank the employee titles according to the average salary for each department.

My solution:

  • for each employee, find their average salary,
  • join employees with their departments and titles (duplicating employees for each of their titles and departments)
  • group by department and title, aggregating average salary
  • join with department to get department name

PRQL

  1. from salaries
  2. group [emp_no] (
  3. aggregate [emp_salary = average salary]
  4. )
  5. join t=titles [==emp_no]
  6. join dept_emp side:left [==emp_no]
  7. group [dept_emp.dept_no, t.title] (
  8. aggregate [avg_salary = average emp_salary]
  9. )
  10. join departments [==dept_no]
  11. select [dept_name, title, avg_salary]

SQL

  1. WITH table_3 AS (
  2. SELECT
  3. AVG(salary) AS _expr_0,
  4. emp_no
  5. FROM
  6. salaries
  7. GROUP BY
  8. emp_no
  9. ),
  10. table_1 AS (
  11. SELECT
  12. t.title,
  13. AVG(table_2._expr_0) AS avg_salary,
  14. dept_emp.dept_no
  15. FROM
  16. table_3 AS table_2
  17. JOIN titles AS t ON table_2.emp_no = t.emp_no
  18. LEFT JOIN dept_emp ON table_2.emp_no = dept_emp.emp_no
  19. GROUP BY
  20. dept_emp.dept_no,
  21. t.title
  22. )
  23. SELECT
  24. departments.dept_name,
  25. table_0.title,
  26. table_0.avg_salary
  27. FROM
  28. table_1 AS table_0
  29. JOIN departments ON table_0.dept_no = departments.dept_no

Task 2

Estimate distribution of salaries and gender for each department departments.

PRQL

  1. from e=employees
  2. join salaries [==emp_no]
  3. group [e.emp_no, e.gender] (
  4. aggregate [
  5. emp_salary = average salaries.salary
  6. ]
  7. )
  8. join de=dept_emp [==emp_no] side:left
  9. group [de.dept_no, gender] (
  10. aggregate [
  11. salary_avg = average emp_salary,
  12. salary_sd = stddev emp_salary,
  13. ]
  14. )
  15. join departments [==dept_no]
  16. select [dept_name, gender, salary_avg, salary_sd]

SQL

  1. WITH table_3 AS (
  2. SELECT
  3. e.gender,
  4. AVG(salaries.salary) AS _expr_0,
  5. e.emp_no
  6. FROM
  7. employees AS e
  8. JOIN salaries ON e.emp_no = salaries.emp_no
  9. GROUP BY
  10. e.emp_no,
  11. e.gender
  12. ),
  13. table_1 AS (
  14. SELECT
  15. table_2.gender,
  16. AVG(table_2._expr_0) AS salary_avg,
  17. STDDEV(table_2._expr_0) AS salary_sd,
  18. de.dept_no
  19. FROM
  20. table_3 AS table_2
  21. LEFT JOIN dept_emp AS de ON table_2.emp_no = de.emp_no
  22. GROUP BY
  23. de.dept_no,
  24. table_2.gender
  25. )
  26. SELECT
  27. departments.dept_name,
  28. table_0.gender,
  29. table_0.salary_avg,
  30. table_0.salary_sd
  31. FROM
  32. table_1 AS table_0
  33. JOIN departments ON table_0.dept_no = departments.dept_no

Task 3

Estimate distribution of salaries and gender for each manager.

PRQL

  1. from e=employees
  2. join salaries [==emp_no]
  3. group [e.emp_no, e.gender] (
  4. aggregate [
  5. emp_salary = average salaries.salary
  6. ]
  7. )
  8. join de=dept_emp [==emp_no]
  9. join dm=dept_manager [
  10. (dm.dept_no == de.dept_no) && s"(de.from_date, de.to_date) OVERLAPS (dm.from_date, dm.to_date)"
  11. ]
  12. group [dm.emp_no, gender] (
  13. aggregate [
  14. salary_avg = average emp_salary,
  15. salary_sd = stddev emp_salary
  16. ]
  17. )
  18. derive mng_no = emp_no
  19. join managers=employees [==emp_no]
  20. derive mng_name = s"managers.first_name || ' ' || managers.last_name"
  21. select [mng_name, managers.gender, salary_avg, salary_sd]

SQL

  1. WITH table_5 AS (
  2. SELECT
  3. e.gender,
  4. AVG(salaries.salary) AS _expr_0,
  5. e.emp_no
  6. FROM
  7. employees AS e
  8. JOIN salaries ON e.emp_no = salaries.emp_no
  9. GROUP BY
  10. e.emp_no,
  11. e.gender
  12. ),
  13. table_3 AS (
  14. SELECT
  15. AVG(table_4._expr_0) AS salary_avg,
  16. STDDEV(table_4._expr_0) AS salary_sd,
  17. dm.emp_no
  18. FROM
  19. table_5 AS table_4
  20. JOIN dept_emp AS de ON table_4.emp_no = de.emp_no
  21. JOIN dept_manager AS dm ON dm.dept_no = de.dept_no
  22. AND (de.from_date, de.to_date) OVERLAPS (dm.from_date, dm.to_date)
  23. GROUP BY
  24. dm.emp_no,
  25. table_4.gender
  26. ),
  27. table_1 AS (
  28. SELECT
  29. salary_avg,
  30. salary_sd,
  31. emp_no
  32. FROM
  33. table_3 AS table_2
  34. )
  35. SELECT
  36. managers.first_name || ' ' || managers.last_name AS mng_name,
  37. managers.gender,
  38. table_0.salary_avg,
  39. table_0.salary_sd
  40. FROM
  41. table_1 AS table_0
  42. JOIN employees AS managers ON table_0.emp_no = managers.emp_no

Task 4

Find distributions of titles, salaries and genders for each department.

PRQL

  1. from de=dept_emp
  2. join s=salaries side:left [
  3. (s.emp_no == de.emp_no),
  4. s"({s.from_date}, {s.to_date}) OVERLAPS ({de.from_date}, {de.to_date})"
  5. ]
  6. group [de.emp_no, de.dept_no] (
  7. aggregate salary = (average s.salary)
  8. )
  9. join employees [==emp_no]
  10. join titles [==emp_no]
  11. select [dept_no, salary, employees.gender, titles.title]

SQL

  1. WITH table_1 AS (
  2. SELECT
  3. de.dept_no,
  4. AVG(s.salary) AS salary,
  5. de.emp_no
  6. FROM
  7. dept_emp AS de
  8. LEFT JOIN salaries AS s ON s.emp_no = de.emp_no
  9. AND (s.from_date, s.to_date) OVERLAPS (de.from_date, de.to_date)
  10. GROUP BY
  11. de.emp_no,
  12. de.dept_no
  13. )
  14. SELECT
  15. table_0.dept_no,
  16. table_0.salary,
  17. employees.gender,
  18. titles.title
  19. FROM
  20. table_1 AS table_0
  21. JOIN employees ON table_0.emp_no = employees.emp_no
  22. JOIN titles ON table_0.emp_no = titles.emp_no