Calculating Percentiles with SQL Server CTE
Here percentiles calculations are based on this fictitious table of employee salaries:
CREATE TABLE Employee
(EmployeeID INT IDENTITY(1,1) PRIMARY KEY,
GroupName VARCHAR(30),
Emp_Name VARCHAR(80),
Salary FLOAT)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('PRODUCTION', 'P1', 12000)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('PRODUCTION', 'P2', 13500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('PRODUCTION', 'P3', 13500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('PRODUCTION', 'P4', 11500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S1', 12500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S2', 15500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S3', 14500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S4', 12500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S5', 11000)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S6', 14000)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M1', 11500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M2', 11000)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M3', 16500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M4', 15500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M5', 12250)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M6', 11500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('ACCOUNTING', 'A1', 10500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('ACCOUNTING', 'A2', 13500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('ACCOUNTING', 'A3', 15000)
GO
DECLARE @Percentile FLOAT
SELECT @Percentile = .95;
WITH Emp_Sal(Salary, Prev_Rank, Curr_Rank, Next_Rank) AS
(
SELECT Salary,
(ROW_NUMBER() OVER ( ORDER BY GroupName, Salary ) - 2.0) / ((SELECT COUNT(*) FROM Employee) - 1) [Prev_Rank],
(ROW_NUMBER() OVER ( ORDER BY GroupName, Salary ) - 1.0) / ((SELECT COUNT(*) FROM Employee) - 1) [Curr_Rank],
(ROW_NUMBER() OVER ( ORDER BY GroupName, Salary ) + 0.0) / ((SELECT COUNT(*) FROM Employee) - 1) [Next_Rank]
FROM Employee
)
SELECT
CASE
WHEN T1.Salary = T2.Salary THEN T1.Salary
ELSE T1.Salary + (T2.Salary - T1.Salary) * ((@Percentile - T1.Curr_Rank) / (T2.Curr_Rank - T1.Curr_Rank))
END
FROM Emp_Sal T1, Emp_Sal T2
WHERE (T1.Curr_Rank = @Percentile OR (T1.Curr_Rank < @Percentile AND T1.Next_Rank > @Percentile))
AND (T2.Curr_Rank = @Percentile OR (T2.Curr_Rank > @Percentile AND T2.Prev_Rank < @Percentile))
Calculating Percentiles with Grouping
The query in the previous section evaluates only the overall percentile. If you want to calculate the 95th percentile for each Group than use the below query: It contains two CTEs:
DECLARE @Percentile FLOAT
SELECT @Percentile = .95;
WITH Group_Row_Count(GroupName, Row_Count) AS
(
SELECT GroupName, COUNT(*)
FROM Employee
GROUP BY GroupName
HAVING COUNT(*) > 1
),
Emp_Sal(GroupName, Salary, Prev_Rank, Curr_Rank, Next_Rank) AS
(
SELECT E.GroupName, E.Salary,
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) - 2.0) / (C.Row_Count - 1),
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) - 1.0) / (C.Row_Count - 1),
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) + 0.0) / (C.Row_Count - 1)
FROM Employee E
JOIN Group_Row_Count C ON C.GroupName = E.GroupName
)
SELECT T1.GroupName,
CASE
WHEN T1.Salary = T2.Salary THEN T1.Salary
ELSE T1.Salary + (T2.Salary - T1.Salary) * ((@Percentile - T1.Curr_Rank) / (T2.Curr_Rank - T1.Curr_Rank))
END
FROM Emp_Sal T1
JOIN Emp_Sal T2 ON T1.GroupName = T2.GroupName
WHERE (T1.Curr_Rank = @Percentile OR (T1.Curr_Rank < @Percentile AND T1.Next_Rank > @Percentile))
AND (T2.Curr_Rank = @Percentile OR (T2.Curr_Rank > @Percentile AND T2.Prev_Rank < @Percentile))
Go
Result: