Calculating Percentiles with SQL Server CTE
Here percentiles calculations are based on this fictitious table of employee salaries:
CREATE TABLE Employee
(EmployeeID INT IDENTITY(1,1) PRIMARY KEY,
GroupName VARCHAR(30),
Emp_Name VARCHAR(80),
Salary FLOAT)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('PRODUCTION', 'P1', 12000)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('PRODUCTION', 'P2', 13500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('PRODUCTION', 'P3', 13500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('PRODUCTION', 'P4', 11500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S1', 12500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S2', 15500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S3', 14500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S4', 12500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S5', 11000)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('SALES', 'S6', 14000)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M1', 11500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M2', 11000)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M3', 16500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M4', 15500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M5', 12250)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('MARKETING', 'M6', 11500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('ACCOUNTING', 'A1', 10500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('ACCOUNTING', 'A2', 13500)
INSERT INTO Employee(GroupName, Emp_Name, Salary) values('ACCOUNTING', 'A3', 15000)
GO
DECLARE @Percentile FLOAT
SELECT @Percentile = .95;
WITH Emp_Sal(Salary, Prev_Rank, Curr_Rank, Next_Rank) AS
(
SELECT Salary,
(ROW_NUMBER() OVER ( ORDER BY GroupName, Salary ) - 2.0) / ((SELECT COUNT(*) FROM Employee) - 1) [Prev_Rank],
(ROW_NUMBER() OVER ( ORDER BY GroupName, Salary ) - 1.0) / ((SELECT COUNT(*) FROM Employee) - 1) [Curr_Rank],
(ROW_NUMBER() OVER ( ORDER BY GroupName, Salary ) + 0.0) / ((SELECT COUNT(*) FROM Employee) - 1) [Next_Rank]
FROM Employee
)
SELECT
CASE
WHEN T1.Salary = T2.Salary THEN T1.Salary
ELSE T1.Salary + (T2.Salary - T1.Salary) * ((@Percentile - T1.Curr_Rank) / (T2.Curr_Rank - T1.Curr_Rank))
END
FROM Emp_Sal T1, Emp_Sal T2
WHERE (T1.Curr_Rank = @Percentile OR (T1.Curr_Rank < @Percentile AND T1.Next_Rank > @Percentile))
AND (T2.Curr_Rank = @Percentile OR (T2.Curr_Rank > @Percentile AND T2.Prev_Rank < @Percentile))
Calculating Percentiles with Grouping
The query in the previous section evaluates only the overall percentile. If you want to calculate the 95th percentile for each Group than use the below query: It contains two CTEs:
DECLARE @Percentile FLOAT
SELECT @Percentile = .95;
WITH Group_Row_Count(GroupName, Row_Count) AS
(
SELECT GroupName, COUNT(*)
FROM Employee
GROUP BY GroupName
HAVING COUNT(*) > 1
),
Emp_Sal(GroupName, Salary, Prev_Rank, Curr_Rank, Next_Rank) AS
(
SELECT E.GroupName, E.Salary,
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) - 2.0) / (C.Row_Count - 1),
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) - 1.0) / (C.Row_Count - 1),
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) + 0.0) / (C.Row_Count - 1)
FROM Employee E
JOIN Group_Row_Count C ON C.GroupName = E.GroupName
)
SELECT T1.GroupName,
CASE
WHEN T1.Salary = T2.Salary THEN T1.Salary
ELSE T1.Salary + (T2.Salary - T1.Salary) * ((@Percentile - T1.Curr_Rank) / (T2.Curr_Rank - T1.Curr_Rank))
END
FROM Emp_Sal T1
JOIN Emp_Sal T2 ON T1.GroupName = T2.GroupName
WHERE (T1.Curr_Rank = @Percentile OR (T1.Curr_Rank < @Percentile AND T1.Next_Rank > @Percentile))
AND (T2.Curr_Rank = @Percentile OR (T2.Curr_Rank > @Percentile AND T2.Prev_Rank < @Percentile))
Go
Result:
Hi Arun,
ReplyDeleteThanks a million, you are a star, code works beautifully, many thanks again,I think I found my SQL Guru!!!
Regards
Matrika
Hi Matrika,
ReplyDeleteThanks a lot
Hello Dayalan,
ReplyDeleteCheck below link for the solution
http://sqlanddotnetdevelopment.blogspot.in/2012/02/how-to-generate-quartile-ranking.html
Thanks
Arun Ladha
HI Arun,
ReplyDeletei have one doubt ,
could you please help me.
question : In my table Personame is Rajesh and his age 50.
could you please help me, how to calculate the single record percentile values?
thanks in advance.
Regards,
vinay
Hi Arun,
ReplyDeleteThanks for your great article,it was really helped me.
But i have one doubt.
In the below "Employee" Table
EmpID GroupName Emp_Name Salary
1 PRODUCTION P1 12000
2 PRODUCTION P2 13500
3 PRODUCTION P3 13500
4 PRODUCTION P4 11500
5 SALES S1 12500
11 MARKETING M1 11500
12 MARKETING M2 11000
13 MARKETING M3 16500
14 MARKETING M4 15500
15 MARKETING M5 12250
16 MARKETING M6 11500
17 ACCOUNTING A1 10500
18 ACCOUNTING A2 13500
19 ACCOUNTING A3 15000
i am unable to find out "Sales" percentile ?
Could you please help me?
Thanks in advance for your help.
Regards,
vinay.
Hi Vinay,
DeleteCheck below query for solution:
DECLARE @Percentile FLOAT
SELECT @Percentile = .95;
WITH Group_Row_Count(GroupName, Row_Count) AS
(
SELECT GroupName, COUNT(*)
FROM Employees
GROUP BY GroupName
HAVING COUNT(*) >= 1
),
Emp_Sal(GroupName, Salary, Prev_Rank, Curr_Rank, Next_Rank) AS
(
SELECT E.GroupName, E.Salary,
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) - 2.0) / (C.Row_Count - (CASE WHEN C.Row_Count > 1 THEN 1 ELSE 0 END)),
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) - 1.0) / (C.Row_Count - (CASE WHEN C.Row_Count > 1 THEN 1 ELSE 0 END)),
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName ORDER BY E.Salary) + 0.0) / (C.Row_Count - (CASE WHEN C.Row_Count > 1 THEN 1 ELSE 0 END))
FROM Employees E
JOIN Group_Row_Count C ON C.GroupName = E.GroupName
)
SELECT T1.GroupName,
CASE
WHEN T1.Salary = T2.Salary THEN T1.Salary
ELSE T1.Salary + (T2.Salary - T1.Salary) * ((@Percentile - T1.Curr_Rank) / (T2.Curr_Rank - T1.Curr_Rank))
END
FROM Emp_Sal T1
JOIN Emp_Sal T2 ON T1.GroupName = T2.GroupName
WHERE ((T1.Curr_Rank = @Percentile OR (T1.Curr_Rank < @Percentile AND T1.Next_Rank > @Percentile))
AND (T2.Curr_Rank = @Percentile OR (T2.Curr_Rank > @Percentile AND T2.Prev_Rank < @Percentile)))
OR (T1.Curr_Rank = 0 AND T1.Next_Rank = 1 AND T1.Prev_Rank = -1)
Go
Thanks Arun,
DeleteThis query has helpmed me a lot.
thanks a lot for your query.
Regards,
vinay
Hi Arun,
ReplyDeletecan we calculate the percentile values by using the temp tables inseted of by using
CTE?
could you please suggest me with some example.
thanks in advance..
Hi Arun,
ReplyDeletei have tried with example which you posted earlier,
but i am getting two percentile values for below table.
Name CSO_Age
OT 73
OT 3
could you suggest me how can calculate these two percenterle values?
Thanks in advance!!
Regards,
vinay
Hi Sir,
ReplyDeleteThanks for the excellent solutions.
Regards,
Hiren
Is this possible with TWO by groups? Would it require another CTE?
ReplyDeleteYes it is possible with two Group. You had to Group by two columns. Check below Example:
DeleteAdded one more column in table.
DECLARE @Percentile FLOAT
SELECT @Percentile = .95;
WITH Group_Row_Count(GroupName, GroupName1, Row_Count) AS
(
SELECT GroupName, GroupName1, COUNT(*)
FROM Employee
GROUP BY GroupName, GroupName1
HAVING COUNT(*) > 1
),
Emp_Sal(GroupName, GroupName1, Salary, Prev_Rank, Curr_Rank, Next_Rank) AS
(
SELECT E.GroupName, E.GroupName1, E.Salary,
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName, E.GroupName1 ORDER BY E.Salary) - 2.0) / (C.Row_Count - 1),
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName, E.GroupName1 ORDER BY E.Salary) - 1.0) / (C.Row_Count - 1),
(ROW_NUMBER() OVER ( PARTITION BY E.GroupName, E.GroupName1 ORDER BY E.Salary) + 0.0) / (C.Row_Count - 1)
FROM Employee E
JOIN Group_Row_Count C ON C.GroupName = E.GroupName AND C.GroupName1 = E.GroupName1
)
SELECT T1.GroupName, T1.GroupName1,
CASE
WHEN T1.Salary = T2.Salary THEN T1.Salary
ELSE T1.Salary + (T2.Salary - T1.Salary) * ((@Percentile - T1.Curr_Rank) / (T2.Curr_Rank - T1.Curr_Rank))
END
FROM Emp_Sal T1
JOIN Emp_Sal T2 ON T1.GroupName = T2.GroupName AND T1.GroupName1 = T2.GroupName1
WHERE (T1.Curr_Rank = @Percentile OR (T1.Curr_Rank < @Percentile AND T1.Next_Rank > @Percentile))
AND (T2.Curr_Rank = @Percentile OR (T2.Curr_Rank > @Percentile AND T2.Prev_Rank < @Percentile))
Go
Fantastic, this gets the results i am after.. How can this be setup in a view to report from ?
ReplyDeleteAttacj the resultset into Report Grid.
Delete