21  SQL in SAS: Example

SQL Commands & Clauses

21.1 Data: Tables

21.2 Connection using SAS PROC SQL

Note
  • All examples in this chapter are presented using PROC SQL

  • For running the SQL statements in SAS, see the previous chapter.

  • In principle, just copy and paste the SQL statement in the PROC SQL command.

  • Note: No quote marks are required to enclose the SQL statement, but the semicolon at the end of the SQL statement is required. This is SAS-specific.

  • The SQL commands and clauses are not case sensitive; in general, they are written in upper case.

# Give your own path

libname SASData 'Path\To\Folder\data';

proc sql;
    SELECT *
    FROM tblStudent
;
quit;
%put NUMBER_RECORDS = &sqlobs;

21.3 SELECT

Example 1
  • Select ALL columns from tblStudent
Code
SELECT * FROM tblStudent
Example 2
  • Select SName, Gender, Age columns from tblStudent
Code
SELECT SName, Gender, Age FROM tblStudent
Example 3
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
Code
SELECT SName AS StudentName, Gender, Age FROM tblStudent
Example 4
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Retrieve the first FIVE rows
Code
SELECT SName AS StudentName, Gender, Age FROM tblStudent LIMIT 5

21.4 WHERE

Example 1
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where Age > 16
Code

SELECT SName AS StudentName, Gender, Age
FROM tblStudent
WHERE Age > 16
Example 2
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where Age > 16 AND Attendance greater than 85%
Code

SELECT SName AS StudentName, Gender, Age
FROM tblStudent
WHERE Age > 16 AND Attendance > 85
Example 3
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where Age > 16 AND Attendance BETWEEN 85% and 95%
Code

SELECT SName AS StudentName, Gender, Age
FROM tblStudent
WHERE Age > 16 AND Attendance BETWEEN 85 AND 90
Example 4
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where Age values include 15 and 16 years
Code

SELECT SName AS StudentName, Gender, Age
FROM tblStudent
WHERE Age IN (15, 16)
Example 5
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where StudentName starts with Al
Code

SELECT SName AS StudentName, Gender, Age
FROM tblStudent
WHERE StudentName LIKE 'Al%'
Example 6
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where StudentName ends with an e
Code

SELECT SName AS StudentName, Gender, Age
FROM tblStudent
WHERE StudentName LIKE '%e'
Example 7
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where Attendance is greater than any attendance from ClassID 1
Code

SELECT SName AS StudentName, Gender, Age, Attendance
FROM tblStudent
WHERE Attendance > ANY (SELECT Attendance FROM tblStudent WHERE ClassID=1)

21.5 ORDER BY

Example 1
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where Age is greater than 16 years and Attendance is greater than 80%
  • Order records in ascending order
Code

SELECT SName AS StudentName, Gender, Age, Attendance
FROM tblStudent
WHERE Age > 16 AND Attendance > 80
ORDER BY Attendance ASC
Example 2
  • Select SName, Gender, Age columns from tblStudent
  • Rename SName as StudentName
  • Match records where Age is greater than 16 years and Attendance is greater than 80%
  • Retrieve first two highest attendance records
Code

SELECT SName AS StudentName, Gender, Age, Attendance
FROM tblStudent
WHERE Age > 16 AND Attendance > 80
ORDER BY Attendance DESC
LIMIT 2

21.6 SELECT with Aggregate functions

Example 1
  • From tblStudent, find all unique ClassID
Code

SELECT DISTINCT ClassID
FROM tblStudent
Example 2
  • From tblStudent, find total number of unique ClassID
Code

SELECT COUNT(DISTINCT ClassID)
FROM tblStudent
Example 3
  • From tblStudent, find all unique Age values in the data
Code

SELECT DISTINCT Age
FROM tblStudent
Example 4
  • From tblStudent, find total number of unique Age values in the data
Code

SELECT COUNT(DISTINCT Age)
FROM tblStudent
Example 5
  • From tblStudent, find total number of students with Attendance greater than 85%
Code

SELECT COUNT(StudentID) AS N
FROM tblStudent
WHERE Attendance > 85
Example 6
  • From tblStudent, find total number of students where Attendance between 85% and 95%
Code

SELECT COUNT(StudentID) AS N
FROM tblStudent 
WHERE Attendance BETWEEN 85 AND 95
Example 7
  • From tblStudent, find total number of students and their average Age where Attendance between 85% and 95%
Code

SELECT COUNT(StudentID) AS N, AVG(Age) as AvgAge
FROM tblStudent 
WHERE Attendance BETWEEN 85 AND 95
Example 8
  • From tblStudent, find minimum, maximum, sum, average of Age and total number of students
Code

SELECT MIN(Age) AS Min, MAX(Age) AS Max, 
       SUM(Age) AS Sum, AVG(Age) AS Avg, COUNT(Age) AS N
FROM tblStudent 

21.7 GROUP BY

Example 1
  • From tblStudent, find total number of students in each class
Code

SELECT ClassID, COUNT(StudentID) AS N
FROM tblStudent
GROUP BY ClassID
Example 2
  • From tblStudent, find total number of students in each class where Attendance was between 85% and 95%
Code

SELECT ClassID, COUNT(StudentID) AS N
FROM tblStudent
WHERE Attendance BETWEEN 85 AND 95
GROUP BY ClassID
Example 3
  • From tblStudent, find total number of male and female students where Attendance between 85% and 95%
Code
SELECT Gender, COUNT(StudentID) AS N
FROM tblStudent 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender
Example 4
  • From tblStudent, find total number of male and female students by age where Attendance was between 85% and 95%

  • Get the records in ascending order by Gender and Age

Code

SELECT Gender, Age, COUNT(StudentID) AS N
FROM tblStudent 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender, Age
ORDER BY Gender, Age ASC
Example 5
  • From tblStudent, find total number of male and female students and their average age where Attendance was between 85% and 95%

  • Get the records in ascending order by Gender

Code

SELECT Gender, COUNT(StudentID) AS N, AVG(Age) AS AvgAge
FROM tblStudent 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender
ORDER BY Gender ASC
Example 6
  • From tblStudent, find minimum, maximum, sum, average of Age and total number of students where Attendance was between 85% and 95%

  • Get the records in ascending order by Gender

Code

SELECT Gender,
       COUNT(StudentID) AS N,
       MIN(Age) AS Min, MAX(Age) AS Max, 
       SUM(Age) AS Sum, AVG(Age) AS Avg
FROM tblStudent 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender
ORDER BY Gender ASC

21.8 HAVING

Example 1
  • From tblStudent, find total number of male and female students where Attendance greater than 85%

  • Present records where the count is greater than 3

Code

SELECT Gender, COUNT(StudentID) AS N
FROM tblStudent
WHERE Attendance > 85
GROUP BY Gender

SELECT Gender, COUNT(StudentID) AS N
FROM tblStudent
WHERE Attendance > 85
GROUP BY Gender
HAVING N > 3
Example 2
  • From tblStudent, find total number of male and female students where Attendance between 85% and 95%

  • Present records where the average age is greater than 16 years

Code
SELECT Gender, COUNT(ClassID) AS N, AVG(Age) as AvgAge
FROM tblStudent 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender

SELECT Gender, COUNT(ClassID) AS N, AVG(Age) as AvgAge
FROM tblStudent 
WHERE Attendance BETWEEN 85 AND 95
GROUP BY Gender
HAVING AvgAge > 16

21.9 JOIN

STEPS
  • Read all tables as data.frame

  • Register the connection with duckdb

  • Remember to provide separate table names for each table while registering it

  • The following script assumes that all data are in your current working directory

Code
tblClass = read.table(file='tblClass.txt', sep='\t', header=TRUE)
tblTeacher = read.table(file='tblTeacher.txt', sep='\t', header=TRUE)
tblStudent = read.table(file='tblStudent.txt', sep='\t', header=TRUE)
tblMarks = read.table(file='tblMarks.txt', sep='\t', header=TRUE)

conn = dbConnect(duckdb())

duckdb_register(conn = conn, name = 'tblClass', df = tblClass)
duckdb_register(conn = conn, name = 'tblTeacher', df = tblTeacher)
duckdb_register(conn = conn, name = 'tblStudent', df = tblStudent)
duckdb_register(conn = conn, name = 'tblMarks', df = tblMarks)

21.9.1 LEFT JOIN

LEFT JOIN
  • LEFT JOIN: Everything on the LEFT AND those matches on the RIGHT

LEFT JOIN

Code

SELECT * 
FROM tblClass
LEFT JOIN tblTeacher 
ON tblClass.TeacherID = tblTeacher.TeacherID


SELECT * 
FROM tblClass AS C
LEFT JOIN tblTeacher AS T
ON C.TeacherID = T.TeacherID


SELECT tblClass.ClassID, tblClass.Subject, tblClass.TeacherID 
FROM tblClass
LEFT JOIN tblTeacher AS T
ON tblClass.TeacherID = T.TeacherID


SELECT C.ClassID, C.Subject, C.TeacherID 
FROM tblClass AS C
LEFT JOIN tblTeacher AS T
ON C.TeacherID = T.TeacherID


SELECT C.ClassID, C.Subject, C.TeacherID, 
       T.TeacherID, T.TName, T.Experience
FROM tblClass AS C
LEFT JOIN tblTeacher AS T
ON C.TeacherID = T.TeacherID


SELECT T.TeacherID, T.TName, T.Experience,
       C.ClassID, C.Subject, C.TeacherID
FROM tblTeacher AS T
LEFT JOIN tblClass AS C 
ON T.TeacherID = C.TeacherID

21.9.2 Anti LEFT JOIN

Anti LEFT JOIN
  • Anti LEFT JOIN or LEFT JOIN Exclusive

  • Everything on the LEFT that is NOT on the RIGHT

LEFT JOIN Exclusive

Code

SELECT T.TeacherID, T.TName, T.Experience,
       C.ClassID, C.Subject, C.TeacherID
FROM tblTeacher AS T
LEFT JOIN tblClass AS C 
ON T.TeacherID = C.TeacherID
WHERE C.TeacherID IS NULL

21.9.3 RIGHT JOIN

RIGHT JOIN
  • RIGHT JOIN: Everything on the RIGHT AND those matches on the LEFT

RIGHT JOIN

Code

SELECT C.ClassID, C.Subject,
       S.StudentID, S.SName, S.ClassID
FROM tblClass AS C 
RIGHT JOIN tblStudent AS S
ON C.ClassID = S.ClassID
ORDER BY S.StudentID

21.9.4 Anti RIGHT JOIN

Anti RIGHT JOIN
  • Anti RIGHT JOIN or RIGHT JOIN Exclusive

  • Everything on the RIGHT that is NOT on the LEFT

RIGHT JOIN Exclusive

Code

SELECT C.ClassID, C.Subject,
       S.StudentID, S.SName, S.ClassID
FROM tblClass AS C 
RIGHT JOIN tblStudent AS S
ON C.ClassID = S.ClassID
WHERE C.ClassID IS NULL
ORDER BY S.StudentID

21.9.5 INNER JOIN

INNER JOIN
  • INNER JOIN: Only those matches on the LEFT AND the RIGHT

INNER JOIN

Code

SELECT S.StudentID, S.SName, S.ClassID,
       M.StudentID, M.Marks
FROM tblStudent AS S
INNER JOIN tblMarks AS M
ON S.StudentID = M.StudentID
ORDER BY S.StudentID

21.9.6 FULL JOIN

FULL JOIN
  • FULL JOIN or OUTER JOIN: Everything on the LEFT AND everything on the RIGHT

FULL JOIN

Code

SELECT S.StudentID, S.SName, S.ClassID,
       M.StudentID, M.Marks
FROM tblStudent AS S
FULL JOIN tblMarks AS M
ON S.StudentID = M.StudentID
ORDER BY S.StudentID