' ***********************************************************************
' Author : Original: http://filetypedetective.codeplex.com/
' Source translated, revised and extended by Elektro.
'
' Modified : 03-06-2014
' ***********************************************************************
' <copyright file="FileTypeDetective.vb" company="Elektro Studios">
' Copyright (c) Elektro Studios. All rights reserved.
' </copyright>
' ***********************************************************************
#Region " Info "
' file headers are taken from here:
'http://www.garykessler.net/library/file_sigs.html
' mime types are taken from here:
' http://www.webmaster-toolkit.com/mime-types.shtml
#End Region
#Region " Usage Examples "
'Imports FileTypeDetective
'Public Class Form1
' Private Sub Test() Handles MyBase.Load
' MessageBox.Show(Detective.isType("C:\File.reg", FileType.REG)) ' NOTE: The regfile should be Unicode, not ANSI.
' MessageBox.Show(Detective.GetFileType("C:\File.reg").mime)
' End Sub
'End Class
#End Region
#Region " Imports "
Imports System.IO
Imports FileTypeDetective.FileType
#End Region
#Region " FileType Detective "
''' <summary>
''' Immutable descriptor of a known file type.
''' Holds the byte signature expected inside the file, the offset at which that
''' signature starts, the usual file extension and the MIME type.
''' A <c>Nothing</c> entry inside the signature marks a byte whose value is ignored.
''' </summary>
Public Class FileType

    ' MS Office files (their signature is declared at offset 512).
    Public Shared ReadOnly WORD As New FileType(
        New Nullable(Of Byte)() {&HEC, &HA5, &HC1, &H0}, 512I, "doc", "application/msword")

    Public Shared ReadOnly EXCEL As New FileType(
        New Nullable(Of Byte)() {&H9, &H8, &H10, &H0, &H0, &H6, &H5, &H0}, 512I, "xls", "application/excel")

    Public Shared ReadOnly PPT As New FileType(
        New Nullable(Of Byte)() {&HFD, &HFF, &HFF, &HFF, Nothing, &H0, &H0, &H0}, 512I, "ppt", "application/mspowerpoint")

    ' Common documents.
    Public Shared ReadOnly RTF As New FileType(
        New Nullable(Of Byte)() {&H7B, &H5C, &H72, &H74, &H66, &H31}, "rtf", "application/rtf")

    Public Shared ReadOnly PDF As New FileType(
        New Nullable(Of Byte)() {&H25, &H50, &H44, &H46}, "pdf", "application/pdf")

    ' NOTE: only the two bytes FF FE are checked for this type.
    Public Shared ReadOnly REG As New FileType(
        New Nullable(Of Byte)() {&HFF, &HFE}, "reg", "text/plain")

    ' Graphics.
    Public Shared ReadOnly JPEG As New FileType(
        New Nullable(Of Byte)() {&HFF, &HD8, &HFF}, "jpg", "image/jpeg")

    Public Shared ReadOnly PNG As New FileType(
        New Nullable(Of Byte)() {&H89, &H50, &H4E, &H47, &HD, &HA, &H1A, &HA}, "png", "image/png")

    Public Shared ReadOnly GIF As New FileType(
        New Nullable(Of Byte)() {&H47, &H49, &H46, &H38, Nothing, &H61}, "gif", "image/gif")

    ' Compressed archives.
    Public Shared ReadOnly ZIP As New FileType(
        New Nullable(Of Byte)() {&H50, &H4B, &H3, &H4}, "zip", "application/x-compressed")

    Public Shared ReadOnly RAR As New FileType(
        New Nullable(Of Byte)() {&H52, &H61, &H72, &H21}, "rar", "application/x-compressed")

    ' Every known file type, in the order in which detection tries them.
    ' This field must stay below the descriptors above: shared fields are
    ' initialized in textual order.
    Friend Shared ReadOnly types As New List(Of FileType)() From {
        PDF,
        WORD,
        EXCEL,
        JPEG,
        ZIP,
        RAR,
        RTF,
        PNG,
        PPT,
        GIF,
        REG
    }

    ' Number of bytes read from the start of a file.
    ' Some signatures start at offset 512, so the buffer has to reach past that
    ' offset plus the longest signature declared above (8 bytes).
    Friend Const MaxHeaderSize As Integer = 560

    Private ReadOnly m_header As Nullable(Of Byte)()
    Private ReadOnly m_headerOffset As Integer
    Private ReadOnly m_extension As String
    Private ReadOnly m_mime As String

    ''' <summary>
    ''' Gets the expected signature bytes; a <c>Nothing</c> entry matches any byte.
    ''' </summary>
    Public ReadOnly Property header() As Nullable(Of Byte)()
        Get
            Return m_header
        End Get
    End Property

    ''' <summary>
    ''' Gets the position inside the file at which the signature starts.
    ''' </summary>
    Public ReadOnly Property headerOffset() As Integer
        Get
            Return m_headerOffset
        End Get
    End Property

    ''' <summary>
    ''' Gets the file extension (without the dot) associated with this type.
    ''' </summary>
    Public ReadOnly Property extension() As String
        Get
            Return m_extension
        End Get
    End Property

    ''' <summary>
    ''' Gets the MIME type associated with this type.
    ''' </summary>
    Public ReadOnly Property mime() As String
        Get
            Return m_mime
        End Get
    End Property

#Region " Constructors "

    ''' <summary>
    ''' Initializes a new instance of the <see cref="FileType"/> class
    ''' whose signature starts at the very beginning of the file (offset zero).
    ''' </summary>
    ''' <param name="header">Signature bytes; <c>Nothing</c> entries match any byte.</param>
    ''' <param name="extension">String with extension.</param>
    ''' <param name="mime">The description of MIME.</param>
    Public Sub New(header As Nullable(Of Byte)(), extension As String, mime As String)
        Me.New(header, 0, extension, mime)
    End Sub

    ''' <summary>
    ''' Initializes a new instance of the <see cref="FileType"/> class
    ''' whose signature starts at the given offset.
    ''' </summary>
    ''' <param name="header">Signature bytes; <c>Nothing</c> entries match any byte.</param>
    ''' <param name="offset">The header offset - how far into the file the signature starts.</param>
    ''' <param name="extension">String with extension.</param>
    ''' <param name="mime">The description of MIME.</param>
    Public Sub New(header As Nullable(Of Byte)(), offset As Integer, extension As String, mime As String)
        m_header = header
        m_headerOffset = offset
        m_extension = extension
        m_mime = mime
    End Sub

#End Region

    ''' <summary>
    ''' Determines whether the given object describes the same file type:
    ''' same offset, extension, MIME type and signature content.
    ''' </summary>
    ''' <param name="other">The object to compare with.</param>
    ''' <returns><c>True</c> when both descriptors hold equal values; otherwise <c>False</c>.</returns>
    Public Overrides Function Equals(other As Object) As Boolean
        If other Is Me Then
            Return True
        End If

        Dim otherType As FileType = TryCast(other, FileType)
        If otherType Is Nothing Then
            Return False
        End If

        Return Me.headerOffset = otherType.headerOffset AndAlso
            String.Equals(Me.extension, otherType.extension, StringComparison.Ordinal) AndAlso
            String.Equals(Me.mime, otherType.mime, StringComparison.Ordinal) AndAlso
            HeadersEqual(Me.header, otherType.header)
    End Function

    ''' <summary>
    ''' Returns a hash code consistent with <see cref="Equals"/>.
    ''' </summary>
    ''' <returns>The hash code.</returns>
    Public Overrides Function GetHashCode() As Integer
        ' Only Xor is used so the computation cannot raise OverflowException
        ' when integer overflow checks are enabled.
        Dim hash As Integer = Me.headerOffset
        If Me.extension IsNot Nothing Then
            hash = hash Xor Me.extension.GetHashCode()
        End If
        If Me.mime IsNot Nothing Then
            hash = hash Xor Me.mime.GetHashCode()
        End If
        If Me.header IsNot Nothing Then
            hash = hash Xor Me.header.Length
        End If
        Return hash
    End Function

    ''' <summary>
    ''' Returns the extension of this file type.
    ''' </summary>
    Public Overrides Function ToString() As String
        Return extension
    End Function

    ''' <summary>
    ''' Compares two signatures element by element, treating wildcard
    ''' (<c>Nothing</c>) entries as equal only to other wildcard entries.
    ''' </summary>
    Private Shared Function HeadersEqual(first As Nullable(Of Byte)(), second As Nullable(Of Byte)()) As Boolean
        If first Is second Then
            Return True
        End If
        If first Is Nothing OrElse second Is Nothing Then
            Return False
        End If
        If first.Length <> second.Length Then
            Return False
        End If

        For i As Integer = 0 To first.Length - 1
            If first(i).HasValue <> second(i).HasValue Then
                Return False
            End If
            If first(i).HasValue AndAlso first(i).Value <> second(i).Value Then
                Return False
            End If
        Next

        Return True
    End Function

End Class
''' <summary>
''' Helper class to identify file type by the file header, not file extension.
''' </summary>
Public NotInheritable Class FileTypeDetective

    ''' <summary>
    ''' Prevents a default instance of the <see cref="FileTypeDetective"/> class from being created.
    ''' </summary>
    Private Sub New()
    End Sub

#Region "Main Methods"

    ''' <summary>
    ''' Gets the list of known FileTypes whose extension appears in a
    ''' Comma-Separated-Values string. Spaces are ignored and the comparison
    ''' is case-insensitive.
    ''' </summary>
    ''' <param name="CSV">The CSV String with extensions</param>
    ''' <returns>List of FileTypes; empty when <paramref name="CSV"/> is null or empty.</returns>
    Private Shared Function GetFileTypesByExtensions(CSV As String) As List(Of FileType)
        Dim result As New List(Of FileType)()
        If String.IsNullOrEmpty(CSV) Then
            Return result
        End If

        ' Invariant casing keeps the comparison independent of the current culture.
        Dim extensions As String() = CSV.ToUpperInvariant().Replace(" ", "").Split(","c)

        For Each candidate As FileType In types
            If Array.IndexOf(extensions, candidate.extension.ToUpperInvariant()) >= 0 Then
                result.Add(candidate)
            End If
        Next

        Return result
    End Function

    ''' <summary>
    ''' Reads up to <paramref name="maxBytes"/> bytes from the start of the file.
    ''' </summary>
    ''' <param name="file">The file to work with</param>
    ''' <param name="maxBytes">Maximum number of bytes to read.</param>
    ''' <returns>
    ''' Array holding the bytes actually read; it is shorter than
    ''' <paramref name="maxBytes"/> when the file is shorter.
    ''' </returns>
    ''' <exception cref="ApplicationException">The file could not be opened or read.</exception>
    Private Shared Function ReadFileHeader(file As FileInfo, maxBytes As Integer) As Byte()
        Dim header As Byte() = New Byte(maxBytes - 1) {}
        Dim totalRead As Integer = 0

        Try
            Using fsSource As New FileStream(file.FullName, FileMode.Open, FileAccess.Read)
                ' Stream.Read may return fewer bytes than requested, so keep
                ' reading until the buffer is full or the end of file is reached.
                Do While totalRead < maxBytes
                    Dim chunk As Integer = fsSource.Read(header, totalRead, maxBytes - totalRead)
                    If chunk = 0 Then
                        Exit Do
                    End If
                    totalRead += chunk
                Loop
            End Using
        Catch e As Exception
            ' File could not be found/read; keep the original error as inner exception.
            Throw New ApplicationException("Could not read file : " & e.Message, e)
        End Try

        ' Drop the unread tail so callers never compare against padding bytes.
        If totalRead < maxBytes Then
            Array.Resize(header, totalRead)
        End If

        Return header
    End Function

    ''' <summary>
    ''' Checks whether the bytes read from a file contain the signature of the given type
    ''' at the offset declared by that type.
    ''' </summary>
    ''' <param name="candidate">The file type whose signature is tested.</param>
    ''' <param name="fileHeader">The bytes read from the start of the file.</param>
    ''' <returns><c>True</c> when every non-wildcard signature byte matches.</returns>
    Private Shared Function HeaderMatches(candidate As FileType, fileHeader As Byte()) As Boolean
        Dim signature As Nullable(Of Byte)() = candidate.header
        If signature Is Nothing Then
            Return False
        End If

        ' The whole signature has to lie inside the bytes that were actually read.
        Dim offset As Integer = candidate.headerOffset
        If offset < 0 OrElse offset > fileHeader.Length Then
            Return False
        End If
        If signature.Length > fileHeader.Length - offset Then
            Return False
        End If

        For i As Integer = 0 To signature.Length - 1
            Dim expected As Nullable(Of Byte) = signature(i)
            ' A signature entry without a value is a wildcard: any file byte is accepted.
            If expected.HasValue AndAlso expected.Value <> fileHeader(i + offset) Then
                Return False
            End If
        Next

        Return True
    End Function

    ''' <summary>
    ''' Read header of a file and depending on the information in the header
    ''' return object FileType.
    ''' Return null in case when the file type is not identified.
    ''' Throws Application exception if the file can not be read or does not exist
    ''' </summary>
    ''' <param name="file">The FileInfo object.</param>
    ''' <returns>FileType or null not identified</returns>
    Public Shared Function GetFileType(file As FileInfo) As FileType
        ' Read the first bytes of the file once, then try every known type in list order.
        Dim fileHeader As Byte() = ReadFileHeader(file, MaxHeaderSize)

        For Each candidate As FileType In types
            If HeaderMatches(candidate, fileHeader) Then
                Return candidate
            End If
        Next

        ' None of the known types match.
        Return Nothing
    End Function

    ''' <summary>
    ''' Read header of a file and depending on the information in the header
    ''' return object FileType.
    ''' Return null in case when the file type is not identified.
    ''' Throws Application exception if the file can not be read or does not exist
    ''' </summary>
    ''' <param name="file">The path of the file.</param>
    ''' <returns>FileType or null not identified</returns>
    Public Shared Function GetFileType(file As String) As FileType
        Return GetFileType(New FileInfo(file))
    End Function

    ''' <summary>
    ''' Determines whether provided file belongs to one of the provided list of files
    ''' </summary>
    ''' <param name="file">The file.</param>
    ''' <param name="requiredTypes">The required types.</param>
    ''' <returns>
    ''' <c>true</c> if file of the one of the provided types; otherwise, <c>false</c>.
    ''' </returns>
    Public Shared Function isFileOfTypes(file As FileInfo, requiredTypes As List(Of FileType)) As Boolean
        Dim currentType As FileType = GetFileType(file)

        If currentType Is Nothing OrElse requiredTypes Is Nothing Then
            Return False
        End If

        Return requiredTypes.Contains(currentType)
    End Function

    ''' <summary>
    ''' Determines whether provided file belongs to one of the provided list of files,
    ''' where list of files provided by string with Comma-Separated-Values of extensions
    ''' </summary>
    ''' <param name="file">The file.</param>
    ''' <param name="CSV">The CSV String with extensions.</param>
    ''' <returns>
    ''' <c>true</c> if file of the one of the provided types; otherwise, <c>false</c>.
    ''' </returns>
    Public Shared Function isFileOfTypes(file As FileInfo, CSV As String) As Boolean
        Dim providedTypes As List(Of FileType) = GetFileTypesByExtensions(CSV)
        Return isFileOfTypes(file, providedTypes)
    End Function

#End Region

#Region "isType functions"

    ''' <summary>
    ''' Determines whether the specified file is of provided type
    ''' </summary>
    ''' <param name="file">The file.</param>
    ''' <param name="type">The FileType</param>
    ''' <returns>
    ''' <c>true</c> if the specified file is type; otherwise, <c>false</c>.
    ''' </returns>
    Public Shared Function isType(file As FileInfo, type As FileType) As Boolean
        Dim actualType As FileType = GetFileType(file)

        If actualType Is Nothing Then
            Return False
        End If

        Return actualType.Equals(type)
    End Function

    ''' <summary>
    ''' Determines whether the specified file is of provided type
    ''' </summary>
    ''' <param name="file">The path of the file.</param>
    ''' <param name="type">The FileType</param>
    ''' <returns>
    ''' <c>true</c> if the specified file is type; otherwise, <c>false</c>.
    ''' </returns>
    Public Shared Function isType(file As String, type As FileType) As Boolean
        Return isType(New FileInfo(file), type)
    End Function

#End Region

End Class
#End Region