python 根据文件头部信息判断文件类型

Posted in python
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8

"""
根据文件头部信息判断文件类型
"""

import struct

def typeList():
    """Return the lookup table of known file-header signatures.

    Keys are uppercase hexadecimal strings of the leading bytes of a file;
    values are the matching file-type names. Extend the table with further
    signatures found by experiment or by searching online.

    Returns:
        A new dict on every call, mapping header hex code to type name.
    """
    signatures = (
        ("89504E47", "png"),
        ("FFD8FF", "jpg/jpeg"),
    )
    return dict(signatures)
    
def bytes2hex(bytes):
    """Convert a sequence of integer byte values to an uppercase hex string.

    Each value is rendered in hexadecimal and left-padded with a single
    ``0`` when its digit count is odd, so values in 0..255 always produce
    exactly two characters.

    Args:
        bytes: Any iterable of integers (tuple, list, ``bytearray``, or a
            Python 3 ``bytes`` object). The name shadows the builtin but is
            kept because it is part of the existing signature.

    Returns:
        The concatenated uppercase hex digits; an empty string for empty
        input.
    """
    pieces = []
    for value in bytes:
        digits = u"%x" % value
        # Pad odd-length renderings so every byte occupies whole digit pairs.
        if len(digits) % 2:
            digits = u"0" + digits
        pieces.append(digits)
    # Join once instead of growing the string inside the loop.
    return u"".join(pieces).upper()
    
def filetype(filename):
    """Detect a file's type by comparing its leading bytes to known signatures.

    The header is read once, sized to the longest signature in the table
    returned by ``typeList()``, and each signature is then compared against
    the corresponding prefix of that header in table order.

    Args:
        filename: Path of the file to inspect; it is opened in binary mode.

    Returns:
        The type name of the first matching signature, or the string
        ``'unkown'`` (spelling kept as-is, since callers may compare against
        it) when nothing matches, including when the file is empty or
        shorter than a signature.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    signatures = typeList()
    # Number of bytes each hex code describes (two hex digits per byte).
    longest = max([len(code) // 2 for code in signatures] or [0])

    # `with` guarantees the handle is closed even if reading fails.
    with open(filename, 'rb') as binfile:
        header = binfile.read(longest)

    for hcode in signatures:
        nbytes = len(hcode) // 2
        if len(header) < nbytes:
            # File is too short to carry this signature; skip it instead of
            # failing on the short read.
            continue
        # bytearray yields integer byte values on both Python 2 and 3.
        if bytes2hex(bytearray(header[:nbytes])) == hcode:
            return signatures[hcode]
    return 'unkown'

if __name__ == '__main__':
    # Demo entry point when run as a script: detect and print the type of
    # the sample file "123.png" (relative path).
    detected = filetype("123.png")
    print(detected)