Python:PluginReader

From Devicenull's Code

Jump to: navigation, search

This code parses various information from a compiled SourceMod plugin. Right now it can pull a list of all the strings, the natives used, any public functions/variables, and a list of the tags used (the last is rather useless). It can handle compressed plugins, but it hasn't been well tested at all.

Requires the python-bitstring module: http://code.google.com/p/python-bitstring/

class FileReader:
    """Parser for a compiled SourceMod plugin (.smx) file.

    The constructor loads the whole plugin image into ``self.contents``
    (inflating it first when the header marks it as compressed); call
    ``readPlugin()`` afterwards to decode the individual sections into
    attributes such as ``publics``, ``pubvars``, ``natives``, ``tags``,
    ``names``, ``code``, ``data``, ``dbginfo`` and ``dbg_symbols``.

    All binary decoding is done with the standard-library ``struct`` module
    and decompression with ``zlib``.  The module is expected to provide
    ``struct``, ``zlib`` and the logging helpers ``debug``, ``info`` and
    ``warn``.

    Raw byte strings are converted to the native ``str`` type before they are
    returned, so the class behaves the same on Python 2 and Python 3.
    """

    # Value the first 32-bit little-endian word of the file must have.
    _MAGIC = 1397769798
    # Size in bytes of the fixed file header.
    _HEADER_SIZE = 24
    # String terminator used by every string table in the file.
    _NUL = b"\x00"
    # Sections that are consumed indirectly (by readSectionList and
    # readDbgInfoSection) and therefore must not be reported as unhandled.
    _INDIRECT_SECTIONS = (".dbg.symbols", ".dbg.strings")

    def __init__(self, filename):
        """Open *filename*, validate its header and load the plugin image.

        Raises ValueError when the file is too short to hold a header or
        does not start with the expected magic number.
        """
        self.file = open(filename, "rb")
        try:
            data = self.file.read(self._HEADER_SIZE)
            if len(data) < self._HEADER_SIZE:
                raise ValueError("Invalid file format")
            hdr = struct.unpack("<IHBIIBII", data)
            if hdr[0] != self._MAGIC:
                raise ValueError("Invalid file format")
            self.hdr = {
                'magic': hdr[0], 'version': hdr[1], 'compression': hdr[2],
                'disksize': hdr[3], 'imagesize': hdr[4], 'sections': hdr[5],
                'stringtab': hdr[6], 'dataoffs': hdr[7],
            }
            info("Version %x Compression %i" % (hdr[1], hdr[2]))
            self.compress = hdr[2] == 1
            if self.compress:
                body = self.readCompressedData()
            else:
                # Every offset used later is relative to the start of the
                # file, so the image must keep the header in front exactly
                # like the compressed branch does.
                remaining = max(0, self.hdr['imagesize'] - self._HEADER_SIZE)
                body = self.file.read(remaining)
            self.contents = data + body
        finally:
            # Everything needed is in memory now; do not leak the handle.
            self.file.close()

    @staticmethod
    def _text(raw):
        """Return *raw* as a native str (bytes are decoded as latin-1)."""
        if isinstance(raw, str):
            return raw
        return raw.decode("latin-1")

    def _cstring(self, buf, offset):
        """Return the NUL-terminated string starting at *offset* in *buf*.

        A string that runs to the end of *buf* without a terminator is
        returned as-is; an offset past the end yields an empty string.
        """
        end = buf.find(self._NUL, offset)
        if end < 0:
            end = len(buf)
        return self._text(buf[offset:end])

    def readCompressedData(self):
        """Read the rest of a compressed file and return it uncompressed.

        The bytes between the file header and ``hdr['dataoffs']`` are stored
        uncompressed and returned verbatim, followed by the zlib-inflated
        remainder of the file.
        """
        table = self.file.read(self.hdr['dataoffs'] - self._HEADER_SIZE)
        totalsize = self.hdr['disksize'] - self.hdr['dataoffs']
        info("total size %i" % totalsize)
        data = zlib.decompress(self.file.read(totalsize))
        info("uncompressed size %i" % len(data))
        return table + data

    def readString(self, offset):
        """Return the NUL-terminated string at *offset* in the plugin image."""
        return self._cstring(self.contents, offset)

    def readDataStr(self, offset):
        """Return the NUL-terminated string at *offset* in the .data blob."""
        return self._cstring(self.data['data'], offset)

    def readInt32(self, offset):
        """Return the unsigned little-endian 32-bit integer at *offset*."""
        return struct.unpack_from("<I", self.contents, offset)[0]

    def readPlugin(self):
        """Read the section table and decode every section with a handler."""
        self.readSectionList()
        section_handlers = {
            ".publics": self.readPublicsSection,
            ".pubvars": self.readPubvarsSection,
            ".natives": self.readNativesSection,
            ".code": self.readCodeSection,
            ".data": self.readDataSection,
            ".tags": self.readTagsSection,
            ".names": self.readNamesSection,
            # .dbg.symbols is deliberately absent: it needs the counts from
            # .dbg.info, so readDbgInfoSection triggers it.
            ".dbg.info": self.readDbgInfoSection,
        }
        for cur in self.sections:
            handler = section_handlers.get(cur['name'])
            if handler is not None:
                handler(cur)
            elif cur['name'] not in self._INDIRECT_SECTIONS:
                warn("Unhandled section %s" % cur['name'])

    def readSectionList(self):
        """Build ``self.sections`` from the section table after the header.

        Each entry is three 32-bit words (name offset, data offset, size).
        Also records the data offsets of the ``.names`` and ``.dbg.strings``
        sections, which the other readers use as string-table bases.
        """
        self.sections = []
        for i in range(self.hdr['sections']):
            nameoffs, dataoffs, size = struct.unpack_from(
                "<III", self.contents, self._HEADER_SIZE + i * 12)
            name = self.readString(self.hdr['stringtab'] + nameoffs)
            if name == ".names":
                self.stringbase = dataoffs
            elif name == ".dbg.strings":
                self.dbgstring = dataoffs
            debug("section %i name %s" % (i, name))
            self.sections.append({
                "num": i, "name": name, "dataoffs": dataoffs,
                "nameoffs": nameoffs, "size": size,
            })

    def readPublicsSection(self, sect):
        """Decode the 8-byte (address, name offset) records of .publics."""
        self.publics = []
        num_publics = sect['size'] // 8
        debug("%i publics" % num_publics)
        for i in range(num_publics):
            address, nameoffs = struct.unpack_from(
                "<II", self.contents, sect['dataoffs'] + i * 8)
            self.publics.append({
                'address': address, 'nameoffs': nameoffs, 'index': i,
                'name': self.readString(self.stringbase + nameoffs),
            })

    def readPubvarsSection(self, sect):
        """Decode the 8-byte (address, name offset) records of .pubvars."""
        self.pubvars = []
        num_pubvars = sect['size'] // 8
        debug("%i pubvars" % num_pubvars)
        for i in range(num_pubvars):
            address, nameoffs = struct.unpack_from(
                "<II", self.contents, sect['dataoffs'] + i * 8)
            self.pubvars.append({
                'address': address, 'nameoffs': nameoffs,
                'name': self.readString(self.stringbase + nameoffs),
            })

    def readNativesSection(self, sect):
        """Decode the 4-byte name-offset records of .natives."""
        self.natives = []
        num_natives = sect['size'] // 4
        debug("%i natives" % num_natives)
        for i in range(num_natives):
            nameoffs = self.readInt32(sect['dataoffs'] + i * 4)
            self.natives.append({
                'nameoffs': nameoffs, 'index': i,
                'name': self.readString(self.stringbase + nameoffs),
            })

    def readCodeSection(self, sect):
        """Decode the .code header and keep the raw code bytes."""
        temp = struct.unpack_from("<IBBHII", self.contents, sect['dataoffs'])
        self.code = {
            "codesize": temp[0], "cellsize": temp[1], "codeversion": temp[2],
            "flags": temp[3], "main": temp[4], "codeoffs": temp[5],
        }
        # the "main" option currently seems to be unused
        # codeoffs is relative to the start of the section, not the file.
        code_base = sect['dataoffs'] + self.code['codeoffs']
        self.code['data'] = self.contents[code_base:code_base + self.code['codesize']]
        debug("%i bytes of code, version=%i, cellsize=%i"
              % (self.code['codesize'], self.code['codeversion'], self.code['cellsize']))

    def readDataSection(self, sect):
        """Decode the .data header and keep the raw data bytes."""
        temp = struct.unpack_from("<III", self.contents, sect['dataoffs'])
        self.data = {"datasize": temp[0], "memsize": temp[1], "dataoffs": temp[2]}
        # Two different offsets are in play: sect['dataoffs'] locates the
        # section inside the file, self.data['dataoffs'] locates the actual
        # data inside the section.
        data_base = sect['dataoffs'] + self.data['dataoffs']
        self.data['data'] = self.contents[data_base:data_base + self.data['datasize']]
        debug("%i bytes of data" % (self.data['datasize']))

    def readTagsSection(self, sect):
        """Decode the 8-byte (tag id, name offset) records of .tags."""
        self.tags = []
        num_tags = sect['size'] // 8
        debug("%i tags" % num_tags)
        for i in range(num_tags):
            tag_id, nameoffs = struct.unpack_from(
                "<II", self.contents, sect['dataoffs'] + i * 8)
            self.tags.append({
                'nameoffs': nameoffs, 'tag_id': tag_id, 'index': i,
                'name': self.readString(self.stringbase + nameoffs),
            })

    def readNamesSection(self, sect):
        """Split the .names section on NUL bytes into ``self.names``."""
        start = sect['dataoffs']
        raw_names = self.contents[start:start + sect['size']].split(self._NUL)
        self.names = [self._text(raw) for raw in raw_names]
        debug("%i names" % len(self.names))

    def readDbgInfoSection(self, sect):
        """Decode the four counters of .dbg.info, then read .dbg.symbols.

        The symbol table can only be walked once the symbol count is known,
        which is why it is read from here rather than from readPlugin.
        """
        temp = struct.unpack_from("<IIII", self.contents, sect['dataoffs'])
        self.dbginfo = {"files": temp[0], "lines": temp[1],
                        "symbols": temp[2], "arrays": temp[3]}
        print(self.dbginfo)
        for cur in self.sections:
            if cur['name'] == ".dbg.symbols":
                self.readDbgSymSection(cur)

    def readDbgSymSection(self, sect):
        """Decode the variable-length records of .dbg.symbols.

        Each record is a packed 22-byte symbol followed by ``dimcount``
        packed 6-byte array-dimension entries.  The on-disk records are
        smaller than the sizes gcc reports for the matching C structs
        (24 and 8 bytes), hence the explicit "<" (no padding) formats.
        """
        sym_struct = struct.Struct("<ihIIbbhI")
        dim_struct = struct.Struct("<hI")
        debug("%i debug symbols" % self.dbginfo['symbols'])
        self.dbg_symbols = []
        debug("array size %i" % dim_struct.size)

        # Records are dynamically sized, so track an absolute cursor
        # instead of computing positions from the symbol index.
        pos = sect['dataoffs']
        for current_sym in range(self.dbginfo['symbols']):
            temp = sym_struct.unpack_from(self.contents, pos)
            pos += sym_struct.size
            cursym = {
                'addr': temp[0], 'tagid': temp[1], 'codestart': temp[2],
                'codeend': temp[3], 'ident': temp[4], 'vclass': temp[5],
                'dimcount': temp[6], 'nameoffs': temp[7], 'index': current_sym,
            }
            cursym['name'] = self.readString(self.dbgstring + cursym['nameoffs'])

            if cursym['dimcount'] > 0:
                cursym['diminfo'] = {}
                for i in range(cursym['dimcount']):
                    tagid, size = dim_struct.unpack_from(self.contents, pos)
                    cursym['diminfo'][i] = {'tagid': tagid, 'size': size}
                    pos += dim_struct.size

            self.dbg_symbols.append(cursym)

    def getStrings(self):
        """Return the distinct NUL-separated strings of the .data blob.

        Strings are returned in order of first appearance; requires
        readPlugin() (or readDataSection) to have populated ``self.data``.
        """
        seen = set()
        ret = []
        for raw in self.data['data'].split(self._NUL):
            text = self._text(raw)
            if text not in seen:
                seen.add(text)
                ret.append(text)
        return ret
 
# Example driver: dump every distinct string found in a plugin's .data section.
# Guarded so that importing this module does not try to open a plugin file.
if __name__ == "__main__":
    import sys

    # The plugin path may be given as the first command-line argument;
    # without one the original sample file name is used.
    fr = FileReader(sys.argv[1] if len(sys.argv) > 1 else "rcon_lock.smx")
    fr.readPlugin()

    print(fr.getStrings())