source: SHX/trunk/src/SeismicHandler/core/parser.py @ 233

Revision 233, 18.5 KB checked in by marcus, 13 years ago (diff)

Parser work. Translation of system variables (not all covered yet) and symbols work now.

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Rev Id Date
Line 
1# -*- coding: utf-8 -*-
2
3#    This file is part of Seismic Handler eXtended (SHX).
4#
5#    SHX is free software: you can redistribute it and/or modify
6#    it under the terms of the GNU Lesser General Public License as published
7#    by the Free Software Foundation, either version 3 of the License, or
8#    (at your option) any later version.
9#
10#    SHX is distributed in the hope that it will be useful,
11#    but WITHOUT ANY WARRANTY; without even the implied warranty of
12#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13#    GNU Lesser General Public License for more details.
14#
15#    You should have received a copy of the GNU Lesser General Public License
16#    along with SHX.  If not, see <http://www.gnu.org/licenses/>.
17
18"""
19These classes cover the processing of SH scripting language.
20
21- "script" processes a stream (open file, stringIO, ...) of commands
22- "parse" parses one command
23- "translate" manages variable substitution
24- "symbol" cares for local and global symbols
25
26It's intended to be compatible with the original C code SH. Please file a ticket
27at http://www.seismic-handler.org/ if there's something wrong or missing.
28"""
29
30import re
31import inspect
32from SeismicHandler.core import Settings, Logging
33import SeismicHandler.commands as commands
34
class parse(object):
    """
    Parse one command of the SH scripting language.

    After successful parsing these values are available:
    - input (original input),
    - input_conv (maybe changed to upper case, depends on global setting),
    - command (always lower case),
    - converted (whether the input was converted to upper case),
    - parameter (list, in order of occurrence),
    - qualifiers (dictionary),
    - suspectedFilename (unix filenames contain slashes, it's just a guess)

    These values can also be accessed via:
    - attribute "parsed" (dictionary, keys carry the "shx_" prefix)
    - attribute with "shx_" prefix (e.g. shx_input)

    Input that contains no command name at all (no run of word characters
    or "+") raises AttributeError.

    The parser analyses the command string:
    >>> cmd = 'COMMAND /SWITCH1 PARAMETER1 /SWITCH2=4 PARAMETER2'
    >>> x = parse(cmd).parsed

    The parsed command name is always lower case. It's used for comparison with
    the "provides" attribute of the command classes.
    >>> x["shx_command"]
    'command'

    Switches may occur at any position, but are commonly placed following the
    command name:
    >>> sorted(x["shx_qualifiers"].keys())
    ['SWITCH1', 'SWITCH2']
    >>> x["shx_qualifiers"]["SWITCH1"]
    True
    >>> x["shx_qualifiers"]["SWITCH2"]
    '4'

    Parameters are returned as list in the order of occurrence:
    >>> x["shx_parameter"]
    ['PARAMETER1', 'PARAMETER2']

    One new feature is that the algorithm guesses a file name (slashes normally
    indicate qualifiers). This only works, if the switches follow directly after
    the command name!
    >>> cmd = 'DELAY+SUM/BLA/FOO=1 BAR /tmp/test /SW FOO'
    >>> x = parse(cmd).parsed
    >>> x["shx_parameter"]
    ['BAR', 'FOO']
    >>> x["shx_qualifiers"]["FOO"]
    '1'

    Please note that the "suspected" file name is also present as switch:
    >>> sorted(x["shx_qualifiers"].keys())
    ['BLA', 'FOO', 'SW', 'TEST', 'TMP']
    >>> x["shx_suspectedFilename"]
    '/TMP/TEST'

    If more than one "possible" filename is given, only the first one is
    recognized. All following are only processed as qualifiers.
    >>> x = parse('COMMAND /tmp/x /tmp/y').parsed
    >>> x["shx_suspectedFilename"]
    '/TMP/X'

    The semicolon is an alternative separator that is mostly used to skip
    parameters:
    >>> x = parse("ECHO;;2 3 4 ;; 6 7").parsed
    >>> x["shx_parameter"]
    ['', '2', '3', '4', '', '6', '7']

    >>> x = parse('ECHO;;;;foo;bar;;').parsed
    >>> x["shx_parameter"]
    ['', '', '', 'FOO', 'BAR', '']

    A parameter that repeats the command name is kept:
    >>> parse('ECHO ECHO').parsed["shx_parameter"]
    ['ECHO']
    """

    # Regular expressions for parsing. Raw strings, so that the backslash
    # sequences reach the regex engine instead of being string escapes.
    re_cmd = r"[+\w]+"
    re_qual = r"/[\w=]+"
    re_file = r" (/\w+)+ ?"

    def __init__(self, input):
        self.input = input

        converted = False
        if Settings.swCapconv:
            # If a command starts with @, no conversion to upper case is
            # performed but the indicator char is removed.
            if input.startswith("@"):
                input = input[1:]
            else:
                input = input.upper()
                converted = True

        # The command is the first run of command characters.
        cmd_match = re.search(self.re_cmd, input)
        cmd = cmd_match.group(0)

        # Guess file: first blank-separated token made of "/word" parts.
        file_match = re.search(self.re_file, input)
        if file_match is None:
            sfile = None
        else:
            sfile = file_match.group(0).strip()

        # Build dict of qualifiers. Only the first "=" separates name and
        # value, so that values containing "=" are not truncated.
        qualifiers = {}
        for token in re.findall(self.re_qual, input):
            key, sep, value = token[1:].partition("=")
            if sep:
                qualifiers[key] = value
            else:
                qualifiers[key] = True

        # Remove exactly the matched command (not every occurrence of its
        # text) and then every qualifier token; what remains are parameters.
        rest = input[:cmd_match.start()] + input[cmd_match.end():]
        rest = re.sub(self.re_qual, "", rest)

        # Check for semicolon placeholders.
        parameter = []
        for chunk in rest.split():
            if ";" not in chunk:
                parameter.append(chunk)
                continue

            fields = chunk.split(";")

            # Correct leading and trailing: a pair of empty fields at either
            # end is reduced by one.
            if fields[:2] == ["", ""]:
                fields = fields[1:]
            if fields[-2:] == ["", ""]:
                fields = fields[:-1]

            parameter.extend(fields)

        self.__parsed = {
                "shx_input": self.input,
                "shx_input_conv": input,
                "shx_command": cmd.lower(), # command always in lower case
                "shx_converted": converted, # indicates cap conversion
                "shx_parameter": parameter,
                "shx_qualifiers": qualifiers,
                "shx_suspectedFilename": sfile,
        }

    def __getattr__(self, name):
        """
        Give attribute style access to the parsed values.

        Accepts "parsed" (whole dictionary), names with "shx_" prefix and
        the same names without prefix. Everything else raises
        AttributeError.
        """
        # This method is only called after normal lookup failed. The result
        # dictionary is fetched from the instance dict directly: going
        # through "self.__parsed" would re-enter this method and recurse
        # endlessly if the instance is not (yet) initialised.
        try:
            parsed = self.__dict__["_parse__parsed"]
        except KeyError:
            raise AttributeError(name)

        if name == "parsed":
            return parsed

        if name.startswith("shx_"):
            key = name
        else:
            key = "shx_%s" % name

        try:
            return parsed[key]
        except KeyError:
            raise AttributeError(name)
191
class script(object):
    """
    Read commands from stream.

    GOTO targets are cached and skipped while iterating, so that any GOTO
    command must be handled inside this class.

    Also the IF condition is evaluated here.

    All other commands are promoted to their handler or treated as new script.

    For testing purposes we use a stream and fill it with commands. Also a
    symbol set is needed. To echo the commands, the global "Echo" switch must
    be turned on.
    >>> from StringIO import StringIO
    >>> strm = StringIO("echo line1\\necho line2\\n")
    >>> symb = symbol()
    >>> Settings.swEcho = True
    >>> x = script(strm, symb)
    >>> x.run()
    echo line1
    echo line2

    It's possible to add more commands at runtime. The script will continue
    there.
    >>> x.feed(StringIO("echo line3\\n"))
    >>> x.run()
    echo line3
    """

    def __init__(self, input, symbols, parameters=None):
        """
        Read in stream, skip empty lines and comments. Remember GOTO targets.

        Input parameters are:
        - stream object having a "read" attribute, or the name of a file
        - symbol class instance (containing global and local variables)
        - optionally the parsed command (dictionary) that started the script

        Raises Exception("Input not readable!") if the input is neither a
        stream nor a file that can be opened.
        """

        self.content = []
        self.targets = {}

        self.pointer = 0
        self.symbols = symbols

        # If a script is initially called there might be parameters and
        # also qualifiers. The attribute always exists, None means "none".
        self.parameters = None
        if parameters:
            self.parameters = translate(parameters, self)

        opened = None
        if hasattr(input, "read"):
            stream = input
        elif isinstance(input, str):
            try:
                # Open the file named by "input" (not a file called "input").
                stream = opened = open(input, "r")
            except (IOError, OSError):
                # cannot open
                raise Exception("Input not readable!")
        else:
            raise Exception("Input not readable!")

        # Cache self-handled commands. These methods begin with "command".
        self.commands = [
            name[7:].lower()
            for name, _ in inspect.getmembers(self, inspect.ismethod)
            if name.startswith("command")
        ]

        try:
            self.feed(stream)
        finally:
            # Only close what was opened here, a passed stream belongs to
            # the caller.
            if opened is not None:
                opened.close()

    def feed(self, stream):
        """
        Method for populating command list.

        This method can be run several times in order to run commands from
        interactive input. The local symbol set is hereby not changed.

        Execution of commands will automatically continue after the last
        command.

        So it's now possible to use loops also in interactive scripts. But be
        warned: It's not easy to get a proper exit GOTO target!
        """

        for line in stream:
            cmdstr = line.strip()

            # skip empty lines or comments
            if not cmdstr or cmdstr[0] in "!#-":
                continue

            # Remember goto target line? The target is the position the
            # line gets inside the accumulated command list, which stays
            # correct when feed() is called more than once.
            if cmdstr.split()[0][-1] == ":":
                self.targets[cmdstr.upper()] = len(self.content)

            self.content.append(cmdstr)

    def run(self):
        """
        Execute all pending commands.

        Each command is echoed (global "Echo" switch), parsed and then
        either passed to its handler from "commands.list" or started as a
        new script with a fresh symbol set. Failures of such a script are
        treated according to the global error switches.
        """
        while True:
            try:
                cmd = self.next()

                if Settings.swEcho:
                    print(cmd)

                cmd = parse(cmd).parsed

                # Execute command...
                if cmd["shx_command"] in commands.list:
                    # translate variables
                    _ = translate(cmd, self)

                    if Settings.swVerify:
                        print(cmd["shx_translated"])

                    # also supply recent symbolset
                    handler = commands.list[cmd["shx_command"]]
                    handler(shx_symbols=self.symbols,
                            *cmd["shx_parameter"], **cmd["shx_qualifiers"])

                # .. or start script.
                else:
                    symb = symbol()
                    try:
                        ns = script(cmd["shx_command"], symb, parameters=cmd)
                        ns.run()
                    except Exception:
                        msg = "Cannot run script '%s'!" % cmd["shx_command"]

                        # Respect global settings:
                        if Settings.swSherrstop:
                            import sys
                            sys.stderr.write(msg + "\n")
                            sys.exit()

                        if Settings.swCmderrstop:
                            raise Exception(msg)

                        if not Settings.swNoerrmsg:
                            import warnings
                            warnings.warn(msg)

            except StopIteration:
                break

    def next(self):
        """
        Iterate over commands.

        Returns the next command string, GOTO target lines are skipped.
        Raises StopIteration if no command is left; the pointer then stays
        behind the last command so that execution continues there after
        the next feed().
        """

        try:
            # skip goto targets
            while self.pointer in self.targets.values():
                self.pointer += 1

            pnt = self.pointer
            self.pointer += 1

            return self.content[pnt]
        except IndexError:
            # lower pointer
            self.pointer -= 1
            raise StopIteration

    def commandGoto(self, target):
        """
        Continue execution at the given GOTO target.

        The target is looked up in upper case, exactly as the target line
        was stored by feed(). Unknown targets raise NameError.
        """
        try:
            self.pointer = self.targets[target.upper()]
        except KeyError:
            raise NameError(target.upper())

    def commandIf(self, cmd):
        # Not implemented yet.
        pass

    def IfGoto(self, cmd):
        """
        Jump to a GOTO target if a comparison holds.

        Reads from "cmd.p": the operands at positions 1 and 3, the
        comparison code at position 2 and the target at position 5. The
        comparison code consists of the operation (EQ, NE, GT, GE, LT, LE)
        followed by the operand format (S: string, I: integer, R: real).
        """
        # cast format
        cast = {
                "S": str,
                "I": int,
                "R": float
                }

        # Plain comparison operators work for all operand types and in
        # all python versions.
        compare = {
                "EQ": lambda a, b: a == b,
                "NE": lambda a, b: a != b,
                "GT": lambda a, b: a > b,
                "GE": lambda a, b: a >= b,
                "LT": lambda a, b: a < b,
                "LE": lambda a, b: a <= b,
                }

        code = cmd.p[2].value
        convert = cast[code[-1]]

        var1 = convert(cmd.p[1].value)
        var2 = convert(cmd.p[3].value)

        # check condition
        if compare[code[:2]](var1, var2):
            self.commandGoto(cmd.p[5].value)
408
class translate(object):
    """
    Translate variables in command. If a script class is used as second
    parameter, its symbol set will be used (e.g. for global symbols).

    There are five basic types of variables:
    1. user-defined symbols start with a quote: "foo
    2. system variables start with a dollar sign: $DSPTRCS
    3. trace variables start with a caret: ^delta(3)
    4. passed options to command procedures start with a hash: #1
    5. data from file access start with a percent: %filename(1)

    After translation all parameters and qualifiers are replaced in place.
    Empty parameters (placeholders) and qualifiers acting as switches are
    left untouched.

    If the global option "Verify" is set the translated command string is saved
    into "shx_translated". This string is rebuilt from the translated parts,
    so qualifiers may not appear in original order.

    In order to test this class, we define a dummy command object.
    >>> Settings.swVerify = True
    >>> cmd = {
    ...    'shx_command': 'echo',
    ...    'shx_converted': True,
    ...    'shx_parameter': ['$PI', '$EXCLAMATION'],
    ...    'shx_qualifiers': {'FOO': '$DOLLAR', 'BAR': True},
    ... }
    >>> _ = translate(cmd)
    >>> cmd['shx_translated']
    'ECHO 3.1415926535897931 ! /FOO=$ /BAR'
    """

    system = {
        "DOLLAR": "$",
        "PI": "3.1415926535897931",
        "SLASH": "/",
        "HAT": "^",
        "EXCLAMATION": "!",

        # XXX todo
        "DSPTRCS": lambda: None,
        "TOTTRCS": lambda: None,
    }

    # Leading character of a variable -> name of the method handling it.
    # Trace, option and file handlers are not implemented yet.
    _handlers = {
        "$": "handleSystem",
        '"': "handleSymbol",
        "^": "handleTrace",
        "#": "handleOption",
        "%": "handleFile",
    }

    def __init__(self, cmd, script=None):
        self.script = script

        # translate parameters
        parameters = cmd["shx_parameter"]
        for idx, value in enumerate(parameters):
            parameters[idx] = self._translate_value(value)

        # translate qualifiers (values are replaced, keys stay as they are)
        qualifiers = cmd["shx_qualifiers"]
        for key in qualifiers:
            qualifiers[key] = self._translate_value(qualifiers[key])

        # Actually this is only for debugging purposes.
        if not Settings.swVerify:
            return

        qual = []
        for key in qualifiers:
            if isinstance(qualifiers[key], bool):
                qual.append(key)
            else:
                qual.append("%s=%s" % (key, qualifiers[key]))

        name = cmd["shx_command"]
        if cmd["shx_converted"]:
            name = name.upper()

        if qual:
            switches = "/" + " /".join(qual)
        else:
            switches = ""

        cmd["shx_translated"] = " ".join([
            name,
            # A translation may yield a non-string (e.g. None), format it.
            " ".join("%s" % (p,) for p in parameters),
            switches,
        ])

    def _translate_value(self, value):
        """
        Return the translation of one parameter or qualifier value.

        Switches (boolean qualifiers), empty values and values without a
        variable prefix are returned unchanged.
        """
        # Checking for emptiness first avoids indexing into "" (skipped
        # parameter or qualifier like "/FOO=").
        if isinstance(value, bool) or not value:
            return value

        handler = self._handlers.get(value[0])
        if handler is None:
            return value

        return getattr(self, handler)(value[1:])

    def handleSystem(self, name):
        """
        Return the value of a system variable (case-insensitive name).

        Raises NameError for unknown variables.
        """
        try:
            x = self.system[name.upper()]
        except KeyError:
            raise NameError("System variable '%s' not found!" % name)

        if callable(x):
            return x()
        else:
            return x

    def handleSymbol(self, name):
        """
        Return the value of a symbol from the script's symbol set.

        Raises NameError if no script is attached or the lookup fails.
        """
        try:
            return getattr(self.script.symbols, name)
        except Exception:
            raise NameError("Symbol '%s' not found!" % name)

    def handleTrace(self, name):
        raise NotImplementedError

    def handleOption(self, name):
        raise NotImplementedError

    def handleFile(self, name):
        raise NotImplementedError
540
class symbol(object):
    """
    This class holds local symbols. Access to global symbols is granted.

    Important note: All symbol names are converted into upper case!

    >>> s = symbol()
    >>> s.foo = 1
    >>> s.foo
    1
    >>> s.setGlobal("bar", "qux")
    >>> s.bar
    'qux'

    Local symbols mask global ones! This can be undone by deleting the symbol.
    >>> s.bar = 5
    >>> s.bar
    5
    >>> del s.bar
    >>> s.bar
    'qux'

    If no local symbol exists, the global one will be removed.
    >>> del s.bar
    >>> s.bar #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    AttributeError: Symbol BAR not found!

    Unknown symbols behave like any missing attribute:
    >>> hasattr(s, "bar")
    False

    If one wants to delete a global symbol but keep the local symbol:
    >>> s.setGlobal("bar", "global")
    >>> s.bar
    'global'
    >>> s.bar = "local"
    >>> s.deleteGlobal("bar")
    >>> s.bar
    'local'
    """

    def __init__(self):
        # The globals are stored under a lower case key. It cannot collide
        # with a symbol, since symbol names are always upper case.
        self.__dict__["__globals"] = Settings.Globals

    def __getattr__(self, name):
        """
        Return the local symbol or, if there is none, the global one.

        Raises AttributeError (as the attribute protocol demands) if the
        symbol exists neither locally nor globally.
        """
        name = name.upper()

        try:
            return self.__dict__[name]
        except KeyError:
            pass

        try:
            return self.__dict__["__globals"][name]
        except KeyError:
            pass

        raise AttributeError("Symbol %s not found!" % name)

    def __setattr__(self, name, value=None):
        """Set a local symbol."""
        self.__dict__[name.upper()] = value

    def __delattr__(self, name):
        """
        Delete the local symbol or, if there is none, the global one.

        Raises AttributeError if neither exists.
        """
        name = name.upper()

        # Try the local symbols first, then the global ones.
        for scope in ("local", "global"):
            try:
                if scope == "local":
                    del self.__dict__[name]
                else:
                    del self.__dict__["__globals"][name]
            except KeyError:
                continue
            return

        raise AttributeError("Symbol %s not found!" % name)

    def setGlobal(self, name, value=None):
        """Set a global symbol."""
        self.__dict__["__globals"][name.upper()] = value

    def deleteGlobal(self, name):
        """Delete a global symbol, raises KeyError if it does not exist."""
        del self.__dict__["__globals"][name.upper()]
631
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    # Objects without any doctest are left out of the report.
    from doctest import testmod
    testmod(exclude_empty=True)
Note: See TracBrowser for help on using the repository browser.