1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
-----------------------------------------------------------------------------
-- |
-- Module : Language.C.Data.Ident
-- Copyright : (c) [1995..1999] Manuel M. T. Chakravarty
-- (c) 2008 Benedikt Huber
-- License : BSD-style
-- Maintainer : benedikt.huber@gmail.com
-- Stability : experimental
-- Portability : ghc
--
-- This module provides the notion of identifiers in C, sped up using hashing.
-- Identifiers are optionally associated with a 'NodeInfo', i.e. with
-- a unique 'Name' and a source location ('Position'). The ordering relation on
-- identifiers is based on the hash and does not follow the lexical order.
-----------------------------------------------------------------------------
-- TODO (comment from manuel):
-- * Hashing is not 8bit clean.
-- | References uniquely determining a struct, union or enum type.
-- Those are identified either by a string identifier ('NamedRef') or,
-- for anonymous types, by a unique name ('AnonymousRef').
data SUERef
  = AnonymousRef Name  -- anonymous type, identified by a unique name
  | NamedRef Ident     -- named type, identified by its identifier

-- NOTE(review): a stray "--, Read" comment followed this declaration; it is
-- presumably the tail of a deriving clause that is not visible here - confirm.
-- | Tell whether a struct\/union\/enum reference is anonymous, i.e. was
-- built with 'AnonymousRef' rather than 'NamedRef'.
isAnonymousRef ref = case ref of
  AnonymousRef _ -> True
  NamedRef _     -> False
-- | C identifiers: a lexeme together with its precomputed hash and its
-- node attributes.
data Ident
  = Ident
      String    -- lexeme
      !Int      -- hash to speed up equality check (strict field)
      NodeInfo  -- attributes of this ident, incl. position

-- NOTE(review): a stray "-- Read" comment followed this declaration; it is
-- presumably the tail of a deriving clause that is not visible here - confirm.
-- Equality ignores the attributes (third field), so identifiers defined at
-- different source text positions can still be equal. To keep the test cheap,
-- the hashes are compared first and the lexemes are only inspected when the
-- two hashes agree.
--
-- NOTE(review): no enclosing instance header is visible here; presumably this
-- equation belongs to an Eq instance for Ident - confirm.
Ident lexemeA hashA _ == Ident lexemeB hashB _
  | hashA == hashB = lexemeA == lexemeB
  | otherwise      = False
-- The ordering is lexicographic on (hash, lexeme): hashes decide first, and
-- lexemes only break ties. It therefore does *not* follow the alphanumerical
-- ordering of the lexemes; the attributes are ignored.
--
-- NOTE(review): no enclosing instance header is visible here; presumably this
-- equation belongs to an Ord instance for Ident - confirm.
compare (Ident lexemeA hashA _) (Ident lexemeB hashB _) =
  case compare hashA hashB of
    EQ      -> compare lexemeA lexemeB
    unequal -> unequal
-- Identifiers are attributed: the attributes are the third field of 'Ident'.
-- NOTE(review): presumably a method of an attribute-carrying class instance
-- whose header is not visible here - confirm.
nodeInfo ident = case ident of
  Ident _ _ attrs -> attrs
-- The position of an identifier is the position recorded in its attributes.
posOf ident = posOfNode (nodeInfo ident)
-- To speed up the equality test, a hash-like value is computed for each
-- identifier's lexeme and stored in the identifier's representation.
--
-- Hash function from the dragon book pp437; assumes 7 bit characters and needs
-- the (nearly) full range of values guaranteed for `Int' by the Haskell
-- language definition; can handle 8 bit characters provided we have 29 bit
-- for the `Int's without sign.

-- | Hash a lexeme, consuming it four characters at a time.
--
-- A full group of four characters is packed with 7 bits per character (the
-- first character ends up in the lowest bits), reduced modulo @2^28@, and
-- added to the hash of the remaining characters (also reduced modulo @2^28@).
-- A trailing group of one to three characters is packed the same way without
-- any reduction; the empty lexeme hashes to @0@.
quad :: String -> Int
quad (c1:c2:c3:c4:rest) =
  (packed `mod` bits28) + (quad rest `mod` bits28)
  where
    -- the four characters of this group, 7 bits apart
    packed = ord c4 * bits21
           + ord c3 * bits14
           + ord c2 * bits7
           + ord c1
quad [c1, c2, c3] = ord c3 * bits14 + ord c2 * bits7 + ord c1
quad [c1, c2]     = ord c2 * bits7 + ord c1
quad [c1]         = ord c1
quad []           = 0

-- | Place values used by 'quad': @2^7@, @2^14@, @2^21@ and @2^28@.
bits7, bits14, bits21, bits28 :: Int
bits7  = 2 ^ (7 :: Int)
bits14 = 2 ^ (14 :: Int)
bits21 = 2 ^ (21 :: Int)
bits28 = 2 ^ (28 :: Int)
-- | Build an identifier from a position, a lexeme and a name.
--
-- * Only minimal error checking is done; e.g. the characters of the lexeme
--   are not checked for being alphanumerical only. The caller (e.g. the
--   scanner) has to ensure the correct lexis of the identifier.
--
-- * For reasons of simplicity the complete lexeme is hashed.
mkIdent pos s name = Ident s lexemeHash attrs
  where
    lexemeHash = quad s
    -- the same position is passed twice, the second time paired with the
    -- length of the lexeme
    attrs = mkNodeInfo' pos (pos, length s) name
-- | Build an /internal/ identifier for the given lexeme: its attributes carry
-- only the internal position (no unique name).
internalIdent s =
  let attrs = mkNodeInfoOnlyPos internalPos
  in Ident s (quad s) attrs
-- | Build an /internal/ identifier that carries position information: the
-- attributes are made from the given position and the lexeme's length.
internalIdentAt pos s =
  let attrs = mkNodeInfoPosLen pos (pos, length s)
  in Ident s (quad s) attrs
-- | Build a /builtin/ identifier for the given lexeme: its attributes carry
-- only the builtin position (no unique name).
builtinIdent s = Ident s (quad s) attrs
  where
    attrs = mkNodeInfoOnlyPos builtinPos
-- | Return @True@ if the given identifier is /internal/, i.e. if the position
-- stored in its attributes satisfies 'isInternalPos'.
isInternalIdent (Ident _ _ attrs) = isInternalPos . posOfNode $ attrs
-- | The lexeme of an identifier.
identToString ident = case ident of
  Ident lexeme _ _ -> lexeme
-- | The string of a struct\/union\/enum reference: the identifier's lexeme
-- for a named reference, the empty string for an anonymous one.
sueRefToString ref = case ref of
  AnonymousRef _ -> ""
  NamedRef ident -> identToString ident
-- | Render an identifier for debugging purposes: its lexeme, the text
-- @" at "@, and the shown attributes.
dumpIdent ide = concat [identToString ide, " at ", show (nodeInfo ide)]