source: trunk/minix/lib/zlib-1.2.3/contrib/masm686/match.asm@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 13 years ago

Minix 3.1.2a

File size: 11.1 KB
Line 
1
2; match.asm -- Pentium-Pro optimized version of longest_match()
3;
4; Updated for zlib 1.1.3 and converted to MASM 6.1x
5; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com>
6; and Chuck Walbourn <chuckw@kinesoft.com>
7; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro>
8;
9; This is free software; you can redistribute it and/or modify it
10; under the terms of the GNU General Public License.
11
12; Based on match.S
13; Written for zlib 1.1.2
14; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
15;
16; Modified by Gilles Vollant (2005) for add gzhead and gzindex
17
18 .686P
19 .MODEL FLAT
20
21;===========================================================================
22; EQUATES
23;===========================================================================
24
25MAX_MATCH EQU 258
26MIN_MATCH EQU 3
27MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1)
28MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7))
29
30;===========================================================================
31; STRUCTURES
32;===========================================================================
33
34; This STRUCT assumes a 4-byte alignment
35
36DEFLATE_STATE STRUCT
37ds_strm dd ?
38ds_status dd ?
39ds_pending_buf dd ?
40ds_pending_buf_size dd ?
41ds_pending_out dd ?
42ds_pending dd ?
43ds_wrap dd ?
44; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h)
45ds_gzhead dd ?
46ds_gzindex dd ?
47ds_data_type db ?
48ds_method db ?
49 db ? ; padding
50 db ? ; padding
51ds_last_flush dd ?
52ds_w_size dd ? ; used
53ds_w_bits dd ?
54ds_w_mask dd ? ; used
55ds_window dd ? ; used
56ds_window_size dd ?
57ds_prev dd ? ; used
58ds_head dd ?
59ds_ins_h dd ?
60ds_hash_size dd ?
61ds_hash_bits dd ?
62ds_hash_mask dd ?
63ds_hash_shift dd ?
64ds_block_start dd ?
65ds_match_length dd ? ; used
66ds_prev_match dd ? ; used
67ds_match_available dd ?
68ds_strstart dd ? ; used
69ds_match_start dd ? ; used
70ds_lookahead dd ? ; used
71ds_prev_length dd ? ; used
72ds_max_chain_length dd ? ; used
73ds_max_laxy_match dd ?
74ds_level dd ?
75ds_strategy dd ?
76ds_good_match dd ? ; used
77ds_nice_match dd ? ; used
78
79; Don't need anymore of the struct for match
80DEFLATE_STATE ENDS
81
82;===========================================================================
83; CODE
84;===========================================================================
85_TEXT SEGMENT
86
87;---------------------------------------------------------------------------
88; match_init
89;---------------------------------------------------------------------------
90 ALIGN 4
91PUBLIC _match_init
92_match_init PROC
93 ; no initialization needed
94 ret
95_match_init ENDP
96
97;---------------------------------------------------------------------------
98; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
99;---------------------------------------------------------------------------
100 ALIGN 4
101
102PUBLIC _longest_match
103_longest_match PROC
104
105; Since this code uses EBP for a scratch register, the stack frame must
106; be manually constructed and referenced relative to the ESP register.
107
108; Stack image
109; Variables
110chainlenwmask = 0 ; high word: current chain len
111 ; low word: s->wmask
112window = 4 ; local copy of s->window
113windowbestlen = 8 ; s->window + bestlen
114scanend = 12 ; last two bytes of string
115scanstart = 16 ; first two bytes of string
116scanalign = 20 ; dword-misalignment of string
117nicematch = 24 ; a good enough match size
118bestlen = 28 ; size of best match so far
119scan = 32 ; ptr to string wanting match
120varsize = 36 ; number of bytes (also offset to last saved register)
121
122; Saved Registers (actually pushed into place)
123ebx_save = 36
124edi_save = 40
125esi_save = 44
126ebp_save = 48
127
128; Parameters
129retaddr = 52
130deflatestate = 56
131curmatch = 60
132
133; Save registers that the compiler may be using
134 push ebp
135 push edi
136 push esi
137 push ebx
138
139; Allocate local variable space
140 sub esp,varsize
141
142; Retrieve the function arguments. ecx will hold cur_match
143; throughout the entire function. edx will hold the pointer to the
144; deflate_state structure during the function's setup (before
145; entering the main loop).
146
147 mov edx, [esp+deflatestate]
148ASSUME edx:PTR DEFLATE_STATE
149
150 mov ecx, [esp+curmatch]
151
152; uInt wmask = s->w_mask;
153; unsigned chain_length = s->max_chain_length;
154; if (s->prev_length >= s->good_match) {
155; chain_length >>= 2;
156; }
157
158 mov eax, [edx].ds_prev_length
159 mov ebx, [edx].ds_good_match
160 cmp eax, ebx
161 mov eax, [edx].ds_w_mask
162 mov ebx, [edx].ds_max_chain_length
163 jl SHORT LastMatchGood
164 shr ebx, 2
165LastMatchGood:
166
167; chainlen is decremented once beforehand so that the function can
168; use the sign flag instead of the zero flag for the exit test.
169; It is then shifted into the high word, to make room for the wmask
170; value, which it will always accompany.
171
172 dec ebx
173 shl ebx, 16
174 or ebx, eax
175 mov [esp+chainlenwmask], ebx
176
177; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
178
179 mov eax, [edx].ds_nice_match
180 mov ebx, [edx].ds_lookahead
181 cmp ebx, eax
182 jl SHORT LookaheadLess
183 mov ebx, eax
184LookaheadLess:
185 mov [esp+nicematch], ebx
186
187;/* register Bytef *scan = s->window + s->strstart; */
188
189 mov esi, [edx].ds_window
190 mov [esp+window], esi
191 mov ebp, [edx].ds_strstart
192 lea edi, [esi+ebp]
193 mov [esp+scan],edi
194
195;/* Determine how many bytes the scan ptr is off from being */
196;/* dword-aligned. */
197
198 mov eax, edi
199 neg eax
200 and eax, 3
201 mov [esp+scanalign], eax
202
203;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */
204;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */
205
206 mov eax, [edx].ds_w_size
207 sub eax, MIN_LOOKAHEAD
208 sub ebp, eax
209 jg SHORT LimitPositive
210 xor ebp, ebp
211LimitPositive:
212
213;/* int best_len = s->prev_length; */
214
215 mov eax, [edx].ds_prev_length
216 mov [esp+bestlen], eax
217
218;/* Store the sum of s->window + best_len in %esi locally, and in %esi. */
219
220 add esi, eax
221 mov [esp+windowbestlen], esi
222
223;/* register ush scan_start = *(ushf*)scan; */
224;/* register ush scan_end = *(ushf*)(scan+best_len-1); */
225;/* Posf *prev = s->prev; */
226
227 movzx ebx, WORD PTR[edi]
228 mov [esp+scanstart], ebx
229 movzx ebx, WORD PTR[eax+edi-1]
230 mov [esp+scanend], ebx
231 mov edi, [edx].ds_prev
232
233;/* Jump into the main loop. */
234
235 mov edx, [esp+chainlenwmask]
236 jmp SHORT LoopEntry
237
238;/* do {
239; * match = s->window + cur_match;
240; * if (*(ushf*)(match+best_len-1) != scan_end ||
241; * *(ushf*)match != scan_start) continue;
242; * [...]
243; * } while ((cur_match = prev[cur_match & wmask]) > limit
244; * && --chain_length != 0);
245; *
246; * Here is the inner loop of the function. The function will spend the
247; * majority of its time in this loop, and majority of that time will
248; * be spent in the first ten instructions.
249; *
250; * Within this loop:
251; * %ebx = scanend
252; * %ecx = curmatch
253; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
254; * %esi = windowbestlen - i.e., (window + bestlen)
255; * %edi = prev
256; * %ebp = limit
257; */
258
259 ALIGN 4
260LookupLoop:
261 and ecx, edx
262 movzx ecx, WORD PTR[edi+ecx*2]
263 cmp ecx, ebp
264 jbe LeaveNow
265 sub edx, 000010000H
266 js LeaveNow
267
268LoopEntry:
269 movzx eax, WORD PTR[esi+ecx-1]
270 cmp eax, ebx
271 jnz SHORT LookupLoop
272
273 mov eax, [esp+window]
274 movzx eax, WORD PTR[eax+ecx]
275 cmp eax, [esp+scanstart]
276 jnz SHORT LookupLoop
277
278;/* Store the current value of chainlen. */
279
280 mov [esp+chainlenwmask], edx
281
282;/* Point %edi to the string under scrutiny, and %esi to the string we */
283;/* are hoping to match it up with. In actuality, %esi and %edi are */
284;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */
285;/* initialized to -(MAX_MATCH_8 - scanalign). */
286
287 mov esi, [esp+window]
288 mov edi, [esp+scan]
289 add esi, ecx
290 mov eax, [esp+scanalign]
291 mov edx, -MAX_MATCH_8
292 lea edi, [edi+eax+MAX_MATCH_8]
293 lea esi, [esi+eax+MAX_MATCH_8]
294
295;/* Test the strings for equality, 8 bytes at a time. At the end,
296; * adjust %edx so that it is offset to the exact byte that mismatched.
297; *
298; * We already know at this point that the first three bytes of the
299; * strings match each other, and they can be safely passed over before
300; * starting the compare loop. So what this code does is skip over 0-3
301; * bytes, as much as necessary in order to dword-align the %edi
302; * pointer. (%esi will still be misaligned three times out of four.)
303; *
304; * It should be confessed that this loop usually does not represent
305; * much of the total running time. Replacing it with a more
306; * straightforward "rep cmpsb" would not drastically degrade
307; * performance.
308; */
309
310LoopCmps:
311 mov eax, DWORD PTR[esi+edx]
312 xor eax, DWORD PTR[edi+edx]
313 jnz SHORT LeaveLoopCmps
314
315 mov eax, DWORD PTR[esi+edx+4]
316 xor eax, DWORD PTR[edi+edx+4]
317 jnz SHORT LeaveLoopCmps4
318
319 add edx, 8
320 jnz SHORT LoopCmps
321 jmp LenMaximum
322 ALIGN 4
323
324LeaveLoopCmps4:
325 add edx, 4
326
327LeaveLoopCmps:
328 test eax, 00000FFFFH
329 jnz SHORT LenLower
330
331 add edx, 2
332 shr eax, 16
333
334LenLower:
335 sub al, 1
336 adc edx, 0
337
338;/* Calculate the length of the match. If it is longer than MAX_MATCH, */
339;/* then automatically accept it as the best possible match and leave. */
340
341 lea eax, [edi+edx]
342 mov edi, [esp+scan]
343 sub eax, edi
344 cmp eax, MAX_MATCH
345 jge SHORT LenMaximum
346
347;/* If the length of the match is not longer than the best match we */
348;/* have so far, then forget it and return to the lookup loop. */
349
350 mov edx, [esp+deflatestate]
351 mov ebx, [esp+bestlen]
352 cmp eax, ebx
353 jg SHORT LongerMatch
354 mov esi, [esp+windowbestlen]
355 mov edi, [edx].ds_prev
356 mov ebx, [esp+scanend]
357 mov edx, [esp+chainlenwmask]
358 jmp LookupLoop
359 ALIGN 4
360
361;/* s->match_start = cur_match; */
362;/* best_len = len; */
363;/* if (len >= nice_match) break; */
364;/* scan_end = *(ushf*)(scan+best_len-1); */
365
366LongerMatch:
367 mov ebx, [esp+nicematch]
368 mov [esp+bestlen], eax
369 mov [edx].ds_match_start, ecx
370 cmp eax, ebx
371 jge SHORT LeaveNow
372 mov esi, [esp+window]
373 add esi, eax
374 mov [esp+windowbestlen], esi
375 movzx ebx, WORD PTR[edi+eax-1]
376 mov edi, [edx].ds_prev
377 mov [esp+scanend], ebx
378 mov edx, [esp+chainlenwmask]
379 jmp LookupLoop
380 ALIGN 4
381
382;/* Accept the current string, with the maximum possible length. */
383
384LenMaximum:
385 mov edx, [esp+deflatestate]
386 mov DWORD PTR[esp+bestlen], MAX_MATCH
387 mov [edx].ds_match_start, ecx
388
389;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */
390;/* return s->lookahead; */
391
392LeaveNow:
393 mov edx, [esp+deflatestate]
394 mov ebx, [esp+bestlen]
395 mov eax, [edx].ds_lookahead
396 cmp ebx, eax
397 jg SHORT LookaheadRet
398 mov eax, ebx
399LookaheadRet:
400
401; Restore the stack and return from whence we came.
402
403 add esp, varsize
404 pop ebx
405 pop esi
406 pop edi
407 pop ebp
408 ret
409
410_longest_match ENDP
411
412_TEXT ENDS
413END
Note: See TracBrowser for help on using the repository browser.