File: | src/bin/pax/tables.c |
Warning: | line 1766, column 7 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: tables.c,v 1.54 2019/06/28 05:35:34 deraadt Exp $ */ | |||
2 | /* $NetBSD: tables.c,v 1.4 1995/03/21 09:07:45 cgd Exp $ */ | |||
3 | ||||
4 | /*- | |||
5 | * Copyright (c) 1992 Keith Muller. | |||
6 | * Copyright (c) 1992, 1993 | |||
7 | * The Regents of the University of California. All rights reserved. | |||
8 | * | |||
9 | * This code is derived from software contributed to Berkeley by | |||
10 | * Keith Muller of the University of California, San Diego. | |||
11 | * | |||
12 | * Redistribution and use in source and binary forms, with or without | |||
13 | * modification, are permitted provided that the following conditions | |||
14 | * are met: | |||
15 | * 1. Redistributions of source code must retain the above copyright | |||
16 | * notice, this list of conditions and the following disclaimer. | |||
17 | * 2. Redistributions in binary form must reproduce the above copyright | |||
18 | * notice, this list of conditions and the following disclaimer in the | |||
19 | * documentation and/or other materials provided with the distribution. | |||
20 | * 3. Neither the name of the University nor the names of its contributors | |||
21 | * may be used to endorse or promote products derived from this software | |||
22 | * without specific prior written permission. | |||
23 | * | |||
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |||
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |||
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
34 | * SUCH DAMAGE. | |||
35 | */ | |||
36 | ||||
37 | #include <sys/types.h> | |||
38 | #include <sys/stat.h> | |||
39 | #include <errno(*__errno()).h> | |||
40 | #include <fcntl.h> | |||
41 | #include <limits.h> | |||
42 | #include <signal.h> | |||
43 | #include <stdio.h> | |||
44 | #include <stdlib.h> | |||
45 | #include <string.h> | |||
46 | #include <unistd.h> | |||
47 | ||||
48 | #include "pax.h" | |||
49 | #include "extern.h" | |||
50 | ||||
51 | /* | |||
52 | * Routines for controlling the contents of all the different databases pax | |||
53 | * keeps. Tables are dynamically created only when they are needed. The | |||
54 | * goal was speed and the ability to work with HUGE archives. The databases | |||
55 | * were kept simple, but do have complex rules for when the contents change. | |||
56 | * As of this writing, the posix library functions were more complex than | |||
57 | * needed for this application (pax databases have very short lifetimes and | |||
58 | * do not survive after pax is finished). Pax is required to handle very | |||
59 | * large archives. These database routines carefully combine memory usage and | |||
60 | * temporary file storage in ways which will not significantly impact runtime | |||
61 | * performance while allowing the largest possible archives to be handled. | |||
62 | * Trying to force the fit to the posix database routines was not considered | |||
63 | * time well spent. | |||
64 | */ | |||
65 | ||||
66 | /* | |||
67 | * data structures and constants used by the different databases kept by pax | |||
68 | */ | |||
69 | ||||
70 | /* | |||
71 | * Hash Table Sizes MUST BE PRIME, if set too small performance suffers. | |||
72 | * Probably safe to expect 500000 inodes per tape. Assuming good key | |||
73 | * distribution (inodes) chains of under 50 long (worst case) is ok. | |||
74 | */ | |||
75 | #define L_TAB_SZ2503 2503 /* hard link hash table size */ | |||
76 | #define F_TAB_SZ50503 50503 /* file time hash table size */ | |||
77 | #define N_TAB_SZ541 541 /* interactive rename hash table */ | |||
78 | #define D_TAB_SZ317 317 /* unique device mapping table */ | |||
79 | #define A_TAB_SZ317 317 /* ftree dir access time reset table */ | |||
80 | #define SL_TAB_SZ317 317 /* escape symlink tables */ | |||
81 | #define MAXKEYLEN64 64 /* max number of chars for hash */ | |||
82 | #define DIRP_SIZE64 64 /* initial size of created dir table */ | |||
83 | ||||
84 | /* | |||
85 | * file hard link structure (hashed by dev/ino and chained) used to find the | |||
86 | * hard links in a file system or with some archive formats (cpio) | |||
87 | */ | |||
88 | typedef struct hrdlnk { | |||
89 | ino_t ino; /* files inode number */ | |||
90 | char *name; /* name of first file seen with this ino/dev */ | |||
91 | dev_t dev; /* files device number */ | |||
92 | u_long nlink; /* expected link count */ | |||
93 | struct hrdlnk *fow; | |||
94 | } HRDLNK; | |||
95 | ||||
96 | /* | |||
97 | * Archive write update file time table (the -u, -C flag), hashed by filename. | |||
98 | * Filenames are stored in a scratch file at seek offset into the file. The | |||
99 | * file time (mod time) and the file name length (for a quick check) are | |||
100 | * stored in a hash table node. We were forced to use a scratch file because | |||
101 | * with -u, the mtime for every node in the archive must always be available | |||
102 | * to compare against (and this data can get REALLY large with big archives). | |||
103 | * By being careful to read only when we have a good chance of a match, the | |||
104 | * performance loss is not measurable (and the size of the archive we can | |||
105 | * handle is greatly increased). | |||
106 | */ | |||
107 | typedef struct ftm { | |||
108 | off_t seek; /* location in scratch file */ | |||
109 | struct timespec mtim; /* files last modification time */ | |||
110 | struct ftm *fow; | |||
111 | int namelen; /* file name length */ | |||
112 | } FTM; | |||
113 | ||||
114 | /* | |||
115 | * Interactive rename table (-i flag), hashed by orig filename. | |||
116 | * We assume this will not be a large table as this mapping data can only be | |||
117 | * obtained through interactive input by the user. Nobody is going to type in | |||
118 | * changes for 500000 files? We use chaining to resolve collisions. | |||
119 | */ | |||
120 | ||||
121 | typedef struct namt { | |||
122 | char *oname; /* old name */ | |||
123 | char *nname; /* new name typed in by the user */ | |||
124 | struct namt *fow; | |||
125 | } NAMT; | |||
126 | ||||
127 | /* | |||
128 | * Unique device mapping tables. Some protocols (e.g. cpio) require that the | |||
129 | * <c_dev,c_ino> pair will uniquely identify a file in an archive unless they | |||
130 | * are links to the same file. Appending to archives can break this. For those | |||
131 | * protocols that have this requirement we map c_dev to a unique value not seen | |||
132 | * in the archive when we append. We also try to handle inode truncation with | |||
133 | * this table. (When the inode field in the archive header are too small, we | |||
134 | * remap the dev on writes to remove accidental collisions). | |||
135 | * | |||
136 | * The list is hashed by device number using chain collision resolution. Off of | |||
137 | * each DEVT are linked the various remaps for this device based on those bits | |||
138 | * in the inode which were truncated. For example if we are just remapping to | |||
139 | * avoid a device number during an update append, off the DEVT we would have | |||
140 | * only a single DLIST that has a truncation id of 0 (no inode bits were | |||
141 | * stripped for this device so far). When we spot inode truncation we create | |||
142 | * a new mapping based on the set of bits in the inode which were stripped off. | |||
143 | * so if the top four bits of the inode are stripped and they have a pattern of | |||
144 | * 0110...... (where . are those bits not truncated) we would have a mapping | |||
145 | * assigned for all inodes that has the same 0110.... pattern (with this dev | |||
146 | * number of course). This keeps the mapping sparse and should be able to store | |||
147 | * close to the limit of files which can be represented by the optimal | |||
148 | * combination of dev and inode bits, and without creating a fouled up archive. | |||
149 | * Note we also remap truncated devs in the same way (an exercise for the | |||
150 | * dedicated reader; always wanted to say that...:) | |||
151 | */ | |||
152 | ||||
153 | typedef struct devt { | |||
154 | dev_t dev; /* the orig device number we now have to map */ | |||
155 | struct devt *fow; /* new device map list */ | |||
156 | struct dlist *list; /* map list based on inode truncation bits */ | |||
157 | } DEVT; | |||
158 | ||||
159 | typedef struct dlist { | |||
160 | ino_t trunc_bits; /* truncation pattern for a specific map */ | |||
161 | dev_t dev; /* the new device id we use */ | |||
162 | struct dlist *fow; | |||
163 | } DLIST; | |||
164 | ||||
165 | /* | |||
166 | * ftree directory access time reset table. When we are done with a | |||
167 | * subtree we reset the access and mod time of the directory when the tflag is | |||
168 | * set. Not really explicitly specified in the pax spec, but easy and fast to | |||
169 | * do (and this may have even been intended in the spec, it is not clear). | |||
170 | * table is hashed by inode with chaining. | |||
171 | */ | |||
172 | ||||
173 | typedef struct atdir { | |||
174 | struct file_times ft; | |||
175 | struct atdir *fow; | |||
176 | } ATDIR; | |||
177 | ||||
178 | /* | |||
179 | * created directory time and mode storage entry. After pax is finished during | |||
180 | * extraction or copy, we must reset directory access modes and times that | |||
181 | * may have been modified after creation (they no longer have the specified | |||
182 | * times and/or modes). We must reset time in the reverse order of creation, | |||
183 | * because entries are added from the top of the file tree to the bottom. | |||
184 | * We MUST reset times from leaf to root (it will not work the other | |||
185 | * direction). | |||
186 | */ | |||
187 | ||||
188 | typedef struct dirdata { | |||
189 | struct file_times ft; | |||
190 | u_int16_t mode; /* file mode to restore */ | |||
191 | u_int16_t frc_mode; /* do we force mode settings? */ | |||
192 | } DIRDATA; | |||
193 | ||||
194 | static HRDLNK **ltab = NULL((void *)0); /* hard link table for detecting hard links */ | |||
195 | static FTM **ftab = NULL((void *)0); /* file time table for updating arch */ | |||
196 | static NAMT **ntab = NULL((void *)0); /* interactive rename storage table */ | |||
197 | #ifndef NOCPIO | |||
198 | static DEVT **dtab = NULL((void *)0); /* device/inode mapping tables */ | |||
199 | #endif | |||
200 | static ATDIR **atab = NULL((void *)0); /* file tree directory time reset table */ | |||
201 | static DIRDATA *dirp = NULL((void *)0); /* storage for setting created dir time/mode */ | |||
202 | static size_t dirsize; /* size of dirp table */ | |||
203 | static size_t dircnt = 0; /* entries in dir time/mode storage */ | |||
204 | static int ffd = -1; /* tmp file for file time table name storage */ | |||
205 | ||||
206 | /* | |||
207 | * hard link table routines | |||
208 | * | |||
209 | * The hard link table tries to detect hard links to files using the device and | |||
210 | * inode values. We do this when writing an archive, so we can tell the format | |||
211 | * write routine that this file is a hard link to another file. The format | |||
212 | * write routine then can store this file in whatever way it wants (as a hard | |||
213 | * link if the format supports that like tar, or ignore this info like cpio). | |||
214 | * (Actually a field in the format driver table tells us if the format wants | |||
215 | * hard link info. if not, we do not waste time looking for them). We also use | |||
216 | * the same table when reading an archive. In that situation, this table is | |||
217 | * used by the format read routine to detect hard links from stored dev and | |||
218 | * inode numbers (like cpio). This will allow pax to create a link when one | |||
219 | * can be detected by the archive format. | |||
220 | */ | |||
221 | ||||
222 | /* | |||
223 | * lnk_start | |||
224 | * Creates the hard link table. | |||
225 | * Return: | |||
226 | * 0 if created, -1 if failure | |||
227 | */ | |||
228 | ||||
229 | int | |||
230 | lnk_start(void) | |||
231 | { | |||
232 | if (ltab != NULL((void *)0)) | |||
233 | return(0); | |||
234 | if ((ltab = calloc(L_TAB_SZ2503, sizeof(HRDLNK *))) == NULL((void *)0)) { | |||
235 | paxwarn(1, "Cannot allocate memory for hard link table"); | |||
236 | return(-1); | |||
237 | } | |||
238 | return(0); | |||
239 | } | |||
240 | ||||
241 | /* | |||
242 | * chk_lnk() | |||
243 | * Looks up entry in hard link hash table. If found, it copies the name | |||
244 | * of the file it is linked to (we already saw that file) into ln_name. | |||
245 | * lnkcnt is decremented and if goes to 1 the node is deleted from the | |||
246 | * database. (We have seen all the links to this file). If not found, | |||
247 | * we add the file to the database if it has the potential for having | |||
248 | * hard links to other files we may process (it has a link count > 1) | |||
249 | * Return: | |||
250 | * if found returns 1; if not found returns 0; -1 on error | |||
251 | */ | |||
252 | ||||
253 | int | |||
254 | chk_lnk(ARCHD *arcn) | |||
255 | { | |||
256 | HRDLNK *pt; | |||
257 | HRDLNK **ppt; | |||
258 | u_int indx; | |||
259 | ||||
260 | if (ltab == NULL((void *)0)) | |||
261 | return(-1); | |||
262 | /* | |||
263 | * ignore those nodes that cannot have hard links | |||
264 | */ | |||
265 | if ((arcn->type == PAX_DIR1) || (arcn->sb.st_nlink <= 1)) | |||
266 | return(0); | |||
267 | ||||
268 | /* | |||
269 | * hash inode number and look for this file | |||
270 | */ | |||
271 | indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ2503; | |||
272 | if ((pt = ltab[indx]) != NULL((void *)0)) { | |||
273 | /* | |||
274 | * its hash chain in not empty, walk down looking for it | |||
275 | */ | |||
276 | ppt = &(ltab[indx]); | |||
277 | while (pt != NULL((void *)0)) { | |||
278 | if ((pt->ino == arcn->sb.st_ino) && | |||
279 | (pt->dev == arcn->sb.st_dev)) | |||
280 | break; | |||
281 | ppt = &(pt->fow); | |||
282 | pt = pt->fow; | |||
283 | } | |||
284 | ||||
285 | if (pt != NULL((void *)0)) { | |||
286 | /* | |||
287 | * found a link. set the node type and copy in the | |||
288 | * name of the file it is to link to. we need to | |||
289 | * handle hardlinks to regular files differently than | |||
290 | * other links. | |||
291 | */ | |||
292 | arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name, | |||
293 | sizeof(arcn->ln_name)); | |||
294 | /* XXX truncate? */ | |||
295 | if ((size_t)arcn->nlen >= sizeof(arcn->name)) | |||
296 | arcn->nlen = sizeof(arcn->name) - 1; | |||
297 | if (arcn->type == PAX_REG4) | |||
298 | arcn->type = PAX_HRG9; | |||
299 | else | |||
300 | arcn->type = PAX_HLK8; | |||
301 | ||||
302 | /* | |||
303 | * if we have found all the links to this file, remove | |||
304 | * it from the database | |||
305 | */ | |||
306 | if (--pt->nlink <= 1) { | |||
307 | *ppt = pt->fow; | |||
308 | free(pt->name); | |||
309 | free(pt); | |||
310 | } | |||
311 | return(1); | |||
312 | } | |||
313 | } | |||
314 | ||||
315 | /* | |||
316 | * we never saw this file before. It has links so we add it to the | |||
317 | * front of this hash chain | |||
318 | */ | |||
319 | if ((pt = malloc(sizeof(HRDLNK))) != NULL((void *)0)) { | |||
320 | if ((pt->name = strdup(arcn->name)) != NULL((void *)0)) { | |||
321 | pt->dev = arcn->sb.st_dev; | |||
322 | pt->ino = arcn->sb.st_ino; | |||
323 | pt->nlink = arcn->sb.st_nlink; | |||
324 | pt->fow = ltab[indx]; | |||
325 | ltab[indx] = pt; | |||
326 | return(0); | |||
327 | } | |||
328 | free(pt); | |||
329 | } | |||
330 | ||||
331 | paxwarn(1, "Hard link table out of memory"); | |||
332 | return(-1); | |||
333 | } | |||
334 | ||||
335 | /* | |||
336 | * purg_lnk | |||
337 | * remove reference for a file that we may have added to the data base as | |||
338 | * a potential source for hard links. We ended up not using the file, so | |||
339 | * we do not want to accidently point another file at it later on. | |||
340 | */ | |||
341 | ||||
342 | void | |||
343 | purg_lnk(ARCHD *arcn) | |||
344 | { | |||
345 | HRDLNK *pt; | |||
346 | HRDLNK **ppt; | |||
347 | u_int indx; | |||
348 | ||||
349 | if (ltab == NULL((void *)0)) | |||
350 | return; | |||
351 | /* | |||
352 | * do not bother to look if it could not be in the database | |||
353 | */ | |||
354 | if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR1) || | |||
355 | PAX_IS_HARDLINK(arcn->type)((arcn->type) == 8 || (arcn->type) == 9)) | |||
356 | return; | |||
357 | ||||
358 | /* | |||
359 | * find the hash chain for this inode value, if empty return | |||
360 | */ | |||
361 | indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ2503; | |||
362 | if ((pt = ltab[indx]) == NULL((void *)0)) | |||
363 | return; | |||
364 | ||||
365 | /* | |||
366 | * walk down the list looking for the inode/dev pair, unlink and | |||
367 | * free if found | |||
368 | */ | |||
369 | ppt = &(ltab[indx]); | |||
370 | while (pt != NULL((void *)0)) { | |||
371 | if ((pt->ino == arcn->sb.st_ino) && | |||
372 | (pt->dev == arcn->sb.st_dev)) | |||
373 | break; | |||
374 | ppt = &(pt->fow); | |||
375 | pt = pt->fow; | |||
376 | } | |||
377 | if (pt == NULL((void *)0)) | |||
378 | return; | |||
379 | ||||
380 | /* | |||
381 | * remove and free it | |||
382 | */ | |||
383 | *ppt = pt->fow; | |||
384 | free(pt->name); | |||
385 | free(pt); | |||
386 | } | |||
387 | ||||
388 | /* | |||
389 | * lnk_end() | |||
390 | * pull apart a existing link table so we can reuse it. We do this between | |||
391 | * read and write phases of append with update. (The format may have | |||
392 | * used the link table, and we need to start with a fresh table for the | |||
393 | * write phase | |||
394 | */ | |||
395 | ||||
396 | void | |||
397 | lnk_end(void) | |||
398 | { | |||
399 | int i; | |||
400 | HRDLNK *pt; | |||
401 | HRDLNK *ppt; | |||
402 | ||||
403 | if (ltab == NULL((void *)0)) | |||
404 | return; | |||
405 | ||||
406 | for (i = 0; i < L_TAB_SZ2503; ++i) { | |||
407 | if (ltab[i] == NULL((void *)0)) | |||
408 | continue; | |||
409 | pt = ltab[i]; | |||
410 | ltab[i] = NULL((void *)0); | |||
411 | ||||
412 | /* | |||
413 | * free up each entry on this chain | |||
414 | */ | |||
415 | while (pt != NULL((void *)0)) { | |||
416 | ppt = pt; | |||
417 | pt = ppt->fow; | |||
418 | free(ppt->name); | |||
419 | free(ppt); | |||
420 | } | |||
421 | } | |||
422 | } | |||
423 | ||||
424 | /* | |||
425 | * modification time table routines | |||
426 | * | |||
427 | * The modification time table keeps track of last modification times for all | |||
428 | * files stored in an archive during a write phase when -u is set. We only | |||
429 | * add a file to the archive if it is newer than a file with the same name | |||
430 | * already stored on the archive (if there is no other file with the same | |||
431 | * name on the archive it is added). This applies to writes and appends. | |||
432 | * An append with an -u must read the archive and store the modification time | |||
433 | * for every file on that archive before starting the write phase. It is clear | |||
434 | * that this is one HUGE database. To save memory space, the actual file names | |||
435 | * are stored in a scratch file and indexed by an in-memory hash table. The | |||
436 | * hash table is indexed by hashing the file path. The nodes in the table store | |||
437 | * the length of the filename and the lseek offset within the scratch file | |||
438 | * where the actual name is stored. Since there are never any deletions from | |||
439 | * this table, fragmentation of the scratch file is never a issue. Lookups | |||
440 | * seem to not exhibit any locality at all (files in the database are rarely | |||
441 | * looked up more than once...), so caching is just a waste of memory. The | |||
442 | * only limitation is the amount of scratch file space available to store the | |||
443 | * path names. | |||
444 | */ | |||
445 | ||||
446 | /* | |||
447 | * ftime_start() | |||
448 | * create the file time hash table and open for read/write the scratch | |||
449 | * file. (after created it is unlinked, so when we exit we leave | |||
450 | * no witnesses). | |||
451 | * Return: | |||
452 | * 0 if the table and file was created ok, -1 otherwise | |||
453 | */ | |||
454 | ||||
455 | int | |||
456 | ftime_start(void) | |||
457 | { | |||
458 | ||||
459 | if (ftab != NULL((void *)0)) | |||
460 | return(0); | |||
461 | if ((ftab = calloc(F_TAB_SZ50503, sizeof(FTM *))) == NULL((void *)0)) { | |||
462 | paxwarn(1, "Cannot allocate memory for file time table"); | |||
463 | return(-1); | |||
464 | } | |||
465 | ||||
466 | /* | |||
467 | * get random name and create temporary scratch file, unlink name | |||
468 | * so it will get removed on exit | |||
469 | */ | |||
470 | memcpy(tempbase, _TFILE_BASE"paxXXXXXXXXXX", sizeof(_TFILE_BASE"paxXXXXXXXXXX")); | |||
471 | if ((ffd = mkstemp(tempfile)) == -1) { | |||
472 | syswarn(1, errno(*__errno()), "Unable to create temporary file: %s", | |||
473 | tempfile); | |||
474 | return(-1); | |||
475 | } | |||
476 | (void)unlink(tempfile); | |||
477 | ||||
478 | return(0); | |||
479 | } | |||
480 | ||||
481 | /* | |||
482 | * chk_ftime() | |||
483 | * looks up entry in file time hash table. If not found, the file is | |||
484 | * added to the hash table and the file named stored in the scratch file. | |||
485 | * If a file with the same name is found, the file times are compared and | |||
486 | * the most recent file time is retained. If the new file was younger (or | |||
487 | * was not in the database) the new file is selected for storage. | |||
488 | * Return: | |||
489 | * 0 if file should be added to the archive, 1 if it should be skipped, | |||
490 | * -1 on error | |||
491 | */ | |||
492 | ||||
493 | int | |||
494 | chk_ftime(ARCHD *arcn) | |||
495 | { | |||
496 | FTM *pt; | |||
497 | int namelen; | |||
498 | u_int indx; | |||
499 | char ckname[PAXPATHLEN3072+1]; | |||
500 | ||||
501 | /* | |||
502 | * no info, go ahead and add to archive | |||
503 | */ | |||
504 | if (ftab == NULL((void *)0)) | |||
505 | return(0); | |||
506 | ||||
507 | /* | |||
508 | * hash the pathname and look up in table | |||
509 | */ | |||
510 | namelen = arcn->nlen; | |||
511 | indx = st_hash(arcn->name, namelen, F_TAB_SZ50503); | |||
512 | if ((pt = ftab[indx]) != NULL((void *)0)) { | |||
513 | /* | |||
514 | * the hash chain is not empty, walk down looking for match | |||
515 | * only read up the path names if the lengths match, speeds | |||
516 | * up the search a lot | |||
517 | */ | |||
518 | while (pt != NULL((void *)0)) { | |||
519 | if (pt->namelen == namelen) { | |||
520 | /* | |||
521 | * potential match, have to read the name | |||
522 | * from the scratch file. | |||
523 | */ | |||
524 | if (lseek(ffd,pt->seek,SEEK_SET0) != pt->seek) { | |||
525 | syswarn(1, errno(*__errno()), | |||
526 | "Failed ftime table seek"); | |||
527 | return(-1); | |||
528 | } | |||
529 | if (read(ffd, ckname, namelen) != namelen) { | |||
530 | syswarn(1, errno(*__errno()), | |||
531 | "Failed ftime table read"); | |||
532 | return(-1); | |||
533 | } | |||
534 | ||||
535 | /* | |||
536 | * if the names match, we are done | |||
537 | */ | |||
538 | if (!strncmp(ckname, arcn->name, namelen)) | |||
539 | break; | |||
540 | } | |||
541 | ||||
542 | /* | |||
543 | * try the next entry on the chain | |||
544 | */ | |||
545 | pt = pt->fow; | |||
546 | } | |||
547 | ||||
548 | if (pt != NULL((void *)0)) { | |||
549 | /* | |||
550 | * found the file, compare the times, save the newer | |||
551 | */ | |||
552 | if (timespeccmp(&arcn->sb.st_mtim, &pt->mtim, >)(((&arcn->sb.st_mtim)->tv_sec == (&pt->mtim) ->tv_sec) ? ((&arcn->sb.st_mtim)->tv_nsec > ( &pt->mtim)->tv_nsec) : ((&arcn->sb.st_mtim)-> tv_sec > (&pt->mtim)->tv_sec))) { | |||
553 | /* | |||
554 | * file is newer | |||
555 | */ | |||
556 | pt->mtim = arcn->sb.st_mtim; | |||
557 | return(0); | |||
558 | } | |||
559 | /* | |||
560 | * file is older | |||
561 | */ | |||
562 | return(1); | |||
563 | } | |||
564 | } | |||
565 | ||||
566 | /* | |||
567 | * not in table, add it | |||
568 | */ | |||
569 | if ((pt = malloc(sizeof(FTM))) != NULL((void *)0)) { | |||
570 | /* | |||
571 | * add the name at the end of the scratch file, saving the | |||
572 | * offset. add the file to the head of the hash chain | |||
573 | */ | |||
574 | if ((pt->seek = lseek(ffd, 0, SEEK_END2)) >= 0) { | |||
575 | if (write(ffd, arcn->name, namelen) == namelen) { | |||
576 | pt->mtim = arcn->sb.st_mtim; | |||
577 | pt->namelen = namelen; | |||
578 | pt->fow = ftab[indx]; | |||
579 | ftab[indx] = pt; | |||
580 | return(0); | |||
581 | } | |||
582 | syswarn(1, errno(*__errno()), "Failed write to file time table"); | |||
583 | } else | |||
584 | syswarn(1, errno(*__errno()), "Failed seek on file time table"); | |||
585 | } else | |||
586 | paxwarn(1, "File time table ran out of memory"); | |||
587 | ||||
588 | if (pt != NULL((void *)0)) | |||
589 | free(pt); | |||
590 | return(-1); | |||
591 | } | |||
592 | ||||
593 | /* | |||
594 | * escaping (absolute or w/"..") symlink table routines | |||
595 | * | |||
596 | * By default, an archive shouldn't be able extract to outside of the | |||
597 | * current directory. What should we do if the archive contains a symlink | |||
598 | * whose value is either absolute or contains ".." components? What we'll | |||
599 | * do is initially create the path as an empty file (to block attempts to | |||
600 | * reference _through_ it) and instead record its path and desired | |||
601 | * final value and mode. Then once all the other archive | |||
602 | * members are created (but before the pass to set timestamps on | |||
603 | * directories) we'll process those records, replacing the placeholder with | |||
604 | * the correct symlink and setting them to the correct mode, owner, group, | |||
605 | * and timestamps. | |||
606 | * | |||
607 | * Note: we also need to handle hardlinks to symlinks (barf) as well as | |||
608 | * hardlinks whose target is replaced by a later entry in the archive (barf^2). | |||
609 | * | |||
610 | * So we track things by dev+ino of the placeholder file, associating with | |||
611 | * that the value and mode of the final symlink and a list of paths that | |||
612 | * should all be hardlinks of that. We'll 'store' the symlink's desired | |||
613 | * timestamps, owner, and group by setting them on the placeholder file. | |||
614 | * | |||
615 | * The operations are: | |||
616 | * a) create an escaping symlink: create the placeholder file and add an entry | |||
617 | * for the new link | |||
618 | * b) create a hardlink: do the link. If the target turns out to be a | |||
619 | * zero-length file whose dev+ino are in the symlink table, then add this | |||
620 | * path to the list of names for that link | |||
621 | * c) perform deferred processing: for each entry, check each associated path: | |||
622 | * if it's a zero-length file with the correct dev+ino then recreate it as | |||
623 | * the specified symlink or hardlink to the first such | |||
624 | */ | |||
625 | ||||
626 | struct slpath { | |||
627 | char *sp_path; | |||
628 | struct slpath *sp_next; | |||
629 | }; | |||
630 | struct slinode { | |||
631 | ino_t sli_ino; | |||
632 | char *sli_value; | |||
633 | struct slpath sli_paths; | |||
634 | struct slinode *sli_fow; /* hash table chain */ | |||
635 | dev_t sli_dev; | |||
636 | mode_t sli_mode; | |||
637 | }; | |||
638 | ||||
639 | static struct slinode **slitab = NULL((void *)0); | |||
640 | ||||
641 | /* | |||
642 | * sltab_start() | |||
643 | * create the hash table | |||
644 | * Return: | |||
645 | * 0 if the table and file was created ok, -1 otherwise | |||
646 | */ | |||
647 | ||||
648 | int | |||
649 | sltab_start(void) | |||
650 | { | |||
651 | ||||
652 | if ((slitab = calloc(SL_TAB_SZ317, sizeof *slitab)) == NULL((void *)0)) { | |||
653 | syswarn(1, errno(*__errno()), "symlink table"); | |||
654 | return(-1); | |||
655 | } | |||
656 | ||||
657 | return(0); | |||
658 | } | |||
659 | ||||
660 | /* | |||
661 | * sltab_add_sym() | |||
662 | * Create the placeholder and tracking info for an escaping symlink. | |||
663 | * Return: | |||
664 | * 0 on success, -1 otherwise | |||
665 | */ | |||
666 | ||||
667 | int | |||
668 | sltab_add_sym(const char *path0, const char *value0, mode_t mode) | |||
669 | { | |||
670 | struct stat sb; | |||
671 | struct slinode *s; | |||
672 | struct slpath *p; | |||
673 | char *path, *value; | |||
674 | u_int indx; | |||
675 | int fd; | |||
676 | ||||
677 | /* create the placeholder */ | |||
678 | fd = open(path0, O_WRONLY0x0001 | O_CREAT0x0200 | O_EXCL0x0800 | O_CLOEXEC0x10000, 0600); | |||
679 | if (fd == -1) | |||
680 | return (-1); | |||
681 | if (fstat(fd, &sb) == -1) { | |||
682 | unlink(path0); | |||
683 | close(fd); | |||
684 | return (-1); | |||
685 | } | |||
686 | close(fd); | |||
687 | ||||
688 | if (havechd && *path0 != '/') { | |||
689 | if ((path = realpath(path0, NULL((void *)0))) == NULL((void *)0)) { | |||
690 | syswarn(1, errno(*__errno()), "Cannot canonicalize %s", path0); | |||
691 | unlink(path0); | |||
692 | return (-1); | |||
693 | } | |||
694 | } else if ((path = strdup(path0)) == NULL((void *)0)) { | |||
695 | syswarn(1, errno(*__errno()), "defered symlink path"); | |||
696 | unlink(path0); | |||
697 | return (-1); | |||
698 | } | |||
699 | if ((value = strdup(value0)) == NULL((void *)0)) { | |||
700 | syswarn(1, errno(*__errno()), "defered symlink value"); | |||
701 | unlink(path); | |||
702 | free(path); | |||
703 | return (-1); | |||
704 | } | |||
705 | ||||
706 | /* now check the hash table for conflicting entry */ | |||
707 | indx = (sb.st_ino ^ sb.st_dev) % SL_TAB_SZ317; | |||
708 | for (s = slitab[indx]; s != NULL((void *)0); s = s->sli_fow) { | |||
709 | if (s->sli_ino != sb.st_ino || s->sli_dev != sb.st_dev) | |||
710 | continue; | |||
711 | ||||
712 | /* | |||
713 | * One of our placeholders got removed behind our back and | |||
714 | * we've reused the inode. Weird, but clean up the mess. | |||
715 | */ | |||
716 | free(s->sli_value); | |||
717 | free(s->sli_paths.sp_path); | |||
718 | p = s->sli_paths.sp_next; | |||
719 | while (p != NULL((void *)0)) { | |||
720 | struct slpath *next_p = p->sp_next; | |||
721 | ||||
722 | free(p->sp_path); | |||
723 | free(p); | |||
724 | p = next_p; | |||
725 | } | |||
726 | goto set_value; | |||
727 | } | |||
728 | ||||
729 | /* Normal case: create a new node */ | |||
730 | if ((s = malloc(sizeof *s)) == NULL((void *)0)) { | |||
731 | syswarn(1, errno(*__errno()), "defered symlink"); | |||
732 | unlink(path); | |||
733 | free(path); | |||
734 | free(value); | |||
735 | return (-1); | |||
736 | } | |||
737 | s->sli_ino = sb.st_ino; | |||
738 | s->sli_dev = sb.st_dev; | |||
739 | s->sli_fow = slitab[indx]; | |||
740 | slitab[indx] = s; | |||
741 | ||||
742 | set_value: | |||
743 | s->sli_paths.sp_path = path; | |||
744 | s->sli_paths.sp_next = NULL((void *)0); | |||
745 | s->sli_value = value; | |||
746 | s->sli_mode = mode; | |||
747 | return (0); | |||
748 | } | |||
749 | ||||
750 | /* | |||
751 | * sltab_add_link() | |||
752 | * A hardlink was created; if it looks like a placeholder, handle the | |||
753 | * tracking. | |||
754 | * Return: | |||
755 | * 0 if things are ok, -1 if something went wrong | |||
756 | */ | |||
757 | ||||
758 | int | |||
759 | sltab_add_link(const char *path, const struct stat *sb) | |||
760 | { | |||
761 | struct slinode *s; | |||
762 | struct slpath *p; | |||
763 | u_int indx; | |||
764 | ||||
765 | if (!S_ISREG(sb->st_mode)((sb->st_mode & 0170000) == 0100000) || sb->st_size != 0) | |||
766 | return (1); | |||
767 | ||||
768 | /* find the hash table entry for this hardlink */ | |||
769 | indx = (sb->st_ino ^ sb->st_dev) % SL_TAB_SZ317; | |||
770 | for (s = slitab[indx]; s != NULL((void *)0); s = s->sli_fow) { | |||
771 | if (s->sli_ino != sb->st_ino || s->sli_dev != sb->st_dev) | |||
772 | continue; | |||
773 | ||||
774 | if ((p = malloc(sizeof *p)) == NULL((void *)0)) { | |||
775 | syswarn(1, errno(*__errno()), "deferred symlink hardlink"); | |||
776 | return (-1); | |||
777 | } | |||
778 | if (havechd && *path != '/') { | |||
779 | if ((p->sp_path = realpath(path, NULL((void *)0))) == NULL((void *)0)) { | |||
780 | syswarn(1, errno(*__errno()), "Cannot canonicalize %s", | |||
781 | path); | |||
782 | free(p); | |||
783 | return (-1); | |||
784 | } | |||
785 | } else if ((p->sp_path = strdup(path)) == NULL((void *)0)) { | |||
786 | syswarn(1, errno(*__errno()), "defered symlink hardlink path"); | |||
787 | free(p); | |||
788 | return (-1); | |||
789 | } | |||
790 | ||||
791 | /* link it in */ | |||
792 | p->sp_next = s->sli_paths.sp_next; | |||
793 | s->sli_paths.sp_next = p; | |||
794 | return (0); | |||
795 | } | |||
796 | ||||
797 | /* not found */ | |||
798 | return (1); | |||
799 | } | |||
800 | ||||
801 | ||||
802 | static int | |||
803 | sltab_process_one(struct slinode *s, struct slpath *p, const char *first, | |||
804 | int in_sig) | |||
805 | { | |||
806 | struct stat sb; | |||
807 | char *path = p->sp_path; | |||
808 | mode_t mode; | |||
809 | int err; | |||
810 | ||||
811 | /* | |||
812 | * is it the expected placeholder? This can fail legimately | |||
813 | * if the archive overwrote the link with another, later entry, | |||
814 | * so don't warn. | |||
815 | */ | |||
816 | if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode)((sb.st_mode & 0170000) == 0100000) || sb.st_size != 0 || | |||
817 | sb.st_ino != s->sli_ino || sb.st_dev != s->sli_dev) | |||
818 | return (0); | |||
819 | ||||
820 | if (unlink(path) && errno(*__errno()) != ENOENT2) { | |||
821 | if (!in_sig) | |||
822 | syswarn(1, errno(*__errno()), "deferred symlink removal"); | |||
823 | return (0); | |||
824 | } | |||
825 | ||||
826 | err = 0; | |||
827 | if (first != NULL((void *)0)) { | |||
828 | /* add another hardlink to the existing symlink */ | |||
829 | if (linkat(AT_FDCWD-100, first, AT_FDCWD-100, path, 0) == 0) | |||
830 | return (0); | |||
831 | ||||
832 | /* | |||
833 | * Couldn't hardlink the symlink for some reason, so we'll | |||
834 | * try creating it as its own symlink, but save the error | |||
835 | * for reporting if that fails. | |||
836 | */ | |||
837 | err = errno(*__errno()); | |||
838 | } | |||
839 | ||||
840 | if (symlink(s->sli_value, path)) { | |||
841 | if (!in_sig) { | |||
842 | const char *qualifier = ""; | |||
843 | if (err) | |||
844 | qualifier = " hardlink"; | |||
845 | else | |||
846 | err = errno(*__errno()); | |||
847 | ||||
848 | syswarn(1, err, "deferred symlink%s: %s", | |||
849 | qualifier, path); | |||
850 | } | |||
851 | return (0); | |||
852 | } | |||
853 | ||||
854 | /* success, so set the id, mode, and times */ | |||
855 | mode = s->sli_mode; | |||
856 | if (pids) { | |||
857 | /* if can't set the ids, force the set[ug]id bits off */ | |||
858 | if (set_ids(path, sb.st_uid, sb.st_gid)) | |||
859 | mode &= ~(SETBITS(0004000 | 0002000)); | |||
860 | } | |||
861 | ||||
862 | if (pmode) | |||
863 | set_pmode(path, mode); | |||
864 | ||||
865 | if (patime || pmtime) | |||
866 | set_ftime(path, &sb.st_mtim, &sb.st_atim, 0); | |||
867 | ||||
868 | /* | |||
869 | * If we tried to link to first but failed, then this new symlink | |||
870 | * might be a better one to try in the future. Guess from the errno. | |||
871 | */ | |||
872 | if (err == 0 || err == ENOENT2 || err == EMLINK31 || err == EOPNOTSUPP45) | |||
873 | return (1); | |||
874 | return (0); | |||
875 | } | |||
876 | ||||
877 | /* | |||
878 | * sltab_process() | |||
879 | * Do all the delayed process for escape symlinks | |||
880 | */ | |||
881 | ||||
882 | void | |||
883 | sltab_process(int in_sig) | |||
884 | { | |||
885 | struct slinode *s; | |||
886 | struct slpath *p; | |||
887 | char *first; | |||
888 | u_int indx; | |||
889 | ||||
890 | if (slitab == NULL((void *)0)) | |||
891 | return; | |||
892 | ||||
893 | /* walk across the entire hash table */ | |||
894 | for (indx = 0; indx < SL_TAB_SZ317; indx++) { | |||
895 | while ((s = slitab[indx]) != NULL((void *)0)) { | |||
896 | /* pop this entry */ | |||
897 | slitab[indx] = s->sli_fow; | |||
898 | ||||
899 | first = NULL((void *)0); | |||
900 | p = &s->sli_paths; | |||
901 | while (1) { | |||
902 | struct slpath *next_p; | |||
903 | ||||
904 | if (sltab_process_one(s, p, first, in_sig)) { | |||
905 | if (!in_sig) | |||
906 | free(first); | |||
907 | first = p->sp_path; | |||
908 | } else if (!in_sig) | |||
909 | free(p->sp_path); | |||
910 | ||||
911 | if ((next_p = p->sp_next) == NULL((void *)0)) | |||
912 | break; | |||
913 | *p = *next_p; | |||
914 | if (!in_sig) | |||
915 | free(next_p); | |||
916 | } | |||
917 | if (!in_sig) { | |||
918 | free(first); | |||
919 | free(s->sli_value); | |||
920 | free(s); | |||
921 | } | |||
922 | } | |||
923 | } | |||
924 | if (!in_sig) | |||
925 | free(slitab); | |||
926 | slitab = NULL((void *)0); | |||
927 | } | |||
928 | ||||
929 | ||||
930 | /* | |||
931 | * Interactive rename table routines | |||
932 | * | |||
933 | * The interactive rename table keeps track of the new names that the user | |||
934 | * assigns to files from tty input. Since this map is unique for each file | |||
935 | * we must store it in case there is a reference to the file later in archive | |||
936 | * (a link). Otherwise we will be unable to find the file we know was | |||
937 | * extracted. The remapping of these files is stored in a memory based hash | |||
938 | * table (it is assumed since input must come from /dev/tty, it is unlikely to | |||
939 | * be a very large table). | |||
940 | */ | |||
941 | ||||
942 | /* | |||
943 | * name_start() | |||
944 | * create the interactive rename table | |||
945 | * Return: | |||
946 | * 0 if successful, -1 otherwise | |||
947 | */ | |||
948 | ||||
949 | int | |||
950 | name_start(void) | |||
951 | { | |||
952 | if (ntab != NULL((void *)0)) | |||
953 | return(0); | |||
954 | if ((ntab = calloc(N_TAB_SZ541, sizeof(NAMT *))) == NULL((void *)0)) { | |||
955 | paxwarn(1, "Cannot allocate memory for interactive rename table"); | |||
956 | return(-1); | |||
957 | } | |||
958 | return(0); | |||
959 | } | |||
960 | ||||
961 | /* | |||
962 | * add_name() | |||
963 | * add the new name to old name mapping just created by the user. | |||
964 | * If an old name mapping is found (there may be duplicate names on an | |||
965 | * archive) only the most recent is kept. | |||
966 | * Return: | |||
967 | * 0 if added, -1 otherwise | |||
968 | */ | |||
969 | ||||
970 | int | |||
971 | add_name(char *oname, int onamelen, char *nname) | |||
972 | { | |||
973 | NAMT *pt; | |||
974 | u_int indx; | |||
975 | ||||
976 | if (ntab == NULL((void *)0)) { | |||
977 | /* | |||
978 | * should never happen | |||
979 | */ | |||
980 | paxwarn(0, "No interactive rename table, links may fail"); | |||
981 | return(0); | |||
982 | } | |||
983 | ||||
984 | /* | |||
985 | * look to see if we have already mapped this file, if so we | |||
986 | * will update it | |||
987 | */ | |||
988 | indx = st_hash(oname, onamelen, N_TAB_SZ541); | |||
989 | if ((pt = ntab[indx]) != NULL((void *)0)) { | |||
990 | /* | |||
991 | * look down the has chain for the file | |||
992 | */ | |||
993 | while ((pt != NULL((void *)0)) && (strcmp(oname, pt->oname) != 0)) | |||
994 | pt = pt->fow; | |||
995 | ||||
996 | if (pt != NULL((void *)0)) { | |||
997 | /* | |||
998 | * found an old mapping, replace it with the new one | |||
999 | * the user just input (if it is different) | |||
1000 | */ | |||
1001 | if (strcmp(nname, pt->nname) == 0) | |||
1002 | return(0); | |||
1003 | ||||
1004 | free(pt->nname); | |||
1005 | if ((pt->nname = strdup(nname)) == NULL((void *)0)) { | |||
1006 | paxwarn(1, "Cannot update rename table"); | |||
1007 | return(-1); | |||
1008 | } | |||
1009 | return(0); | |||
1010 | } | |||
1011 | } | |||
1012 | ||||
1013 | /* | |||
1014 | * this is a new mapping, add it to the table | |||
1015 | */ | |||
1016 | if ((pt = malloc(sizeof(NAMT))) != NULL((void *)0)) { | |||
1017 | if ((pt->oname = strdup(oname)) != NULL((void *)0)) { | |||
1018 | if ((pt->nname = strdup(nname)) != NULL((void *)0)) { | |||
1019 | pt->fow = ntab[indx]; | |||
1020 | ntab[indx] = pt; | |||
1021 | return(0); | |||
1022 | } | |||
1023 | free(pt->oname); | |||
1024 | } | |||
1025 | free(pt); | |||
1026 | } | |||
1027 | paxwarn(1, "Interactive rename table out of memory"); | |||
1028 | return(-1); | |||
1029 | } | |||
1030 | ||||
1031 | /* | |||
1032 | * sub_name() | |||
1033 | * look up a link name to see if it points at a file that has been | |||
1034 | * remapped by the user. If found, the link is adjusted to contain the | |||
1035 | * new name (oname is the link to name) | |||
1036 | */ | |||
1037 | ||||
1038 | void | |||
1039 | sub_name(char *oname, int *onamelen, int onamesize) | |||
1040 | { | |||
1041 | NAMT *pt; | |||
1042 | u_int indx; | |||
1043 | ||||
1044 | if (ntab == NULL((void *)0)) | |||
| ||||
1045 | return; | |||
1046 | /* | |||
1047 | * look the name up in the hash table | |||
1048 | */ | |||
1049 | indx = st_hash(oname, *onamelen, N_TAB_SZ541); | |||
1050 | if ((pt = ntab[indx]) == NULL((void *)0)) | |||
1051 | return; | |||
1052 | ||||
1053 | while (pt != NULL((void *)0)) { | |||
1054 | /* | |||
1055 | * walk down the hash chain looking for a match | |||
1056 | */ | |||
1057 | if (strcmp(oname, pt->oname) == 0) { | |||
1058 | /* | |||
1059 | * found it, replace it with the new name | |||
1060 | * and return (we know that oname has enough space) | |||
1061 | */ | |||
1062 | *onamelen = strlcpy(oname, pt->nname, onamesize); | |||
1063 | if (*onamelen >= onamesize) | |||
1064 | *onamelen = onamesize - 1; /* XXX truncate? */ | |||
1065 | return; | |||
1066 | } | |||
1067 | pt = pt->fow; | |||
1068 | } | |||
1069 | ||||
1070 | /* | |||
1071 | * no match, just return | |||
1072 | */ | |||
1073 | } | |||
1074 | ||||
1075 | #ifndef NOCPIO | |||
1076 | /* | |||
1077 | * device/inode mapping table routines | |||
1078 | * (used with formats that store device and inodes fields) | |||
1079 | * | |||
1080 | * device/inode mapping tables remap the device field in a archive header. The | |||
1081 | * device/inode fields are used to determine when files are hard links to each | |||
1082 | * other. However these values have very little meaning outside of that. This | |||
1083 | * database is used to solve one of two different problems. | |||
1084 | * | |||
1085 | * 1) when files are appended to an archive, while the new files may have hard | |||
1086 | * links to each other, you cannot determine if they have hard links to any | |||
1087 | * file already stored on the archive from a prior run of pax. We must assume | |||
1088 | * that these inode/device pairs are unique only within a SINGLE run of pax | |||
1089 | * (which adds a set of files to an archive). So we have to make sure the | |||
1090 | * inode/dev pairs we add each time are always unique. We do this by observing | |||
1091 | * while the inode field is very dense, the use of the dev field is fairly | |||
1092 | * sparse. Within each run of pax, we remap any device number of a new archive | |||
1093 | * member that has a device number used in a prior run and already stored in a | |||
1094 | * file on the archive. During the read phase of the append, we store the | |||
1095 | * device numbers used and mark them to not be used by any file during the | |||
1096 | * write phase. If during write we go to use one of those old device numbers, | |||
1097 | * we remap it to a new value. | |||
1098 | * | |||
1099 | * 2) Often the fields in the archive header used to store these values are | |||
1100 | * too small to store the entire value. The result is an inode or device value | |||
1101 | * which can be truncated. This really can foul up an archive. With truncation | |||
1102 | * we end up creating links between files that are really not links (after | |||
1103 | * truncation the inodes are the same value). We address that by detecting | |||
1104 | * truncation and forcing a remap of the device field to split truncated | |||
1105 | * inodes away from each other. Each truncation creates a pattern of bits that | |||
1106 | * are removed. We use this pattern of truncated bits to partition the inodes | |||
1107 | * on a single device to many different devices (each one represented by the | |||
1108 | * truncated bit pattern). All inodes on the same device that have the same | |||
1109 | * truncation pattern are mapped to the same new device. Two inodes that | |||
1110 | * truncate to the same value clearly will always have different truncation | |||
1111 | * bit patterns, so they will be split from away each other. When we spot | |||
1112 | * device truncation we remap the device number to a non truncated value. | |||
1113 | * (for more info see table.h for the data structures involved). | |||
1114 | */ | |||
1115 | ||||
1116 | static DEVT *chk_dev(dev_t, int); | |||
1117 | ||||
1118 | /* | |||
1119 | * dev_start() | |||
1120 | * create the device mapping table | |||
1121 | * Return: | |||
1122 | * 0 if successful, -1 otherwise | |||
1123 | */ | |||
1124 | ||||
1125 | int | |||
1126 | dev_start(void) | |||
1127 | { | |||
1128 | if (dtab != NULL((void *)0)) | |||
1129 | return(0); | |||
1130 | if ((dtab = calloc(D_TAB_SZ317, sizeof(DEVT *))) == NULL((void *)0)) { | |||
1131 | paxwarn(1, "Cannot allocate memory for device mapping table"); | |||
1132 | return(-1); | |||
1133 | } | |||
1134 | return(0); | |||
1135 | } | |||
1136 | ||||
1137 | /* | |||
1138 | * add_dev() | |||
1139 | * add a device number to the table. this will force the device to be | |||
1140 | * remapped to a new value if it be used during a write phase. This | |||
1141 | * function is called during the read phase of an append to prohibit the | |||
1142 | * use of any device number already in the archive. | |||
1143 | * Return: | |||
1144 | * 0 if added ok, -1 otherwise | |||
1145 | */ | |||
1146 | ||||
1147 | int | |||
1148 | add_dev(ARCHD *arcn) | |||
1149 | { | |||
1150 | if (chk_dev(arcn->sb.st_dev, 1) == NULL((void *)0)) | |||
1151 | return(-1); | |||
1152 | return(0); | |||
1153 | } | |||
1154 | ||||
1155 | /* | |||
1156 | * chk_dev() | |||
1157 | * check for a device value in the device table. If not found and the add | |||
1158 | * flag is set, it is added. This does NOT assign any mapping values, just | |||
1159 | * adds the device number as one that need to be remapped. If this device | |||
1160 | * is already mapped, just return with a pointer to that entry. | |||
1161 | * Return: | |||
1162 | * pointer to the entry for this device in the device map table. Null | |||
1163 | * if the add flag is not set and the device is not in the table (it is | |||
1164 | * not been seen yet). If add is set and the device cannot be added, null | |||
1165 | * is returned (indicates an error). | |||
1166 | */ | |||
1167 | ||||
1168 | static DEVT * | |||
1169 | chk_dev(dev_t dev, int add) | |||
1170 | { | |||
1171 | DEVT *pt; | |||
1172 | u_int indx; | |||
1173 | ||||
1174 | if (dtab == NULL((void *)0)) | |||
1175 | return(NULL((void *)0)); | |||
1176 | /* | |||
1177 | * look to see if this device is already in the table | |||
1178 | */ | |||
1179 | indx = ((unsigned)dev) % D_TAB_SZ317; | |||
1180 | if ((pt = dtab[indx]) != NULL((void *)0)) { | |||
1181 | while ((pt != NULL((void *)0)) && (pt->dev != dev)) | |||
1182 | pt = pt->fow; | |||
1183 | ||||
1184 | /* | |||
1185 | * found it, return a pointer to it | |||
1186 | */ | |||
1187 | if (pt != NULL((void *)0)) | |||
1188 | return(pt); | |||
1189 | } | |||
1190 | ||||
1191 | /* | |||
1192 | * not in table, we add it only if told to as this may just be a check | |||
1193 | * to see if a device number is being used. | |||
1194 | */ | |||
1195 | if (add == 0) | |||
1196 | return(NULL((void *)0)); | |||
1197 | ||||
1198 | /* | |||
1199 | * allocate a node for this device and add it to the front of the hash | |||
1200 | * chain. Note we do not assign remaps values here, so the pt->list | |||
1201 | * list must be NULL. | |||
1202 | */ | |||
1203 | if ((pt = malloc(sizeof(DEVT))) == NULL((void *)0)) { | |||
1204 | paxwarn(1, "Device map table out of memory"); | |||
1205 | return(NULL((void *)0)); | |||
1206 | } | |||
1207 | pt->dev = dev; | |||
1208 | pt->list = NULL((void *)0); | |||
1209 | pt->fow = dtab[indx]; | |||
1210 | dtab[indx] = pt; | |||
1211 | return(pt); | |||
1212 | } | |||
1213 | /* | |||
1214 | * map_dev() | |||
1215 | * given an inode and device storage mask (the mask has a 1 for each bit | |||
1216 | * the archive format is able to store in a header), we check for inode | |||
1217 | * and device truncation and remap the device as required. Device mapping | |||
1218 | * can also occur when during the read phase of append a device number was | |||
1219 | * seen (and was marked as do not use during the write phase). WE ASSUME | |||
1220 | * that unsigned longs are the same size or bigger than the fields used | |||
1221 | * for ino_t and dev_t. If not the types will have to be changed. | |||
1222 | * Return: | |||
1223 | * 0 if all ok, -1 otherwise. | |||
1224 | */ | |||
1225 | ||||
1226 | int | |||
1227 | map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask) | |||
1228 | { | |||
1229 | DEVT *pt; | |||
1230 | DLIST *dpt; | |||
1231 | static dev_t lastdev = 0; /* next device number to try */ | |||
1232 | int trc_ino = 0; | |||
1233 | int trc_dev = 0; | |||
1234 | ino_t trunc_bits = 0; | |||
1235 | ino_t nino; | |||
1236 | ||||
1237 | if (dtab == NULL((void *)0)) | |||
1238 | return(0); | |||
1239 | /* | |||
1240 | * check for device and inode truncation, and extract the truncated | |||
1241 | * bit pattern. | |||
1242 | */ | |||
1243 | if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev) | |||
1244 | ++trc_dev; | |||
1245 | if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) { | |||
1246 | ++trc_ino; | |||
1247 | trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask); | |||
1248 | } | |||
1249 | ||||
1250 | /* | |||
1251 | * see if this device is already being mapped, look up the device | |||
1252 | * then find the truncation bit pattern which applies | |||
1253 | */ | |||
1254 | if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL((void *)0)) { | |||
1255 | /* | |||
1256 | * this device is already marked to be remapped | |||
1257 | */ | |||
1258 | for (dpt = pt->list; dpt != NULL((void *)0); dpt = dpt->fow) | |||
1259 | if (dpt->trunc_bits == trunc_bits) | |||
1260 | break; | |||
1261 | ||||
1262 | if (dpt != NULL((void *)0)) { | |||
1263 | /* | |||
1264 | * we are being remapped for this device and pattern | |||
1265 | * change the device number to be stored and return | |||
1266 | */ | |||
1267 | arcn->sb.st_dev = dpt->dev; | |||
1268 | arcn->sb.st_ino = nino; | |||
1269 | return(0); | |||
1270 | } | |||
1271 | } else { | |||
1272 | /* | |||
1273 | * this device is not being remapped YET. if we do not have any | |||
1274 | * form of truncation, we do not need a remap | |||
1275 | */ | |||
1276 | if (!trc_ino && !trc_dev) | |||
1277 | return(0); | |||
1278 | ||||
1279 | /* | |||
1280 | * we have truncation, have to add this as a device to remap | |||
1281 | */ | |||
1282 | if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL((void *)0)) | |||
1283 | goto bad; | |||
1284 | ||||
1285 | /* | |||
1286 | * if we just have a truncated inode, we have to make sure that | |||
1287 | * all future inodes that do not truncate (they have the | |||
1288 | * truncation pattern of all 0's) continue to map to the same | |||
1289 | * device number. We probably have already written inodes with | |||
1290 | * this device number to the archive with the truncation | |||
1291 | * pattern of all 0's. So we add the mapping for all 0's to the | |||
1292 | * same device number. | |||
1293 | */ | |||
1294 | if (!trc_dev && (trunc_bits != 0)) { | |||
1295 | if ((dpt = malloc(sizeof(DLIST))) == NULL((void *)0)) | |||
1296 | goto bad; | |||
1297 | dpt->trunc_bits = 0; | |||
1298 | dpt->dev = arcn->sb.st_dev; | |||
1299 | dpt->fow = pt->list; | |||
1300 | pt->list = dpt; | |||
1301 | } | |||
1302 | } | |||
1303 | ||||
1304 | /* | |||
1305 | * look for a device number not being used. We must watch for wrap | |||
1306 | * around on lastdev (so we do not get stuck looking forever!) | |||
1307 | */ | |||
1308 | while (++lastdev > 0) { | |||
1309 | if (chk_dev(lastdev, 0) != NULL((void *)0)) | |||
1310 | continue; | |||
1311 | /* | |||
1312 | * found an unused value. If we have reached truncation point | |||
1313 | * for this format we are hosed, so we give up. Otherwise we | |||
1314 | * mark it as being used. | |||
1315 | */ | |||
1316 | if (((lastdev & ((dev_t)dev_mask)) != lastdev) || | |||
1317 | (chk_dev(lastdev, 1) == NULL((void *)0))) | |||
1318 | goto bad; | |||
1319 | break; | |||
1320 | } | |||
1321 | ||||
1322 | if ((lastdev <= 0) || ((dpt = malloc(sizeof(DLIST))) == NULL((void *)0))) | |||
1323 | goto bad; | |||
1324 | ||||
1325 | /* | |||
1326 | * got a new device number, store it under this truncation pattern. | |||
1327 | * change the device number this file is being stored with. | |||
1328 | */ | |||
1329 | dpt->trunc_bits = trunc_bits; | |||
1330 | dpt->dev = lastdev; | |||
1331 | dpt->fow = pt->list; | |||
1332 | pt->list = dpt; | |||
1333 | arcn->sb.st_dev = lastdev; | |||
1334 | arcn->sb.st_ino = nino; | |||
1335 | return(0); | |||
1336 | ||||
1337 | bad: | |||
1338 | paxwarn(1, "Unable to fix truncated inode/device field when storing %s", | |||
1339 | arcn->name); | |||
1340 | paxwarn(0, "Archive may create improper hard links when extracted"); | |||
1341 | return(0); | |||
1342 | } | |||
1343 | #endif /* NOCPIO */ | |||
1344 | ||||
1345 | /* | |||
1346 | * directory access/mod time reset table routines (for directories READ by pax) | |||
1347 | * | |||
1348 | * The pax -t flag requires that access times of archive files be the same | |||
1349 | * before being read by pax. For regular files, access time is restored after | |||
1350 | * the file has been copied. This database provides the same functionality for | |||
1351 | * directories read during file tree traversal. Restoring directory access time | |||
1352 | * is more complex than files since directories may be read several times until | |||
1353 | * all the descendants in their subtree are visited by fts. Directory access | |||
1354 | * and modification times are stored during the fts pre-order visit (done | |||
1355 | * before any descendants in the subtree are visited) and restored after the | |||
1356 | * fts post-order visit (after all the descendants have been visited). In the | |||
1357 | * case of premature exit from a subtree (like from the effects of -n), any | |||
1358 | * directory entries left in this database are reset during final cleanup | |||
1359 | * operations of pax. Entries are hashed by inode number for fast lookup. | |||
1360 | */ | |||
1361 | ||||
1362 | /* | |||
1363 | * atdir_start() | |||
1364 | * create the directory access time database for directories READ by pax. | |||
1365 | * Return: | |||
1366 | * 0 is created ok, -1 otherwise. | |||
1367 | */ | |||
1368 | ||||
1369 | int | |||
1370 | atdir_start(void) | |||
1371 | { | |||
1372 | if (atab != NULL((void *)0)) | |||
1373 | return(0); | |||
1374 | if ((atab = calloc(A_TAB_SZ317, sizeof(ATDIR *))) == NULL((void *)0)) { | |||
1375 | paxwarn(1,"Cannot allocate space for directory access time table"); | |||
1376 | return(-1); | |||
1377 | } | |||
1378 | return(0); | |||
1379 | } | |||
1380 | ||||
1381 | ||||
1382 | /* | |||
1383 | * atdir_end() | |||
1384 | * walk through the directory access time table and reset the access time | |||
1385 | * of any directory who still has an entry left in the database. These | |||
1386 | * entries are for directories READ by pax | |||
1387 | */ | |||
1388 | ||||
1389 | void | |||
1390 | atdir_end(void) | |||
1391 | { | |||
1392 | ATDIR *pt; | |||
1393 | int i; | |||
1394 | ||||
1395 | if (atab == NULL((void *)0)) | |||
1396 | return; | |||
1397 | /* | |||
1398 | * for each non-empty hash table entry reset all the directories | |||
1399 | * chained there. | |||
1400 | */ | |||
1401 | for (i = 0; i < A_TAB_SZ317; ++i) { | |||
1402 | if ((pt = atab[i]) == NULL((void *)0)) | |||
1403 | continue; | |||
1404 | /* | |||
1405 | * remember to force the times, set_ftime() looks at pmtime | |||
1406 | * and patime, which only applies to things CREATED by pax, | |||
1407 | * not read by pax. Read time reset is controlled by -t. | |||
1408 | */ | |||
1409 | for (; pt != NULL((void *)0); pt = pt->fow) | |||
1410 | set_attr(&pt->ft, 1, 0, 0, 0); | |||
1411 | } | |||
1412 | } | |||
1413 | ||||
1414 | /* | |||
1415 | * add_atdir() | |||
1416 | * add a directory to the directory access time table. Table is hashed | |||
1417 | * and chained by inode number. This is for directories READ by pax | |||
1418 | */ | |||
1419 | ||||
1420 | void | |||
1421 | add_atdir(char *fname, dev_t dev, ino_t ino, const struct timespec *mtimp, | |||
1422 | const struct timespec *atimp) | |||
1423 | { | |||
1424 | ATDIR *pt; | |||
1425 | sigset_t allsigs, savedsigs; | |||
1426 | u_int indx; | |||
1427 | ||||
1428 | if (atab == NULL((void *)0)) | |||
1429 | return; | |||
1430 | ||||
1431 | /* | |||
1432 | * make sure this directory is not already in the table, if so just | |||
1433 | * return (the older entry always has the correct time). The only | |||
1434 | * way this will happen is when the same subtree can be traversed by | |||
1435 | * different args to pax and the -n option is aborting fts out of a | |||
1436 | * subtree before all the post-order visits have been made. | |||
1437 | */ | |||
1438 | indx = ((unsigned)ino) % A_TAB_SZ317; | |||
1439 | if ((pt = atab[indx]) != NULL((void *)0)) { | |||
1440 | while (pt != NULL((void *)0)) { | |||
1441 | if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev)) | |||
1442 | break; | |||
1443 | pt = pt->fow; | |||
1444 | } | |||
1445 | ||||
1446 | /* | |||
1447 | * oops, already there. Leave it alone. | |||
1448 | */ | |||
1449 | if (pt != NULL((void *)0)) | |||
1450 | return; | |||
1451 | } | |||
1452 | ||||
1453 | /* | |||
1454 | * add it to the front of the hash chain | |||
1455 | */ | |||
1456 | sigfillset(&allsigs); | |||
1457 | sigprocmask(SIG_BLOCK1, &allsigs, &savedsigs); | |||
1458 | if ((pt = malloc(sizeof *pt)) != NULL((void *)0)) { | |||
1459 | if ((pt->ft.ft_name = strdup(fname)) != NULL((void *)0)) { | |||
1460 | pt->ft.ft_dev = dev; | |||
1461 | pt->ft.ft_ino = ino; | |||
1462 | pt->ft.ft_mtim = *mtimp; | |||
1463 | pt->ft.ft_atim = *atimp; | |||
1464 | pt->fow = atab[indx]; | |||
1465 | atab[indx] = pt; | |||
1466 | sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0)); | |||
1467 | return; | |||
1468 | } | |||
1469 | free(pt); | |||
1470 | } | |||
1471 | ||||
1472 | sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0)); | |||
1473 | paxwarn(1, "Directory access time reset table ran out of memory"); | |||
1474 | } | |||
1475 | ||||
1476 | /* | |||
1477 | * get_atdir() | |||
1478 | * look up a directory by inode and device number to obtain the access | |||
1479 | * and modification time you want to set to. If found, the modification | |||
1480 | * and access time parameters are set and the entry is removed from the | |||
1481 | * table (as it is no longer needed). These are for directories READ by | |||
1482 | * pax | |||
1483 | * Return: | |||
1484 | * 0 if found, -1 if not found. | |||
1485 | */ | |||
1486 | ||||
1487 | int | |||
1488 | do_atdir(const char *name, dev_t dev, ino_t ino) | |||
1489 | { | |||
1490 | ATDIR *pt; | |||
1491 | ATDIR **ppt; | |||
1492 | sigset_t allsigs, savedsigs; | |||
1493 | u_int indx; | |||
1494 | ||||
1495 | if (atab == NULL((void *)0)) | |||
1496 | return(-1); | |||
1497 | /* | |||
1498 | * hash by inode and search the chain for an inode and device match | |||
1499 | */ | |||
1500 | indx = ((unsigned)ino) % A_TAB_SZ317; | |||
1501 | if ((pt = atab[indx]) == NULL((void *)0)) | |||
1502 | return(-1); | |||
1503 | ||||
1504 | ppt = &(atab[indx]); | |||
1505 | while (pt != NULL((void *)0)) { | |||
1506 | if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev)) | |||
1507 | break; | |||
1508 | /* | |||
1509 | * no match, go to next one | |||
1510 | */ | |||
1511 | ppt = &(pt->fow); | |||
1512 | pt = pt->fow; | |||
1513 | } | |||
1514 | ||||
1515 | /* | |||
1516 | * return if we did not find it. | |||
1517 | */ | |||
1518 | if (pt == NULL((void *)0) || pt->ft.ft_name == NULL((void *)0) || | |||
1519 | strcmp(name, pt->ft.ft_name) == 0) | |||
1520 | return(-1); | |||
1521 | ||||
1522 | /* | |||
1523 | * found it. set the times and remove the entry from the table. | |||
1524 | */ | |||
1525 | set_attr(&pt->ft, 1, 0, 0, 0); | |||
1526 | sigfillset(&allsigs); | |||
1527 | sigprocmask(SIG_BLOCK1, &allsigs, &savedsigs); | |||
1528 | *ppt = pt->fow; | |||
1529 | sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0)); | |||
1530 | free(pt->ft.ft_name); | |||
1531 | free(pt); | |||
1532 | return(0); | |||
1533 | } | |||
1534 | ||||
1535 | /* | |||
1536 | * directory access mode and time storage routines (for directories CREATED | |||
1537 | * by pax). | |||
1538 | * | |||
1539 | * Pax requires that extracted directories, by default, have their access/mod | |||
1540 | * times and permissions set to the values specified in the archive. During the | |||
1541 | * actions of extracting (and creating the destination subtree during -rw copy) | |||
1542 | * directories extracted may be modified after being created. Even worse is | |||
1543 | * that these directories may have been created with file permissions which | |||
1544 | * prohibits any descendants of these directories from being extracted. When | |||
1545 | * directories are created by pax, access rights may be added to permit the | |||
1546 | * creation of files in their subtree. Every time pax creates a directory, the | |||
1547 | * times and file permissions specified by the archive are stored. After all | |||
1548 | * files have been extracted (or copied), these directories have their times | |||
1549 | * and file modes reset to the stored values. The directory info is restored in | |||
1550 | * reverse order as entries were added from root to leaf: to restore atime | |||
1551 | * properly, we must go backwards. | |||
1552 | */ | |||
1553 | ||||
1554 | /* | |||
1555 | * dir_start() | |||
1556 | * set up the directory time and file mode storage for directories CREATED | |||
1557 | * by pax. | |||
1558 | * Return: | |||
1559 | * 0 if ok, -1 otherwise | |||
1560 | */ | |||
1561 | ||||
1562 | int | |||
1563 | dir_start(void) | |||
1564 | { | |||
1565 | if (dirp != NULL((void *)0)) | |||
1566 | return(0); | |||
1567 | ||||
1568 | dirsize = DIRP_SIZE64; | |||
1569 | if ((dirp = reallocarray(NULL((void *)0), dirsize, sizeof(DIRDATA))) == NULL((void *)0)) { | |||
1570 | paxwarn(1, "Unable to allocate memory for directory times"); | |||
1571 | return(-1); | |||
1572 | } | |||
1573 | return(0); | |||
1574 | } | |||
1575 | ||||
1576 | /* | |||
1577 | * add_dir() | |||
1578 | * add the mode and times for a newly CREATED directory | |||
1579 | * name is name of the directory, psb the stat buffer with the data in it, | |||
1580 | * frc_mode is a flag that says whether to force the setting of the mode | |||
1581 | * (ignoring the user set values for preserving file mode). Frc_mode is | |||
1582 | * for the case where we created a file and found that the resulting | |||
1583 | * directory was not writeable and the user asked for file modes to NOT | |||
1584 | * be preserved. (we have to preserve what was created by default, so we | |||
1585 | * have to force the setting at the end. this is stated explicitly in the | |||
1586 | * pax spec) | |||
1587 | */ | |||
1588 | ||||
1589 | void | |||
1590 | add_dir(char *name, struct stat *psb, int frc_mode) | |||
1591 | { | |||
1592 | DIRDATA *dblk; | |||
1593 | sigset_t allsigs, savedsigs; | |||
1594 | char realname[PATH_MAX1024], *rp; | |||
1595 | ||||
1596 | if (dirp == NULL((void *)0)) | |||
1597 | return; | |||
1598 | ||||
1599 | if (havechd && *name != '/') { | |||
1600 | if ((rp = realpath(name, realname)) == NULL((void *)0)) { | |||
1601 | paxwarn(1, "Cannot canonicalize %s", name); | |||
1602 | return; | |||
1603 | } | |||
1604 | name = rp; | |||
1605 | } | |||
1606 | if (dircnt == dirsize) { | |||
1607 | dblk = reallocarray(dirp, dirsize * 2, sizeof(DIRDATA)); | |||
1608 | if (dblk == NULL((void *)0)) { | |||
1609 | paxwarn(1, "Unable to store mode and times for created" | |||
1610 | " directory: %s", name); | |||
1611 | return; | |||
1612 | } | |||
1613 | sigprocmask(SIG_BLOCK1, &allsigs, &savedsigs); | |||
1614 | dirp = dblk; | |||
1615 | dirsize *= 2; | |||
1616 | sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0)); | |||
1617 | } | |||
1618 | dblk = &dirp[dircnt]; | |||
1619 | if ((dblk->ft.ft_name = strdup(name)) == NULL((void *)0)) { | |||
1620 | paxwarn(1, "Unable to store mode and times for created" | |||
1621 | " directory: %s", name); | |||
1622 | return; | |||
1623 | } | |||
1624 | dblk->ft.ft_mtim = psb->st_mtim; | |||
1625 | dblk->ft.ft_atim = psb->st_atim; | |||
1626 | dblk->ft.ft_ino = psb->st_ino; | |||
1627 | dblk->ft.ft_dev = psb->st_dev; | |||
1628 | dblk->mode = psb->st_mode & ABITS((0001000 | 0000700 | 0000070 | 0000007) | (0004000 | 0002000 )); | |||
1629 | dblk->frc_mode = frc_mode; | |||
1630 | sigprocmask(SIG_BLOCK1, &allsigs, &savedsigs); | |||
1631 | ++dircnt; | |||
1632 | sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0)); | |||
1633 | } | |||
1634 | ||||
1635 | /* | |||
1636 | * delete_dir() | |||
1637 | * When we rmdir a directory, we may want to make sure we don't | |||
1638 | * later warn about being unable to set its mode and times. | |||
1639 | */ | |||
1640 | ||||
1641 | void | |||
1642 | delete_dir(dev_t dev, ino_t ino) | |||
1643 | { | |||
1644 | DIRDATA *dblk; | |||
1645 | char *name; | |||
1646 | size_t i; | |||
1647 | ||||
1648 | if (dirp == NULL((void *)0)) | |||
1649 | return; | |||
1650 | for (i = 0; i < dircnt; i++) { | |||
1651 | dblk = &dirp[i]; | |||
1652 | ||||
1653 | if (dblk->ft.ft_name == NULL((void *)0)) | |||
1654 | continue; | |||
1655 | if (dblk->ft.ft_dev == dev && dblk->ft.ft_ino == ino) { | |||
1656 | name = dblk->ft.ft_name; | |||
1657 | dblk->ft.ft_name = NULL((void *)0); | |||
1658 | free(name); | |||
1659 | break; | |||
1660 | } | |||
1661 | } | |||
1662 | } | |||
1663 | ||||
1664 | /* | |||
1665 | * proc_dir(int in_sig) | |||
1666 | * process all file modes and times stored for directories CREATED | |||
1667 | * by pax. If in_sig is set, we're in a signal handler and can't | |||
1668 | * free stuff. | |||
1669 | */ | |||
1670 | ||||
1671 | void | |||
1672 | proc_dir(int in_sig) | |||
1673 | { | |||
1674 | DIRDATA *dblk; | |||
1675 | size_t cnt; | |||
1676 | ||||
1677 | if (dirp == NULL((void *)0)) | |||
1678 | return; | |||
1679 | /* | |||
1680 | * read backwards through the file and process each directory | |||
1681 | */ | |||
1682 | cnt = dircnt; | |||
1683 | while (cnt-- > 0) { | |||
1684 | dblk = &dirp[cnt]; | |||
1685 | /* | |||
1686 | * If we remove a directory we created, we replace the | |||
1687 | * ft_name with NULL. Ignore those. | |||
1688 | */ | |||
1689 | if (dblk->ft.ft_name == NULL((void *)0)) | |||
1690 | continue; | |||
1691 | ||||
1692 | /* | |||
1693 | * frc_mode set, make sure we set the file modes even if | |||
1694 | * the user didn't ask for it (see file_subs.c for more info) | |||
1695 | */ | |||
1696 | set_attr(&dblk->ft, 0, dblk->mode, pmode || dblk->frc_mode, | |||
1697 | in_sig); | |||
1698 | if (!in_sig) | |||
1699 | free(dblk->ft.ft_name); | |||
1700 | } | |||
1701 | ||||
1702 | if (!in_sig) | |||
1703 | free(dirp); | |||
1704 | dirp = NULL((void *)0); | |||
1705 | dircnt = 0; | |||
1706 | } | |||
1707 | ||||
1708 | /* | |||
1709 | * database independent routines | |||
1710 | */ | |||
1711 | ||||
1712 | /* | |||
1713 | * st_hash() | |||
1714 | * hashes filenames to a u_int for hashing into a table. Looks at the tail | |||
1715 | * end of file, as this provides far better distribution than any other | |||
1716 | * part of the name. For performance reasons we only care about the last | |||
1717 | * MAXKEYLEN chars (should be at LEAST large enough to pick off the file | |||
1718 | * name). Was tested on 500,000 name file tree traversal from the root | |||
1719 | * and gave almost a perfectly uniform distribution of keys when used with | |||
1720 | * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int) | |||
1721 | * chars at a time and pads with 0 for last addition. | |||
1722 | * Return: | |||
1723 | * the hash value of the string MOD (%) the table size. | |||
1724 | */ | |||
1725 | ||||
1726 | u_int | |||
1727 | st_hash(const char *name, int len, int tabsz) | |||
1728 | { | |||
1729 | const char *pt; | |||
1730 | char *dest; | |||
1731 | const char *end; | |||
1732 | int i; | |||
1733 | u_int key = 0; | |||
1734 | int steps; | |||
1735 | int res; | |||
1736 | u_int val; | |||
1737 | ||||
1738 | /* | |||
1739 | * only look at the tail up to MAXKEYLEN, we do not need to waste | |||
1740 | * time here (remember these are pathnames, the tail is what will | |||
1741 | * spread out the keys) | |||
1742 | */ | |||
1743 | if (len > MAXKEYLEN64) { | |||
1744 | pt = &(name[len - MAXKEYLEN64]); | |||
1745 | len = MAXKEYLEN64; | |||
1746 | } else | |||
1747 | pt = name; | |||
1748 | ||||
1749 | /* | |||
1750 | * calculate the number of u_int size steps in the string and if | |||
1751 | * there is a runt to deal with | |||
1752 | */ | |||
1753 | steps = len/sizeof(u_int); | |||
1754 | res = len % sizeof(u_int); | |||
1755 | ||||
1756 | /* | |||
1757 | * add up the value of the string in unsigned integer sized pieces | |||
1758 | * too bad we cannot have unsigned int aligned strings, then we | |||
1759 | * could avoid the expensive copy. | |||
1760 | */ | |||
1761 | for (i = 0; i < steps; ++i) { | |||
1762 | end = pt + sizeof(u_int); | |||
1763 | dest = (char *)&val; | |||
1764 | while (pt < end) | |||
1765 | *dest++ = *pt++; | |||
1766 | key += val; | |||
| ||||
1767 | } | |||
1768 | ||||
1769 | /* | |||
1770 | * add in the runt padded with zero to the right | |||
1771 | */ | |||
1772 | if (res) { | |||
1773 | val = 0; | |||
1774 | end = pt + res; | |||
1775 | dest = (char *)&val; | |||
1776 | while (pt < end) | |||
1777 | *dest++ = *pt++; | |||
1778 | key += val; | |||
1779 | } | |||
1780 | ||||
1781 | /* | |||
1782 | * return the result mod the table size | |||
1783 | */ | |||
1784 | return(key % tabsz); | |||
1785 | } |