Bug Summary

File:src/bin/pax/tables.c
Warning:line 1767, column 7
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name tables.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -pic-is-pie -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/bin/pax/obj -resource-dir /usr/local/llvm16/lib/clang/16 -internal-isystem /usr/local/llvm16/lib/clang/16/include -internal-externc-isystem /usr/include -O2 -fdebug-compilation-dir=/usr/src/bin/pax/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fno-jump-tables -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/scan/2024-01-11-140451-98009-1 -x c /usr/src/bin/pax/tables.c
1/* $OpenBSD: tables.c,v 1.55 2023/11/26 16:04:17 espie Exp $ */
2/* $NetBSD: tables.c,v 1.4 1995/03/21 09:07:45 cgd Exp $ */
3
4/*-
5 * Copyright (c) 1992 Keith Muller.
6 * Copyright (c) 1992, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Keith Muller of the University of California, San Diego.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/types.h>
38#include <sys/stat.h>
39#include <errno(*__errno()).h>
40#include <fcntl.h>
41#include <limits.h>
42#include <signal.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <string.h>
46#include <unistd.h>
47
48#include "pax.h"
49#include "extern.h"
50static u_int st_hash(const char *, int, int);
51
52/*
53 * Routines for controlling the contents of all the different databases pax
54 * keeps. Tables are dynamically created only when they are needed. The
55 * goal was speed and the ability to work with HUGE archives. The databases
56 * were kept simple, but do have complex rules for when the contents change.
57 * As of this writing, the posix library functions were more complex than
58 * needed for this application (pax databases have very short lifetimes and
59 * do not survive after pax is finished). Pax is required to handle very
60 * large archives. These database routines carefully combine memory usage and
61 * temporary file storage in ways which will not significantly impact runtime
62 * performance while allowing the largest possible archives to be handled.
63 * Trying to force the fit to the posix database routines was not considered
64 * time well spent.
65 */
66
67/*
68 * data structures and constants used by the different databases kept by pax
69 */
70
71/*
72 * Hash Table Sizes MUST BE PRIME, if set too small performance suffers.
73 * Probably safe to expect 500000 inodes per tape. Assuming good key
74 * distribution (inodes) chains of under 50 long (worst case) is ok.
75 */
76#define L_TAB_SZ2503 2503 /* hard link hash table size */
77#define F_TAB_SZ50503 50503 /* file time hash table size */
78#define N_TAB_SZ541 541 /* interactive rename hash table */
79#define D_TAB_SZ317 317 /* unique device mapping table */
80#define A_TAB_SZ317 317 /* ftree dir access time reset table */
81#define SL_TAB_SZ317 317 /* escape symlink tables */
82#define MAXKEYLEN64 64 /* max number of chars for hash */
83#define DIRP_SIZE64 64 /* initial size of created dir table */
84
85/*
86 * file hard link structure (hashed by dev/ino and chained) used to find the
87 * hard links in a file system or with some archive formats (cpio)
88 */
89typedef struct hrdlnk {
90 ino_t ino; /* files inode number */
91 char *name; /* name of first file seen with this ino/dev */
92 dev_t dev; /* files device number */
93 u_long nlink; /* expected link count */
94 struct hrdlnk *fow;
95} HRDLNK;
96
97/*
98 * Archive write update file time table (the -u, -C flag), hashed by filename.
99 * Filenames are stored in a scratch file at seek offset into the file. The
100 * file time (mod time) and the file name length (for a quick check) are
101 * stored in a hash table node. We were forced to use a scratch file because
102 * with -u, the mtime for every node in the archive must always be available
103 * to compare against (and this data can get REALLY large with big archives).
104 * By being careful to read only when we have a good chance of a match, the
105 * performance loss is not measurable (and the size of the archive we can
106 * handle is greatly increased).
107 */
108typedef struct ftm {
109 off_t seek; /* location in scratch file */
110 struct timespec mtim; /* files last modification time */
111 struct ftm *fow;
112 int namelen; /* file name length */
113} FTM;
114
115/*
116 * Interactive rename table (-i flag), hashed by orig filename.
117 * We assume this will not be a large table as this mapping data can only be
118 * obtained through interactive input by the user. Nobody is going to type in
119 * changes for 500000 files? We use chaining to resolve collisions.
120 */
121
122typedef struct namt {
123 char *oname; /* old name */
124 char *nname; /* new name typed in by the user */
125 struct namt *fow;
126} NAMT;
127
128/*
129 * Unique device mapping tables. Some protocols (e.g. cpio) require that the
130 * <c_dev,c_ino> pair will uniquely identify a file in an archive unless they
131 * are links to the same file. Appending to archives can break this. For those
132 * protocols that have this requirement we map c_dev to a unique value not seen
133 * in the archive when we append. We also try to handle inode truncation with
134 * this table. (When the inode field in the archive header are too small, we
135 * remap the dev on writes to remove accidental collisions).
136 *
137 * The list is hashed by device number using chain collision resolution. Off of
138 * each DEVT are linked the various remaps for this device based on those bits
139 * in the inode which were truncated. For example if we are just remapping to
140 * avoid a device number during an update append, off the DEVT we would have
141 * only a single DLIST that has a truncation id of 0 (no inode bits were
142 * stripped for this device so far). When we spot inode truncation we create
143 * a new mapping based on the set of bits in the inode which were stripped off.
144 * so if the top four bits of the inode are stripped and they have a pattern of
145 * 0110...... (where . are those bits not truncated) we would have a mapping
146 * assigned for all inodes that has the same 0110.... pattern (with this dev
147 * number of course). This keeps the mapping sparse and should be able to store
148 * close to the limit of files which can be represented by the optimal
149 * combination of dev and inode bits, and without creating a fouled up archive.
150 * Note we also remap truncated devs in the same way (an exercise for the
151 * dedicated reader; always wanted to say that...:)
152 */
153
154typedef struct devt {
155 dev_t dev; /* the orig device number we now have to map */
156 struct devt *fow; /* new device map list */
157 struct dlist *list; /* map list based on inode truncation bits */
158} DEVT;
159
160typedef struct dlist {
161 ino_t trunc_bits; /* truncation pattern for a specific map */
162 dev_t dev; /* the new device id we use */
163 struct dlist *fow;
164} DLIST;
165
166/*
167 * ftree directory access time reset table. When we are done with a
168 * subtree we reset the access and mod time of the directory when the tflag is
169 * set. Not really explicitly specified in the pax spec, but easy and fast to
170 * do (and this may have even been intended in the spec, it is not clear).
171 * table is hashed by inode with chaining.
172 */
173
174typedef struct atdir {
175 struct file_times ft;
176 struct atdir *fow;
177} ATDIR;
178
179/*
180 * created directory time and mode storage entry. After pax is finished during
181 * extraction or copy, we must reset directory access modes and times that
182 * may have been modified after creation (they no longer have the specified
183 * times and/or modes). We must reset time in the reverse order of creation,
184 * because entries are added from the top of the file tree to the bottom.
185 * We MUST reset times from leaf to root (it will not work the other
186 * direction).
187 */
188
189typedef struct dirdata {
190 struct file_times ft;
191 u_int16_t mode; /* file mode to restore */
192 u_int16_t frc_mode; /* do we force mode settings? */
193} DIRDATA;
194
195static HRDLNK **ltab = NULL((void *)0); /* hard link table for detecting hard links */
196static FTM **ftab = NULL((void *)0); /* file time table for updating arch */
197static NAMT **ntab = NULL((void *)0); /* interactive rename storage table */
198#ifndef NOCPIO
199static DEVT **dtab = NULL((void *)0); /* device/inode mapping tables */
200#endif
201static ATDIR **atab = NULL((void *)0); /* file tree directory time reset table */
202static DIRDATA *dirp = NULL((void *)0); /* storage for setting created dir time/mode */
203static size_t dirsize; /* size of dirp table */
204static size_t dircnt = 0; /* entries in dir time/mode storage */
205static int ffd = -1; /* tmp file for file time table name storage */
206
207/*
208 * hard link table routines
209 *
210 * The hard link table tries to detect hard links to files using the device and
211 * inode values. We do this when writing an archive, so we can tell the format
212 * write routine that this file is a hard link to another file. The format
213 * write routine then can store this file in whatever way it wants (as a hard
214 * link if the format supports that like tar, or ignore this info like cpio).
215 * (Actually a field in the format driver table tells us if the format wants
216 * hard link info. if not, we do not waste time looking for them). We also use
217 * the same table when reading an archive. In that situation, this table is
218 * used by the format read routine to detect hard links from stored dev and
219 * inode numbers (like cpio). This will allow pax to create a link when one
220 * can be detected by the archive format.
221 */
222
223/*
224 * lnk_start
225 * Creates the hard link table.
226 * Return:
227 * 0 if created, -1 if failure
228 */
229
230int
231lnk_start(void)
232{
233 if (ltab != NULL((void *)0))
234 return(0);
235 if ((ltab = calloc(L_TAB_SZ2503, sizeof(HRDLNK *))) == NULL((void *)0)) {
236 paxwarn(1, "Cannot allocate memory for hard link table");
237 return(-1);
238 }
239 return(0);
240}
241
242/*
243 * chk_lnk()
244 * Looks up entry in hard link hash table. If found, it copies the name
245 * of the file it is linked to (we already saw that file) into ln_name.
246 * lnkcnt is decremented and if goes to 1 the node is deleted from the
247 * database. (We have seen all the links to this file). If not found,
248 * we add the file to the database if it has the potential for having
249 * hard links to other files we may process (it has a link count > 1)
250 * Return:
251 * if found returns 1; if not found returns 0; -1 on error
252 */
253
254int
255chk_lnk(ARCHD *arcn)
256{
257 HRDLNK *pt;
258 HRDLNK **ppt;
259 u_int indx;
260
261 if (ltab == NULL((void *)0))
262 return(-1);
263 /*
264 * ignore those nodes that cannot have hard links
265 */
266 if ((arcn->type == PAX_DIR1) || (arcn->sb.st_nlink <= 1))
267 return(0);
268
269 /*
270 * hash inode number and look for this file
271 */
272 indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ2503;
273 if ((pt = ltab[indx]) != NULL((void *)0)) {
274 /*
275 * its hash chain in not empty, walk down looking for it
276 */
277 ppt = &(ltab[indx]);
278 while (pt != NULL((void *)0)) {
279 if ((pt->ino == arcn->sb.st_ino) &&
280 (pt->dev == arcn->sb.st_dev))
281 break;
282 ppt = &(pt->fow);
283 pt = pt->fow;
284 }
285
286 if (pt != NULL((void *)0)) {
287 /*
288 * found a link. set the node type and copy in the
289 * name of the file it is to link to. we need to
290 * handle hardlinks to regular files differently than
291 * other links.
292 */
293 arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name,
294 sizeof(arcn->ln_name));
295 /* XXX truncate? */
296 if ((size_t)arcn->nlen >= sizeof(arcn->name))
297 arcn->nlen = sizeof(arcn->name) - 1;
298 if (arcn->type == PAX_REG4)
299 arcn->type = PAX_HRG9;
300 else
301 arcn->type = PAX_HLK8;
302
303 /*
304 * if we have found all the links to this file, remove
305 * it from the database
306 */
307 if (--pt->nlink <= 1) {
308 *ppt = pt->fow;
309 free(pt->name);
310 free(pt);
311 }
312 return(1);
313 }
314 }
315
316 /*
317 * we never saw this file before. It has links so we add it to the
318 * front of this hash chain
319 */
320 if ((pt = malloc(sizeof(HRDLNK))) != NULL((void *)0)) {
321 if ((pt->name = strdup(arcn->name)) != NULL((void *)0)) {
322 pt->dev = arcn->sb.st_dev;
323 pt->ino = arcn->sb.st_ino;
324 pt->nlink = arcn->sb.st_nlink;
325 pt->fow = ltab[indx];
326 ltab[indx] = pt;
327 return(0);
328 }
329 free(pt);
330 }
331
332 paxwarn(1, "Hard link table out of memory");
333 return(-1);
334}
335
336/*
337 * purg_lnk
338 * remove reference for a file that we may have added to the data base as
339 * a potential source for hard links. We ended up not using the file, so
340 * we do not want to accidently point another file at it later on.
341 */
342
343void
344purg_lnk(ARCHD *arcn)
345{
346 HRDLNK *pt;
347 HRDLNK **ppt;
348 u_int indx;
349
350 if (ltab == NULL((void *)0))
351 return;
352 /*
353 * do not bother to look if it could not be in the database
354 */
355 if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR1) ||
356 PAX_IS_HARDLINK(arcn->type)((arcn->type) == 8 || (arcn->type) == 9))
357 return;
358
359 /*
360 * find the hash chain for this inode value, if empty return
361 */
362 indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ2503;
363 if ((pt = ltab[indx]) == NULL((void *)0))
364 return;
365
366 /*
367 * walk down the list looking for the inode/dev pair, unlink and
368 * free if found
369 */
370 ppt = &(ltab[indx]);
371 while (pt != NULL((void *)0)) {
372 if ((pt->ino == arcn->sb.st_ino) &&
373 (pt->dev == arcn->sb.st_dev))
374 break;
375 ppt = &(pt->fow);
376 pt = pt->fow;
377 }
378 if (pt == NULL((void *)0))
379 return;
380
381 /*
382 * remove and free it
383 */
384 *ppt = pt->fow;
385 free(pt->name);
386 free(pt);
387}
388
389/*
390 * lnk_end()
391 * pull apart a existing link table so we can reuse it. We do this between
392 * read and write phases of append with update. (The format may have
393 * used the link table, and we need to start with a fresh table for the
394 * write phase
395 */
396
397void
398lnk_end(void)
399{
400 int i;
401 HRDLNK *pt;
402 HRDLNK *ppt;
403
404 if (ltab == NULL((void *)0))
405 return;
406
407 for (i = 0; i < L_TAB_SZ2503; ++i) {
408 if (ltab[i] == NULL((void *)0))
409 continue;
410 pt = ltab[i];
411 ltab[i] = NULL((void *)0);
412
413 /*
414 * free up each entry on this chain
415 */
416 while (pt != NULL((void *)0)) {
417 ppt = pt;
418 pt = ppt->fow;
419 free(ppt->name);
420 free(ppt);
421 }
422 }
423}
424
425/*
426 * modification time table routines
427 *
428 * The modification time table keeps track of last modification times for all
429 * files stored in an archive during a write phase when -u is set. We only
430 * add a file to the archive if it is newer than a file with the same name
431 * already stored on the archive (if there is no other file with the same
432 * name on the archive it is added). This applies to writes and appends.
433 * An append with an -u must read the archive and store the modification time
434 * for every file on that archive before starting the write phase. It is clear
435 * that this is one HUGE database. To save memory space, the actual file names
436 * are stored in a scratch file and indexed by an in-memory hash table. The
437 * hash table is indexed by hashing the file path. The nodes in the table store
438 * the length of the filename and the lseek offset within the scratch file
439 * where the actual name is stored. Since there are never any deletions from
440 * this table, fragmentation of the scratch file is never a issue. Lookups
441 * seem to not exhibit any locality at all (files in the database are rarely
442 * looked up more than once...), so caching is just a waste of memory. The
443 * only limitation is the amount of scratch file space available to store the
444 * path names.
445 */
446
447/*
448 * ftime_start()
449 * create the file time hash table and open for read/write the scratch
450 * file. (after created it is unlinked, so when we exit we leave
451 * no witnesses).
452 * Return:
453 * 0 if the table and file was created ok, -1 otherwise
454 */
455
456int
457ftime_start(void)
458{
459
460 if (ftab != NULL((void *)0))
461 return(0);
462 if ((ftab = calloc(F_TAB_SZ50503, sizeof(FTM *))) == NULL((void *)0)) {
463 paxwarn(1, "Cannot allocate memory for file time table");
464 return(-1);
465 }
466
467 /*
468 * get random name and create temporary scratch file, unlink name
469 * so it will get removed on exit
470 */
471 memcpy(tempbase, _TFILE_BASE"paxXXXXXXXXXX", sizeof(_TFILE_BASE"paxXXXXXXXXXX"));
472 if ((ffd = mkstemp(tempfile)) == -1) {
473 syswarn(1, errno(*__errno()), "Unable to create temporary file: %s",
474 tempfile);
475 return(-1);
476 }
477 (void)unlink(tempfile);
478
479 return(0);
480}
481
482/*
483 * chk_ftime()
484 * looks up entry in file time hash table. If not found, the file is
485 * added to the hash table and the file named stored in the scratch file.
486 * If a file with the same name is found, the file times are compared and
487 * the most recent file time is retained. If the new file was younger (or
488 * was not in the database) the new file is selected for storage.
489 * Return:
490 * 0 if file should be added to the archive, 1 if it should be skipped,
491 * -1 on error
492 */
493
494int
495chk_ftime(ARCHD *arcn)
496{
497 FTM *pt;
498 int namelen;
499 u_int indx;
500 char ckname[PAXPATHLEN3072+1];
501
502 /*
503 * no info, go ahead and add to archive
504 */
505 if (ftab == NULL((void *)0))
506 return(0);
507
508 /*
509 * hash the pathname and look up in table
510 */
511 namelen = arcn->nlen;
512 indx = st_hash(arcn->name, namelen, F_TAB_SZ50503);
513 if ((pt = ftab[indx]) != NULL((void *)0)) {
514 /*
515 * the hash chain is not empty, walk down looking for match
516 * only read up the path names if the lengths match, speeds
517 * up the search a lot
518 */
519 while (pt != NULL((void *)0)) {
520 if (pt->namelen == namelen) {
521 /*
522 * potential match, have to read the name
523 * from the scratch file.
524 */
525 if (lseek(ffd,pt->seek,SEEK_SET0) != pt->seek) {
526 syswarn(1, errno(*__errno()),
527 "Failed ftime table seek");
528 return(-1);
529 }
530 if (read(ffd, ckname, namelen) != namelen) {
531 syswarn(1, errno(*__errno()),
532 "Failed ftime table read");
533 return(-1);
534 }
535
536 /*
537 * if the names match, we are done
538 */
539 if (!strncmp(ckname, arcn->name, namelen))
540 break;
541 }
542
543 /*
544 * try the next entry on the chain
545 */
546 pt = pt->fow;
547 }
548
549 if (pt != NULL((void *)0)) {
550 /*
551 * found the file, compare the times, save the newer
552 */
553 if (timespeccmp(&arcn->sb.st_mtim, &pt->mtim, >)(((&arcn->sb.st_mtim)->tv_sec == (&pt->mtim)
->tv_sec) ? ((&arcn->sb.st_mtim)->tv_nsec > (
&pt->mtim)->tv_nsec) : ((&arcn->sb.st_mtim)->
tv_sec > (&pt->mtim)->tv_sec))
) {
554 /*
555 * file is newer
556 */
557 pt->mtim = arcn->sb.st_mtim;
558 return(0);
559 }
560 /*
561 * file is older
562 */
563 return(1);
564 }
565 }
566
567 /*
568 * not in table, add it
569 */
570 if ((pt = malloc(sizeof(FTM))) != NULL((void *)0)) {
571 /*
572 * add the name at the end of the scratch file, saving the
573 * offset. add the file to the head of the hash chain
574 */
575 if ((pt->seek = lseek(ffd, 0, SEEK_END2)) >= 0) {
576 if (write(ffd, arcn->name, namelen) == namelen) {
577 pt->mtim = arcn->sb.st_mtim;
578 pt->namelen = namelen;
579 pt->fow = ftab[indx];
580 ftab[indx] = pt;
581 return(0);
582 }
583 syswarn(1, errno(*__errno()), "Failed write to file time table");
584 } else
585 syswarn(1, errno(*__errno()), "Failed seek on file time table");
586 } else
587 paxwarn(1, "File time table ran out of memory");
588
589 if (pt != NULL((void *)0))
590 free(pt);
591 return(-1);
592}
593
594/*
595 * escaping (absolute or w/"..") symlink table routines
596 *
597 * By default, an archive shouldn't be able extract to outside of the
598 * current directory. What should we do if the archive contains a symlink
599 * whose value is either absolute or contains ".." components? What we'll
600 * do is initially create the path as an empty file (to block attempts to
601 * reference _through_ it) and instead record its path and desired
602 * final value and mode. Then once all the other archive
603 * members are created (but before the pass to set timestamps on
604 * directories) we'll process those records, replacing the placeholder with
605 * the correct symlink and setting them to the correct mode, owner, group,
606 * and timestamps.
607 *
608 * Note: we also need to handle hardlinks to symlinks (barf) as well as
609 * hardlinks whose target is replaced by a later entry in the archive (barf^2).
610 *
611 * So we track things by dev+ino of the placeholder file, associating with
612 * that the value and mode of the final symlink and a list of paths that
613 * should all be hardlinks of that. We'll 'store' the symlink's desired
614 * timestamps, owner, and group by setting them on the placeholder file.
615 *
616 * The operations are:
617 * a) create an escaping symlink: create the placeholder file and add an entry
618 * for the new link
619 * b) create a hardlink: do the link. If the target turns out to be a
620 * zero-length file whose dev+ino are in the symlink table, then add this
621 * path to the list of names for that link
622 * c) perform deferred processing: for each entry, check each associated path:
623 * if it's a zero-length file with the correct dev+ino then recreate it as
624 * the specified symlink or hardlink to the first such
625 */
626
627struct slpath {
628 char *sp_path;
629 struct slpath *sp_next;
630};
631struct slinode {
632 ino_t sli_ino;
633 char *sli_value;
634 struct slpath sli_paths;
635 struct slinode *sli_fow; /* hash table chain */
636 dev_t sli_dev;
637 mode_t sli_mode;
638};
639
640static struct slinode **slitab = NULL((void *)0);
641
642/*
643 * sltab_start()
644 * create the hash table
645 * Return:
646 * 0 if the table and file was created ok, -1 otherwise
647 */
648
649int
650sltab_start(void)
651{
652
653 if ((slitab = calloc(SL_TAB_SZ317, sizeof *slitab)) == NULL((void *)0)) {
654 syswarn(1, errno(*__errno()), "symlink table");
655 return(-1);
656 }
657
658 return(0);
659}
660
661/*
662 * sltab_add_sym()
663 * Create the placeholder and tracking info for an escaping symlink.
664 * Return:
665 * 0 on success, -1 otherwise
666 */
667
668int
669sltab_add_sym(const char *path0, const char *value0, mode_t mode)
670{
671 struct stat sb;
672 struct slinode *s;
673 struct slpath *p;
674 char *path, *value;
675 u_int indx;
676 int fd;
677
678 /* create the placeholder */
679 fd = open(path0, O_WRONLY0x0001 | O_CREAT0x0200 | O_EXCL0x0800 | O_CLOEXEC0x10000, 0600);
680 if (fd == -1)
681 return (-1);
682 if (fstat(fd, &sb) == -1) {
683 unlink(path0);
684 close(fd);
685 return (-1);
686 }
687 close(fd);
688
689 if (havechd && *path0 != '/') {
690 if ((path = realpath(path0, NULL((void *)0))) == NULL((void *)0)) {
691 syswarn(1, errno(*__errno()), "Cannot canonicalize %s", path0);
692 unlink(path0);
693 return (-1);
694 }
695 } else if ((path = strdup(path0)) == NULL((void *)0)) {
696 syswarn(1, errno(*__errno()), "defered symlink path");
697 unlink(path0);
698 return (-1);
699 }
700 if ((value = strdup(value0)) == NULL((void *)0)) {
701 syswarn(1, errno(*__errno()), "defered symlink value");
702 unlink(path);
703 free(path);
704 return (-1);
705 }
706
707 /* now check the hash table for conflicting entry */
708 indx = (sb.st_ino ^ sb.st_dev) % SL_TAB_SZ317;
709 for (s = slitab[indx]; s != NULL((void *)0); s = s->sli_fow) {
710 if (s->sli_ino != sb.st_ino || s->sli_dev != sb.st_dev)
711 continue;
712
713 /*
714 * One of our placeholders got removed behind our back and
715 * we've reused the inode. Weird, but clean up the mess.
716 */
717 free(s->sli_value);
718 free(s->sli_paths.sp_path);
719 p = s->sli_paths.sp_next;
720 while (p != NULL((void *)0)) {
721 struct slpath *next_p = p->sp_next;
722
723 free(p->sp_path);
724 free(p);
725 p = next_p;
726 }
727 goto set_value;
728 }
729
730 /* Normal case: create a new node */
731 if ((s = malloc(sizeof *s)) == NULL((void *)0)) {
732 syswarn(1, errno(*__errno()), "defered symlink");
733 unlink(path);
734 free(path);
735 free(value);
736 return (-1);
737 }
738 s->sli_ino = sb.st_ino;
739 s->sli_dev = sb.st_dev;
740 s->sli_fow = slitab[indx];
741 slitab[indx] = s;
742
743set_value:
744 s->sli_paths.sp_path = path;
745 s->sli_paths.sp_next = NULL((void *)0);
746 s->sli_value = value;
747 s->sli_mode = mode;
748 return (0);
749}
750
751/*
752 * sltab_add_link()
753 * A hardlink was created; if it looks like a placeholder, handle the
754 * tracking.
755 * Return:
756 * 0 if things are ok, -1 if something went wrong
757 */
758
759int
760sltab_add_link(const char *path, const struct stat *sb)
761{
762 struct slinode *s;
763 struct slpath *p;
764 u_int indx;
765
766 if (!S_ISREG(sb->st_mode)((sb->st_mode & 0170000) == 0100000) || sb->st_size != 0)
767 return (1);
768
769 /* find the hash table entry for this hardlink */
770 indx = (sb->st_ino ^ sb->st_dev) % SL_TAB_SZ317;
771 for (s = slitab[indx]; s != NULL((void *)0); s = s->sli_fow) {
772 if (s->sli_ino != sb->st_ino || s->sli_dev != sb->st_dev)
773 continue;
774
775 if ((p = malloc(sizeof *p)) == NULL((void *)0)) {
776 syswarn(1, errno(*__errno()), "deferred symlink hardlink");
777 return (-1);
778 }
779 if (havechd && *path != '/') {
780 if ((p->sp_path = realpath(path, NULL((void *)0))) == NULL((void *)0)) {
781 syswarn(1, errno(*__errno()), "Cannot canonicalize %s",
782 path);
783 free(p);
784 return (-1);
785 }
786 } else if ((p->sp_path = strdup(path)) == NULL((void *)0)) {
787 syswarn(1, errno(*__errno()), "defered symlink hardlink path");
788 free(p);
789 return (-1);
790 }
791
792 /* link it in */
793 p->sp_next = s->sli_paths.sp_next;
794 s->sli_paths.sp_next = p;
795 return (0);
796 }
797
798 /* not found */
799 return (1);
800}
801
802
803static int
804sltab_process_one(struct slinode *s, struct slpath *p, const char *first,
805 int in_sig)
806{
807 struct stat sb;
808 char *path = p->sp_path;
809 mode_t mode;
810 int err;
811
812 /*
813 * is it the expected placeholder? This can fail legimately
814 * if the archive overwrote the link with another, later entry,
815 * so don't warn.
816 */
817 if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode)((sb.st_mode & 0170000) == 0100000) || sb.st_size != 0 ||
818 sb.st_ino != s->sli_ino || sb.st_dev != s->sli_dev)
819 return (0);
820
821 if (unlink(path) && errno(*__errno()) != ENOENT2) {
822 if (!in_sig)
823 syswarn(1, errno(*__errno()), "deferred symlink removal");
824 return (0);
825 }
826
827 err = 0;
828 if (first != NULL((void *)0)) {
829 /* add another hardlink to the existing symlink */
830 if (linkat(AT_FDCWD-100, first, AT_FDCWD-100, path, 0) == 0)
831 return (0);
832
833 /*
834 * Couldn't hardlink the symlink for some reason, so we'll
835 * try creating it as its own symlink, but save the error
836 * for reporting if that fails.
837 */
838 err = errno(*__errno());
839 }
840
841 if (symlink(s->sli_value, path)) {
842 if (!in_sig) {
843 const char *qualifier = "";
844 if (err)
845 qualifier = " hardlink";
846 else
847 err = errno(*__errno());
848
849 syswarn(1, err, "deferred symlink%s: %s",
850 qualifier, path);
851 }
852 return (0);
853 }
854
855 /* success, so set the id, mode, and times */
856 mode = s->sli_mode;
857 if (pids) {
858 /* if can't set the ids, force the set[ug]id bits off */
859 if (set_ids(path, sb.st_uid, sb.st_gid))
860 mode &= ~(SETBITS(0004000 | 0002000));
861 }
862
863 if (pmode)
864 set_pmode(path, mode);
865
866 if (patime || pmtime)
867 set_ftime(path, &sb.st_mtim, &sb.st_atim, 0);
868
869 /*
870 * If we tried to link to first but failed, then this new symlink
871 * might be a better one to try in the future. Guess from the errno.
872 */
873 if (err == 0 || err == ENOENT2 || err == EMLINK31 || err == EOPNOTSUPP45)
874 return (1);
875 return (0);
876}
877
878/*
879 * sltab_process()
880 * Do all the delayed process for escape symlinks
881 */
882
883void
884sltab_process(int in_sig)
885{
886 struct slinode *s;
887 struct slpath *p;
888 char *first;
889 u_int indx;
890
891 if (slitab == NULL((void *)0))
892 return;
893
894 /* walk across the entire hash table */
895 for (indx = 0; indx < SL_TAB_SZ317; indx++) {
896 while ((s = slitab[indx]) != NULL((void *)0)) {
897 /* pop this entry */
898 slitab[indx] = s->sli_fow;
899
900 first = NULL((void *)0);
901 p = &s->sli_paths;
902 while (1) {
903 struct slpath *next_p;
904
905 if (sltab_process_one(s, p, first, in_sig)) {
906 if (!in_sig)
907 free(first);
908 first = p->sp_path;
909 } else if (!in_sig)
910 free(p->sp_path);
911
912 if ((next_p = p->sp_next) == NULL((void *)0))
913 break;
914 *p = *next_p;
915 if (!in_sig)
916 free(next_p);
917 }
918 if (!in_sig) {
919 free(first);
920 free(s->sli_value);
921 free(s);
922 }
923 }
924 }
925 if (!in_sig)
926 free(slitab);
927 slitab = NULL((void *)0);
928}
929
930
931/*
932 * Interactive rename table routines
933 *
934 * The interactive rename table keeps track of the new names that the user
935 * assigns to files from tty input. Since this map is unique for each file
936 * we must store it in case there is a reference to the file later in archive
937 * (a link). Otherwise we will be unable to find the file we know was
938 * extracted. The remapping of these files is stored in a memory based hash
939 * table (it is assumed since input must come from /dev/tty, it is unlikely to
940 * be a very large table).
941 */
942
943/*
944 * name_start()
945 * create the interactive rename table
946 * Return:
947 * 0 if successful, -1 otherwise
948 */
949
950int
951name_start(void)
952{
953 if (ntab != NULL((void *)0))
954 return(0);
955 if ((ntab = calloc(N_TAB_SZ541, sizeof(NAMT *))) == NULL((void *)0)) {
956 paxwarn(1, "Cannot allocate memory for interactive rename table");
957 return(-1);
958 }
959 return(0);
960}
961
962/*
963 * add_name()
964 * add the new name to old name mapping just created by the user.
965 * If an old name mapping is found (there may be duplicate names on an
966 * archive) only the most recent is kept.
967 * Return:
968 * 0 if added, -1 otherwise
969 */
970
971int
972add_name(char *oname, int onamelen, char *nname)
973{
974 NAMT *pt;
975 u_int indx;
976
977 if (ntab == NULL((void *)0)) {
978 /*
979 * should never happen
980 */
981 paxwarn(0, "No interactive rename table, links may fail");
982 return(0);
983 }
984
985 /*
986 * look to see if we have already mapped this file, if so we
987 * will update it
988 */
989 indx = st_hash(oname, onamelen, N_TAB_SZ541);
990 if ((pt = ntab[indx]) != NULL((void *)0)) {
991 /*
992 * look down the has chain for the file
993 */
994 while ((pt != NULL((void *)0)) && (strcmp(oname, pt->oname) != 0))
995 pt = pt->fow;
996
997 if (pt != NULL((void *)0)) {
998 /*
999 * found an old mapping, replace it with the new one
1000 * the user just input (if it is different)
1001 */
1002 if (strcmp(nname, pt->nname) == 0)
1003 return(0);
1004
1005 free(pt->nname);
1006 if ((pt->nname = strdup(nname)) == NULL((void *)0)) {
1007 paxwarn(1, "Cannot update rename table");
1008 return(-1);
1009 }
1010 return(0);
1011 }
1012 }
1013
1014 /*
1015 * this is a new mapping, add it to the table
1016 */
1017 if ((pt = malloc(sizeof(NAMT))) != NULL((void *)0)) {
1018 if ((pt->oname = strdup(oname)) != NULL((void *)0)) {
1019 if ((pt->nname = strdup(nname)) != NULL((void *)0)) {
1020 pt->fow = ntab[indx];
1021 ntab[indx] = pt;
1022 return(0);
1023 }
1024 free(pt->oname);
1025 }
1026 free(pt);
1027 }
1028 paxwarn(1, "Interactive rename table out of memory");
1029 return(-1);
1030}
1031
1032/*
1033 * sub_name()
1034 * look up a link name to see if it points at a file that has been
1035 * remapped by the user. If found, the link is adjusted to contain the
1036 * new name (oname is the link to name)
1037 */
1038
1039void
1040sub_name(char *oname, int *onamelen, int onamesize)
1041{
1042 NAMT *pt;
1043 u_int indx;
1044
1045 if (ntab == NULL((void *)0))
1
Assuming 'ntab' is not equal to NULL
2
Taking false branch
1046 return;
1047 /*
1048 * look the name up in the hash table
1049 */
1050 indx = st_hash(oname, *onamelen, N_TAB_SZ541);
3
Calling 'st_hash'
1051 if ((pt = ntab[indx]) == NULL((void *)0))
1052 return;
1053
1054 while (pt != NULL((void *)0)) {
1055 /*
1056 * walk down the hash chain looking for a match
1057 */
1058 if (strcmp(oname, pt->oname) == 0) {
1059 /*
1060 * found it, replace it with the new name
1061 * and return (we know that oname has enough space)
1062 */
1063 *onamelen = strlcpy(oname, pt->nname, onamesize);
1064 if (*onamelen >= onamesize)
1065 *onamelen = onamesize - 1; /* XXX truncate? */
1066 return;
1067 }
1068 pt = pt->fow;
1069 }
1070
1071 /*
1072 * no match, just return
1073 */
1074}
1075
1076#ifndef NOCPIO
1077/*
1078 * device/inode mapping table routines
1079 * (used with formats that store device and inodes fields)
1080 *
1081 * device/inode mapping tables remap the device field in a archive header. The
1082 * device/inode fields are used to determine when files are hard links to each
1083 * other. However these values have very little meaning outside of that. This
1084 * database is used to solve one of two different problems.
1085 *
1086 * 1) when files are appended to an archive, while the new files may have hard
1087 * links to each other, you cannot determine if they have hard links to any
1088 * file already stored on the archive from a prior run of pax. We must assume
1089 * that these inode/device pairs are unique only within a SINGLE run of pax
1090 * (which adds a set of files to an archive). So we have to make sure the
1091 * inode/dev pairs we add each time are always unique. We do this by observing
1092 * while the inode field is very dense, the use of the dev field is fairly
1093 * sparse. Within each run of pax, we remap any device number of a new archive
1094 * member that has a device number used in a prior run and already stored in a
1095 * file on the archive. During the read phase of the append, we store the
1096 * device numbers used and mark them to not be used by any file during the
1097 * write phase. If during write we go to use one of those old device numbers,
1098 * we remap it to a new value.
1099 *
1100 * 2) Often the fields in the archive header used to store these values are
1101 * too small to store the entire value. The result is an inode or device value
1102 * which can be truncated. This really can foul up an archive. With truncation
1103 * we end up creating links between files that are really not links (after
1104 * truncation the inodes are the same value). We address that by detecting
1105 * truncation and forcing a remap of the device field to split truncated
1106 * inodes away from each other. Each truncation creates a pattern of bits that
1107 * are removed. We use this pattern of truncated bits to partition the inodes
1108 * on a single device to many different devices (each one represented by the
1109 * truncated bit pattern). All inodes on the same device that have the same
1110 * truncation pattern are mapped to the same new device. Two inodes that
1111 * truncate to the same value clearly will always have different truncation
1112 * bit patterns, so they will be split from away each other. When we spot
1113 * device truncation we remap the device number to a non truncated value.
1114 * (for more info see table.h for the data structures involved).
1115 */
1116
1117static DEVT *chk_dev(dev_t, int);
1118
1119/*
1120 * dev_start()
1121 * create the device mapping table
1122 * Return:
1123 * 0 if successful, -1 otherwise
1124 */
1125
1126int
1127dev_start(void)
1128{
1129 if (dtab != NULL((void *)0))
1130 return(0);
1131 if ((dtab = calloc(D_TAB_SZ317, sizeof(DEVT *))) == NULL((void *)0)) {
1132 paxwarn(1, "Cannot allocate memory for device mapping table");
1133 return(-1);
1134 }
1135 return(0);
1136}
1137
1138/*
1139 * add_dev()
1140 * add a device number to the table. this will force the device to be
1141 * remapped to a new value if it be used during a write phase. This
1142 * function is called during the read phase of an append to prohibit the
1143 * use of any device number already in the archive.
1144 * Return:
1145 * 0 if added ok, -1 otherwise
1146 */
1147
1148int
1149add_dev(ARCHD *arcn)
1150{
1151 if (chk_dev(arcn->sb.st_dev, 1) == NULL((void *)0))
1152 return(-1);
1153 return(0);
1154}
1155
1156/*
1157 * chk_dev()
1158 * check for a device value in the device table. If not found and the add
1159 * flag is set, it is added. This does NOT assign any mapping values, just
1160 * adds the device number as one that need to be remapped. If this device
1161 * is already mapped, just return with a pointer to that entry.
1162 * Return:
1163 * pointer to the entry for this device in the device map table. Null
1164 * if the add flag is not set and the device is not in the table (it is
1165 * not been seen yet). If add is set and the device cannot be added, null
1166 * is returned (indicates an error).
1167 */
1168
1169static DEVT *
1170chk_dev(dev_t dev, int add)
1171{
1172 DEVT *pt;
1173 u_int indx;
1174
1175 if (dtab == NULL((void *)0))
1176 return(NULL((void *)0));
1177 /*
1178 * look to see if this device is already in the table
1179 */
1180 indx = ((unsigned)dev) % D_TAB_SZ317;
1181 if ((pt = dtab[indx]) != NULL((void *)0)) {
1182 while ((pt != NULL((void *)0)) && (pt->dev != dev))
1183 pt = pt->fow;
1184
1185 /*
1186 * found it, return a pointer to it
1187 */
1188 if (pt != NULL((void *)0))
1189 return(pt);
1190 }
1191
1192 /*
1193 * not in table, we add it only if told to as this may just be a check
1194 * to see if a device number is being used.
1195 */
1196 if (add == 0)
1197 return(NULL((void *)0));
1198
1199 /*
1200 * allocate a node for this device and add it to the front of the hash
1201 * chain. Note we do not assign remaps values here, so the pt->list
1202 * list must be NULL.
1203 */
1204 if ((pt = malloc(sizeof(DEVT))) == NULL((void *)0)) {
1205 paxwarn(1, "Device map table out of memory");
1206 return(NULL((void *)0));
1207 }
1208 pt->dev = dev;
1209 pt->list = NULL((void *)0);
1210 pt->fow = dtab[indx];
1211 dtab[indx] = pt;
1212 return(pt);
1213}
1214/*
1215 * map_dev()
1216 * given an inode and device storage mask (the mask has a 1 for each bit
1217 * the archive format is able to store in a header), we check for inode
1218 * and device truncation and remap the device as required. Device mapping
1219 * can also occur when during the read phase of append a device number was
1220 * seen (and was marked as do not use during the write phase). WE ASSUME
1221 * that unsigned longs are the same size or bigger than the fields used
1222 * for ino_t and dev_t. If not the types will have to be changed.
1223 * Return:
1224 * 0 if all ok, -1 otherwise.
1225 */
1226
1227int
1228map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)
1229{
1230 DEVT *pt;
1231 DLIST *dpt;
1232 static dev_t lastdev = 0; /* next device number to try */
1233 int trc_ino = 0;
1234 int trc_dev = 0;
1235 ino_t trunc_bits = 0;
1236 ino_t nino;
1237
1238 if (dtab == NULL((void *)0))
1239 return(0);
1240 /*
1241 * check for device and inode truncation, and extract the truncated
1242 * bit pattern.
1243 */
1244 if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)
1245 ++trc_dev;
1246 if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {
1247 ++trc_ino;
1248 trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);
1249 }
1250
1251 /*
1252 * see if this device is already being mapped, look up the device
1253 * then find the truncation bit pattern which applies
1254 */
1255 if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL((void *)0)) {
1256 /*
1257 * this device is already marked to be remapped
1258 */
1259 for (dpt = pt->list; dpt != NULL((void *)0); dpt = dpt->fow)
1260 if (dpt->trunc_bits == trunc_bits)
1261 break;
1262
1263 if (dpt != NULL((void *)0)) {
1264 /*
1265 * we are being remapped for this device and pattern
1266 * change the device number to be stored and return
1267 */
1268 arcn->sb.st_dev = dpt->dev;
1269 arcn->sb.st_ino = nino;
1270 return(0);
1271 }
1272 } else {
1273 /*
1274 * this device is not being remapped YET. if we do not have any
1275 * form of truncation, we do not need a remap
1276 */
1277 if (!trc_ino && !trc_dev)
1278 return(0);
1279
1280 /*
1281 * we have truncation, have to add this as a device to remap
1282 */
1283 if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL((void *)0))
1284 goto bad;
1285
1286 /*
1287 * if we just have a truncated inode, we have to make sure that
1288 * all future inodes that do not truncate (they have the
1289 * truncation pattern of all 0's) continue to map to the same
1290 * device number. We probably have already written inodes with
1291 * this device number to the archive with the truncation
1292 * pattern of all 0's. So we add the mapping for all 0's to the
1293 * same device number.
1294 */
1295 if (!trc_dev && (trunc_bits != 0)) {
1296 if ((dpt = malloc(sizeof(DLIST))) == NULL((void *)0))
1297 goto bad;
1298 dpt->trunc_bits = 0;
1299 dpt->dev = arcn->sb.st_dev;
1300 dpt->fow = pt->list;
1301 pt->list = dpt;
1302 }
1303 }
1304
1305 /*
1306 * look for a device number not being used. We must watch for wrap
1307 * around on lastdev (so we do not get stuck looking forever!)
1308 */
1309 while (++lastdev > 0) {
1310 if (chk_dev(lastdev, 0) != NULL((void *)0))
1311 continue;
1312 /*
1313 * found an unused value. If we have reached truncation point
1314 * for this format we are hosed, so we give up. Otherwise we
1315 * mark it as being used.
1316 */
1317 if (((lastdev & ((dev_t)dev_mask)) != lastdev) ||
1318 (chk_dev(lastdev, 1) == NULL((void *)0)))
1319 goto bad;
1320 break;
1321 }
1322
1323 if ((lastdev <= 0) || ((dpt = malloc(sizeof(DLIST))) == NULL((void *)0)))
1324 goto bad;
1325
1326 /*
1327 * got a new device number, store it under this truncation pattern.
1328 * change the device number this file is being stored with.
1329 */
1330 dpt->trunc_bits = trunc_bits;
1331 dpt->dev = lastdev;
1332 dpt->fow = pt->list;
1333 pt->list = dpt;
1334 arcn->sb.st_dev = lastdev;
1335 arcn->sb.st_ino = nino;
1336 return(0);
1337
1338 bad:
1339 paxwarn(1, "Unable to fix truncated inode/device field when storing %s",
1340 arcn->name);
1341 paxwarn(0, "Archive may create improper hard links when extracted");
1342 return(0);
1343}
1344#endif /* NOCPIO */
1345
1346/*
1347 * directory access/mod time reset table routines (for directories READ by pax)
1348 *
1349 * The pax -t flag requires that access times of archive files be the same
1350 * before being read by pax. For regular files, access time is restored after
1351 * the file has been copied. This database provides the same functionality for
1352 * directories read during file tree traversal. Restoring directory access time
1353 * is more complex than files since directories may be read several times until
1354 * all the descendants in their subtree are visited by fts. Directory access
1355 * and modification times are stored during the fts pre-order visit (done
1356 * before any descendants in the subtree are visited) and restored after the
1357 * fts post-order visit (after all the descendants have been visited). In the
1358 * case of premature exit from a subtree (like from the effects of -n), any
1359 * directory entries left in this database are reset during final cleanup
1360 * operations of pax. Entries are hashed by inode number for fast lookup.
1361 */
1362
1363/*
1364 * atdir_start()
1365 * create the directory access time database for directories READ by pax.
1366 * Return:
1367 * 0 is created ok, -1 otherwise.
1368 */
1369
1370int
1371atdir_start(void)
1372{
1373 if (atab != NULL((void *)0))
1374 return(0);
1375 if ((atab = calloc(A_TAB_SZ317, sizeof(ATDIR *))) == NULL((void *)0)) {
1376 paxwarn(1,"Cannot allocate space for directory access time table");
1377 return(-1);
1378 }
1379 return(0);
1380}
1381
1382
1383/*
1384 * atdir_end()
1385 * walk through the directory access time table and reset the access time
1386 * of any directory who still has an entry left in the database. These
1387 * entries are for directories READ by pax
1388 */
1389
1390void
1391atdir_end(void)
1392{
1393 ATDIR *pt;
1394 int i;
1395
1396 if (atab == NULL((void *)0))
1397 return;
1398 /*
1399 * for each non-empty hash table entry reset all the directories
1400 * chained there.
1401 */
1402 for (i = 0; i < A_TAB_SZ317; ++i) {
1403 if ((pt = atab[i]) == NULL((void *)0))
1404 continue;
1405 /*
1406 * remember to force the times, set_ftime() looks at pmtime
1407 * and patime, which only applies to things CREATED by pax,
1408 * not read by pax. Read time reset is controlled by -t.
1409 */
1410 for (; pt != NULL((void *)0); pt = pt->fow)
1411 set_attr(&pt->ft, 1, 0, 0, 0);
1412 }
1413}
1414
1415/*
1416 * add_atdir()
1417 * add a directory to the directory access time table. Table is hashed
1418 * and chained by inode number. This is for directories READ by pax
1419 */
1420
1421void
1422add_atdir(char *fname, dev_t dev, ino_t ino, const struct timespec *mtimp,
1423 const struct timespec *atimp)
1424{
1425 ATDIR *pt;
1426 sigset_t allsigs, savedsigs;
1427 u_int indx;
1428
1429 if (atab == NULL((void *)0))
1430 return;
1431
1432 /*
1433 * make sure this directory is not already in the table, if so just
1434 * return (the older entry always has the correct time). The only
1435 * way this will happen is when the same subtree can be traversed by
1436 * different args to pax and the -n option is aborting fts out of a
1437 * subtree before all the post-order visits have been made.
1438 */
1439 indx = ((unsigned)ino) % A_TAB_SZ317;
1440 if ((pt = atab[indx]) != NULL((void *)0)) {
1441 while (pt != NULL((void *)0)) {
1442 if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev))
1443 break;
1444 pt = pt->fow;
1445 }
1446
1447 /*
1448 * oops, already there. Leave it alone.
1449 */
1450 if (pt != NULL((void *)0))
1451 return;
1452 }
1453
1454 /*
1455 * add it to the front of the hash chain
1456 */
1457 sigfillset(&allsigs);
1458 sigprocmask(SIG_BLOCK1, &allsigs, &savedsigs);
1459 if ((pt = malloc(sizeof *pt)) != NULL((void *)0)) {
1460 if ((pt->ft.ft_name = strdup(fname)) != NULL((void *)0)) {
1461 pt->ft.ft_dev = dev;
1462 pt->ft.ft_ino = ino;
1463 pt->ft.ft_mtim = *mtimp;
1464 pt->ft.ft_atim = *atimp;
1465 pt->fow = atab[indx];
1466 atab[indx] = pt;
1467 sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0));
1468 return;
1469 }
1470 free(pt);
1471 }
1472
1473 sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0));
1474 paxwarn(1, "Directory access time reset table ran out of memory");
1475}
1476
1477/*
1478 * get_atdir()
1479 * look up a directory by inode and device number to obtain the access
1480 * and modification time you want to set to. If found, the modification
1481 * and access time parameters are set and the entry is removed from the
1482 * table (as it is no longer needed). These are for directories READ by
1483 * pax
1484 * Return:
1485 * 0 if found, -1 if not found.
1486 */
1487
1488int
1489do_atdir(const char *name, dev_t dev, ino_t ino)
1490{
1491 ATDIR *pt;
1492 ATDIR **ppt;
1493 sigset_t allsigs, savedsigs;
1494 u_int indx;
1495
1496 if (atab == NULL((void *)0))
1497 return(-1);
1498 /*
1499 * hash by inode and search the chain for an inode and device match
1500 */
1501 indx = ((unsigned)ino) % A_TAB_SZ317;
1502 if ((pt = atab[indx]) == NULL((void *)0))
1503 return(-1);
1504
1505 ppt = &(atab[indx]);
1506 while (pt != NULL((void *)0)) {
1507 if ((pt->ft.ft_ino == ino) && (pt->ft.ft_dev == dev))
1508 break;
1509 /*
1510 * no match, go to next one
1511 */
1512 ppt = &(pt->fow);
1513 pt = pt->fow;
1514 }
1515
1516 /*
1517 * return if we did not find it.
1518 */
1519 if (pt == NULL((void *)0) || pt->ft.ft_name == NULL((void *)0) ||
1520 strcmp(name, pt->ft.ft_name) == 0)
1521 return(-1);
1522
1523 /*
1524 * found it. set the times and remove the entry from the table.
1525 */
1526 set_attr(&pt->ft, 1, 0, 0, 0);
1527 sigfillset(&allsigs);
1528 sigprocmask(SIG_BLOCK1, &allsigs, &savedsigs);
1529 *ppt = pt->fow;
1530 sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0));
1531 free(pt->ft.ft_name);
1532 free(pt);
1533 return(0);
1534}
1535
1536/*
1537 * directory access mode and time storage routines (for directories CREATED
1538 * by pax).
1539 *
1540 * Pax requires that extracted directories, by default, have their access/mod
1541 * times and permissions set to the values specified in the archive. During the
1542 * actions of extracting (and creating the destination subtree during -rw copy)
1543 * directories extracted may be modified after being created. Even worse is
1544 * that these directories may have been created with file permissions which
1545 * prohibits any descendants of these directories from being extracted. When
1546 * directories are created by pax, access rights may be added to permit the
1547 * creation of files in their subtree. Every time pax creates a directory, the
1548 * times and file permissions specified by the archive are stored. After all
1549 * files have been extracted (or copied), these directories have their times
1550 * and file modes reset to the stored values. The directory info is restored in
1551 * reverse order as entries were added from root to leaf: to restore atime
1552 * properly, we must go backwards.
1553 */
1554
1555/*
1556 * dir_start()
1557 * set up the directory time and file mode storage for directories CREATED
1558 * by pax.
1559 * Return:
1560 * 0 if ok, -1 otherwise
1561 */
1562
1563int
1564dir_start(void)
1565{
1566 if (dirp != NULL((void *)0))
1567 return(0);
1568
1569 dirsize = DIRP_SIZE64;
1570 if ((dirp = reallocarray(NULL((void *)0), dirsize, sizeof(DIRDATA))) == NULL((void *)0)) {
1571 paxwarn(1, "Unable to allocate memory for directory times");
1572 return(-1);
1573 }
1574 return(0);
1575}
1576
1577/*
1578 * add_dir()
1579 * add the mode and times for a newly CREATED directory
1580 * name is name of the directory, psb the stat buffer with the data in it,
1581 * frc_mode is a flag that says whether to force the setting of the mode
1582 * (ignoring the user set values for preserving file mode). Frc_mode is
1583 * for the case where we created a file and found that the resulting
1584 * directory was not writeable and the user asked for file modes to NOT
1585 * be preserved. (we have to preserve what was created by default, so we
1586 * have to force the setting at the end. this is stated explicitly in the
1587 * pax spec)
1588 */
1589
1590void
1591add_dir(char *name, struct stat *psb, int frc_mode)
1592{
1593 DIRDATA *dblk;
1594 sigset_t allsigs, savedsigs;
1595 char realname[PATH_MAX1024], *rp;
1596
1597 if (dirp == NULL((void *)0))
1598 return;
1599
1600 if (havechd && *name != '/') {
1601 if ((rp = realpath(name, realname)) == NULL((void *)0)) {
1602 paxwarn(1, "Cannot canonicalize %s", name);
1603 return;
1604 }
1605 name = rp;
1606 }
1607 if (dircnt == dirsize) {
1608 dblk = reallocarray(dirp, dirsize * 2, sizeof(DIRDATA));
1609 if (dblk == NULL((void *)0)) {
1610 paxwarn(1, "Unable to store mode and times for created"
1611 " directory: %s", name);
1612 return;
1613 }
1614 sigprocmask(SIG_BLOCK1, &allsigs, &savedsigs);
1615 dirp = dblk;
1616 dirsize *= 2;
1617 sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0));
1618 }
1619 dblk = &dirp[dircnt];
1620 if ((dblk->ft.ft_name = strdup(name)) == NULL((void *)0)) {
1621 paxwarn(1, "Unable to store mode and times for created"
1622 " directory: %s", name);
1623 return;
1624 }
1625 dblk->ft.ft_mtim = psb->st_mtim;
1626 dblk->ft.ft_atim = psb->st_atim;
1627 dblk->ft.ft_ino = psb->st_ino;
1628 dblk->ft.ft_dev = psb->st_dev;
1629 dblk->mode = psb->st_mode & ABITS((0001000 | 0000700 | 0000070 | 0000007) | (0004000 | 0002000
))
;
1630 dblk->frc_mode = frc_mode;
1631 sigprocmask(SIG_BLOCK1, &allsigs, &savedsigs);
1632 ++dircnt;
1633 sigprocmask(SIG_SETMASK3, &savedsigs, NULL((void *)0));
1634}
1635
1636/*
1637 * delete_dir()
1638 * When we rmdir a directory, we may want to make sure we don't
1639 * later warn about being unable to set its mode and times.
1640 */
1641
1642void
1643delete_dir(dev_t dev, ino_t ino)
1644{
1645 DIRDATA *dblk;
1646 char *name;
1647 size_t i;
1648
1649 if (dirp == NULL((void *)0))
1650 return;
1651 for (i = 0; i < dircnt; i++) {
1652 dblk = &dirp[i];
1653
1654 if (dblk->ft.ft_name == NULL((void *)0))
1655 continue;
1656 if (dblk->ft.ft_dev == dev && dblk->ft.ft_ino == ino) {
1657 name = dblk->ft.ft_name;
1658 dblk->ft.ft_name = NULL((void *)0);
1659 free(name);
1660 break;
1661 }
1662 }
1663}
1664
1665/*
1666 * proc_dir(int in_sig)
1667 * process all file modes and times stored for directories CREATED
1668 * by pax. If in_sig is set, we're in a signal handler and can't
1669 * free stuff.
1670 */
1671
1672void
1673proc_dir(int in_sig)
1674{
1675 DIRDATA *dblk;
1676 size_t cnt;
1677
1678 if (dirp == NULL((void *)0))
1679 return;
1680 /*
1681 * read backwards through the file and process each directory
1682 */
1683 cnt = dircnt;
1684 while (cnt-- > 0) {
1685 dblk = &dirp[cnt];
1686 /*
1687 * If we remove a directory we created, we replace the
1688 * ft_name with NULL. Ignore those.
1689 */
1690 if (dblk->ft.ft_name == NULL((void *)0))
1691 continue;
1692
1693 /*
1694 * frc_mode set, make sure we set the file modes even if
1695 * the user didn't ask for it (see file_subs.c for more info)
1696 */
1697 set_attr(&dblk->ft, 0, dblk->mode, pmode || dblk->frc_mode,
1698 in_sig);
1699 if (!in_sig)
1700 free(dblk->ft.ft_name);
1701 }
1702
1703 if (!in_sig)
1704 free(dirp);
1705 dirp = NULL((void *)0);
1706 dircnt = 0;
1707}
1708
1709/*
1710 * database independent routines
1711 */
1712
1713/*
1714 * st_hash()
1715 * hashes filenames to a u_int for hashing into a table. Looks at the tail
1716 * end of file, as this provides far better distribution than any other
1717 * part of the name. For performance reasons we only care about the last
1718 * MAXKEYLEN chars (should be at LEAST large enough to pick off the file
1719 * name). Was tested on 500,000 name file tree traversal from the root
1720 * and gave almost a perfectly uniform distribution of keys when used with
1721 * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)
1722 * chars at a time and pads with 0 for last addition.
1723 * Return:
1724 * the hash value of the string MOD (%) the table size.
1725 */
1726
1727static u_int
1728st_hash(const char *name, int len, int tabsz)
1729{
1730 const char *pt;
1731 char *dest;
1732 const char *end;
1733 int i;
1734 u_int key = 0;
1735 int steps;
1736 int res;
1737 u_int val;
4
'val' declared without an initial value
1738
1739 /*
1740 * only look at the tail up to MAXKEYLEN, we do not need to waste
1741 * time here (remember these are pathnames, the tail is what will
1742 * spread out the keys)
1743 */
1744 if (len > MAXKEYLEN64) {
5
Assuming 'len' is > MAXKEYLEN
6
Taking true branch
1745 pt = &(name[len - MAXKEYLEN64]);
1746 len = MAXKEYLEN64;
1747 } else
1748 pt = name;
1749
1750 /*
1751 * calculate the number of u_int size steps in the string and if
1752 * there is a runt to deal with
1753 */
1754 steps = len/sizeof(u_int);
1755 res = len % sizeof(u_int);
1756
1757 /*
1758 * add up the value of the string in unsigned integer sized pieces
1759 * too bad we cannot have unsigned int aligned strings, then we
1760 * could avoid the expensive copy.
1761 */
1762 for (i = 0; i < steps; ++i) {
7
Loop condition is true. Entering loop body
1763 end = pt + sizeof(u_int);
1764 dest = (char *)&val;
1765 while (pt < end)
8
Assuming 'pt' is >= 'end'
9
Loop condition is false. Execution continues on line 1767
1766 *dest++ = *pt++;
1767 key += val;
10
Assigned value is garbage or undefined
1768 }
1769
1770 /*
1771 * add in the runt padded with zero to the right
1772 */
1773 if (res) {
1774 val = 0;
1775 end = pt + res;
1776 dest = (char *)&val;
1777 while (pt < end)
1778 *dest++ = *pt++;
1779 key += val;
1780 }
1781
1782 /*
1783 * return the result mod the table size
1784 */
1785 return(key % tabsz);
1786}