Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: trunk/minix/commands/pax/tables.c@ 15

Last change on this file since 15 was 9, checked in by Mattia Monga, 14 years ago
Minix 3.1.2a
File size: 34.9 KB

Rev	Line
[9]	1	/*-
	2	* Copyright (c) 1992 Keith Muller.
	3	* Copyright (c) 1992, 1993
	4	* The Regents of the University of California. All rights reserved.
	5	*
	6	* This code is derived from software contributed to Berkeley by
	7	* Keith Muller of the University of California, San Diego.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	* 1. Redistributions of source code must retain the above copyright
	13	* notice, this list of conditions and the following disclaimer.
	14	* 2. Redistributions in binary form must reproduce the above copyright
	15	* notice, this list of conditions and the following disclaimer in the
	16	* documentation and/or other materials provided with the distribution.
	17	* 4. Neither the name of the University nor the names of its contributors
	18	* may be used to endorse or promote products derived from this software
	19	* without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	22	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	25	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	26	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	27	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	28	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	29	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	30	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	31	* SUCH DAMAGE.
	32	*/
	33
	34	#ifndef lint
	35	#if 0
	36	static char sccsid[] = "@(#)tables.c 8.1 (Berkeley) 5/31/93";
	37	#endif
	38	#endif /* not lint */
	39
	40	#include <sys/types.h>
	41	#include <sys/time.h>
	42	#include <sys/stat.h>
	43	#include <fcntl.h>
	44	#include <errno.h>
	45	#include <stdio.h>
	46	#include <stdlib.h>
	47	#include <string.h>
	48	#include <unistd.h>
	49	#include "pax.h"
	50	#include "tables.h"
	51	#include "extern.h"
	52
	53	/*
	54	* Routines for controlling the contents of all the different databases pax
	55	* keeps. Tables are dynamically created only when they are needed. The
	56	* goal was speed and the ability to work with HUGE archives. The databases
	57	* were kept simple, but do have complex rules for when the contents change.
	58	* As of this writing, the POSIX library functions were more complex than
	59	* needed for this application (pax databases have very short lifetimes and
	60	* do not survive after pax is finished). Pax is required to handle very
	61	* large archives. These database routines carefully combine memory usage and
	62	* temporary file storage in ways which will not significantly impact runtime
	63	* performance while allowing the largest possible archives to be handled.
	64	* Trying to force the fit to the POSIX databases routines was not considered
	65	* time well spent.
	66	*/
	67
	68	static HRDLNK *ltab = NULL; / hard link table for detecting hard links */
	69	static FTM *ftab = NULL; / file time table for updating arch */
	70	static NAMT *ntab = NULL; / interactive rename storage table */
	71	static DEVT *dtab = NULL; / device/inode mapping tables */
	72	static ATDIR *atab = NULL; / file tree directory time reset table */
	73	static int dirfd = -1; /* storage for setting created dir time/mode */
	74	static u_long dircnt; /* entries in dir time/mode storage */
	75	static int ffd = -1; /* tmp file for file time table name storage */
	76
	77	static DEVT *chk_dev(dev_t, int);
	78
	79	/*
	80	* hard link table routines
	81	*
	82	* The hard link table tries to detect hard links to files using the device and
	83	* inode values. We do this when writing an archive, so we can tell the format
	84	* write routine that this file is a hard link to another file. The format
	85	* write routine then can store this file in whatever way it wants (as a hard
	86	* link if the format supports that like tar, or ignore this info like cpio).
	87	* (Actually a field in the format driver table tells us if the format wants
	88	* hard link info. if not, we do not waste time looking for them). We also use
	89	* the same table when reading an archive. In that situation, this table is
	90	* used by the format read routine to detect hard links from stored dev and
	91	* inode numbers (like cpio). This will allow pax to create a link when one
	92	* can be detected by the archive format.
	93	*/
	94
	95	/*
	96	* lnk_start
	97	* Creates the hard link table.
	98	* Return:
	99	* 0 if created, -1 if failure
	100	*/
	101
	102	int
	103	lnk_start(void)
	104	{
	105	if (ltab != NULL)
	106	return(0);
	107	if ((ltab = (HRDLNK *)calloc(L_TAB_SZ, sizeof(HRDLNK ))) == NULL) {
	108	paxwarn(1, "Cannot allocate memory for hard link table");
	109	return(-1);
	110	}
	111	return(0);
	112	}
	113
	114	/*
	115	* chk_lnk()
	116	* Looks up entry in hard link hash table. If found, it copies the name
	117	* of the file it is linked to (we already saw that file) into ln_name.
	118	* lnkcnt is decremented and if goes to 1 the node is deleted from the
	119	* database. (We have seen all the links to this file). If not found,
	120	* we add the file to the database if it has the potential for having
	121	* hard links to other files we may process (it has a link count > 1)
	122	* Return:
	123	* if found returns 1; if not found returns 0; -1 on error
	124	*/
	125
	126	int
	127	chk_lnk(ARCHD *arcn)
	128	{
	129	HRDLNK *pt;
	130	HRDLNK **ppt;
	131	u_int indx;
	132
	133	if (ltab == NULL)
	134	return(-1);
	135	/*
	136	* ignore those nodes that cannot have hard links
	137	*/
	138	if ((arcn->type == PAX_DIR) \|\| (arcn->sb.st_nlink <= 1))
	139	return(0);
	140
	141	/*
	142	* hash inode number and look for this file
	143	*/
	144	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
	145	if ((pt = ltab[indx]) != NULL) {
	146	/*
	147	* it's hash chain in not empty, walk down looking for it
	148	*/
	149	ppt = &(ltab[indx]);
	150	while (pt != NULL) {
	151	if ((pt->ino == arcn->sb.st_ino) &&
	152	(pt->dev == arcn->sb.st_dev))
	153	break;
	154	ppt = &(pt->fow);
	155	pt = pt->fow;
	156	}
	157
	158	if (pt != NULL) {
	159	/*
	160	* found a link. set the node type and copy in the
	161	* name of the file it is to link to. we need to
	162	* handle hardlinks to regular files differently than
	163	* other links.
	164	*/
	165	arcn->ln_nlen = l_strncpy(arcn->ln_name, pt->name,
	166	sizeof(arcn->ln_name) - 1);
	167	arcn->ln_name[arcn->ln_nlen] = '\0';
	168	if (arcn->type == PAX_REG)
	169	arcn->type = PAX_HRG;
	170	else
	171	arcn->type = PAX_HLK;
	172
	173	/*
	174	* if we have found all the links to this file, remove
	175	* it from the database
	176	*/
	177	if (--pt->nlink <= 1) {
	178	*ppt = pt->fow;
	179	(void)free((char *)pt->name);
	180	(void)free((char *)pt);
	181	}
	182	return(1);
	183	}
	184	}
	185
	186	/*
	187	* we never saw this file before. It has links so we add it to the
	188	* front of this hash chain
	189	*/
	190	if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) {
	191	if ((pt->name = strdup(arcn->name)) != NULL) {
	192	pt->dev = arcn->sb.st_dev;
	193	pt->ino = arcn->sb.st_ino;
	194	pt->nlink = arcn->sb.st_nlink;
	195	pt->fow = ltab[indx];
	196	ltab[indx] = pt;
	197	return(0);
	198	}
	199	(void)free((char *)pt);
	200	}
	201
	202	paxwarn(1, "Hard link table out of memory");
	203	return(-1);
	204	}
	205
	206	/*
	207	* purg_lnk
	208	* remove reference for a file that we may have added to the data base as
	209	* a potential source for hard links. We ended up not using the file, so
	210	* we do not want to accidently point another file at it later on.
	211	*/
	212
	213	void
	214	purg_lnk(ARCHD *arcn)
	215	{
	216	HRDLNK *pt;
	217	HRDLNK **ppt;
	218	u_int indx;
	219
	220	if (ltab == NULL)
	221	return;
	222	/*
	223	* do not bother to look if it could not be in the database
	224	*/
	225	if ((arcn->sb.st_nlink <= 1) \|\| (arcn->type == PAX_DIR) \|\|
	226	(arcn->type == PAX_HLK) \|\| (arcn->type == PAX_HRG))
	227	return;
	228
	229	/*
	230	* find the hash chain for this inode value, if empty return
	231	*/
	232	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
	233	if ((pt = ltab[indx]) == NULL)
	234	return;
	235
	236	/*
	237	* walk down the list looking for the inode/dev pair, unlink and
	238	* free if found
	239	*/
	240	ppt = &(ltab[indx]);
	241	while (pt != NULL) {
	242	if ((pt->ino == arcn->sb.st_ino) &&
	243	(pt->dev == arcn->sb.st_dev))
	244	break;
	245	ppt = &(pt->fow);
	246	pt = pt->fow;
	247	}
	248	if (pt == NULL)
	249	return;
	250
	251	/*
	252	* remove and free it
	253	*/
	254	*ppt = pt->fow;
	255	(void)free((char *)pt->name);
	256	(void)free((char *)pt);
	257	}
	258
	259	/*
	260	* lnk_end()
	261	* Pull apart an existing link table so we can reuse it. We do this between
	262	* read and write phases of append with update. (The format may have
	263	* used the link table, and we need to start with a fresh table for the
	264	* write phase).
	265	*/
	266
	267	void
	268	lnk_end(void)
	269	{
	270	int i;
	271	HRDLNK *pt;
	272	HRDLNK *ppt;
	273
	274	if (ltab == NULL)
	275	return;
	276
	277	for (i = 0; i < L_TAB_SZ; ++i) {
	278	if (ltab[i] == NULL)
	279	continue;
	280	pt = ltab[i];
	281	ltab[i] = NULL;
	282
	283	/*
	284	* free up each entry on this chain
	285	*/
	286	while (pt != NULL) {
	287	ppt = pt;
	288	pt = ppt->fow;
	289	(void)free((char *)ppt->name);
	290	(void)free((char *)ppt);
	291	}
	292	}
	293	return;
	294	}
	295
	296	/*
	297	* modification time table routines
	298	*
	299	* The modification time table keeps track of last modification times for all
	300	* files stored in an archive during a write phase when -u is set. We only
	301	* add a file to the archive if it is newer than a file with the same name
	302	* already stored on the archive (if there is no other file with the same
	303	* name on the archive it is added). This applies to writes and appends.
	304	* An append with an -u must read the archive and store the modification time
	305	* for every file on that archive before starting the write phase. It is clear
	306	* that this is one HUGE database. To save memory space, the actual file names
	307	* are stored in a scratch file and indexed by an in memory hash table. The
	308	* hash table is indexed by hashing the file path. The nodes in the table store
	309	* the length of the filename and the lseek offset within the scratch file
	310	* where the actual name is stored. Since there are never any deletions to this
	311	* table, fragmentation of the scratch file is never an issue. Lookups seem to
	312	* not exhibit any locality at all (files in the database are rarely
	313	* looked up more than once...). So caching is just a waste of memory. The
	314	* only limitation is the amount of scratch file space available to store the
	315	* path names.
	316	*/
	317
	318	/*
	319	* ftime_start()
	320	* create the file time hash table and open for read/write the scratch
	321	* file. (after created it is unlinked, so when we exit we leave
	322	* no witnesses).
	323	* Return:
	324	* 0 if the table and file was created ok, -1 otherwise
	325	*/
	326
	327	int
	328	ftime_start(void)
	329	{
	330
	331	if (ftab != NULL)
	332	return(0);
	333	if ((ftab = (FTM *)calloc(F_TAB_SZ, sizeof(FTM ))) == NULL) {
	334	paxwarn(1, "Cannot allocate memory for file time table");
	335	return(-1);
	336	}
	337
	338	/*
	339	* get random name and create temporary scratch file, unlink name
	340	* so it will get removed on exit
	341	*/
	342	memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
	343	if ((ffd = mkstemp(tempfile)) < 0) {
	344	syswarn(1, errno, "Unable to create temporary file: %s",
	345	tempfile);
	346	return(-1);
	347	}
	348	(void)unlink(tempfile);
	349
	350	return(0);
	351	}
	352
	353	/*
	354	* chk_ftime()
	355	* looks up entry in file time hash table. If not found, the file is
	356	* added to the hash table and the file named stored in the scratch file.
	357	* If a file with the same name is found, the file times are compared and
	358	* the most recent file time is retained. If the new file was younger (or
	359	* was not in the database) the new file is selected for storage.
	360	* Return:
	361	* 0 if file should be added to the archive, 1 if it should be skipped,
	362	* -1 on error
	363	*/
	364
	365	int
	366	chk_ftime(ARCHD *arcn)
	367	{
	368	FTM *pt;
	369	int namelen;
	370	u_int indx;
	371	char ckname[PAXPATHLEN+1];
	372
	373	/*
	374	* no info, go ahead and add to archive
	375	*/
	376	if (ftab == NULL)
	377	return(0);
	378
	379	/*
	380	* hash the pathname and look up in table
	381	*/
	382	namelen = arcn->nlen;
	383	indx = st_hash(arcn->name, namelen, F_TAB_SZ);
	384	if ((pt = ftab[indx]) != NULL) {
	385	/*
	386	* the hash chain is not empty, walk down looking for match
	387	* only read up the path names if the lengths match, speeds
	388	* up the search a lot
	389	*/
	390	while (pt != NULL) {
	391	if (pt->namelen == namelen) {
	392	/*
	393	* potential match, have to read the name
	394	* from the scratch file.
	395	*/
	396	if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) {
	397	syswarn(1, errno,
	398	"Failed ftime table seek");
	399	return(-1);
	400	}
	401	if (read(ffd, ckname, namelen) != namelen) {
	402	syswarn(1, errno,
	403	"Failed ftime table read");
	404	return(-1);
	405	}
	406
	407	/*
	408	* if the names match, we are done
	409	*/
	410	if (!strncmp(ckname, arcn->name, namelen))
	411	break;
	412	}
	413
	414	/*
	415	* try the next entry on the chain
	416	*/
	417	pt = pt->fow;
	418	}
	419
	420	if (pt != NULL) {
	421	/*
	422	* found the file, compare the times, save the newer
	423	*/
	424	if (arcn->sb.st_mtime > pt->mtime) {
	425	/*
	426	* file is newer
	427	*/
	428	pt->mtime = arcn->sb.st_mtime;
	429	return(0);
	430	}
	431	/*
	432	* file is older
	433	*/
	434	return(1);
	435	}
	436	}
	437
	438	/*
	439	* not in table, add it
	440	*/
	441	if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) {
	442	/*
	443	* add the name at the end of the scratch file, saving the
	444	* offset. add the file to the head of the hash chain
	445	*/
	446	if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) {
	447	if (write(ffd, arcn->name, namelen) == namelen) {
	448	pt->mtime = arcn->sb.st_mtime;
	449	pt->namelen = namelen;
	450	pt->fow = ftab[indx];
	451	ftab[indx] = pt;
	452	return(0);
	453	}
	454	syswarn(1, errno, "Failed write to file time table");
	455	} else
	456	syswarn(1, errno, "Failed seek on file time table");
	457	} else
	458	paxwarn(1, "File time table ran out of memory");
	459
	460	if (pt != NULL)
	461	(void)free((char *)pt);
	462	return(-1);
	463	}
	464
	465	/*
	466	* Interactive rename table routines
	467	*
	468	* The interactive rename table keeps track of the new names that the user
	469	* assigns to files from tty input. Since this map is unique for each file
	470	* we must store it in case there is a reference to the file later in archive
	471	* (a link). Otherwise we will be unable to find the file we know was
	472	* extracted. The remapping of these files is stored in a memory based hash
	473	* table (it is assumed since input must come from /dev/tty, it is unlikely to
	474	* be a very large table).
	475	*/
	476
	477	/*
	478	* name_start()
	479	* create the interactive rename table
	480	* Return:
	481	* 0 if successful, -1 otherwise
	482	*/
	483
	484	int
	485	name_start(void)
	486	{
	487	if (ntab != NULL)
	488	return(0);
	489	if ((ntab = (NAMT *)calloc(N_TAB_SZ, sizeof(NAMT ))) == NULL) {
	490	paxwarn(1, "Cannot allocate memory for interactive rename table");
	491	return(-1);
	492	}
	493	return(0);
	494	}
	495
	496	/*
	497	* add_name()
	498	* add the new name to old name mapping just created by the user.
	499	* If an old name mapping is found (there may be duplicate names on an
	500	* archive) only the most recent is kept.
	501	* Return:
	502	* 0 if added, -1 otherwise
	503	*/
	504
	505	int
	506	add_name(char oname, int onamelen, char nname)
	507	{
	508	NAMT *pt;
	509	u_int indx;
	510
	511	if (ntab == NULL) {
	512	/*
	513	* should never happen
	514	*/
	515	paxwarn(0, "No interactive rename table, links may fail\n");
	516	return(0);
	517	}
	518
	519	/*
	520	* look to see if we have already mapped this file, if so we
	521	* will update it
	522	*/
	523	indx = st_hash(oname, onamelen, N_TAB_SZ);
	524	if ((pt = ntab[indx]) != NULL) {
	525	/*
	526	* look down the has chain for the file
	527	*/
	528	while ((pt != NULL) && (strcmp(oname, pt->oname) != 0))
	529	pt = pt->fow;
	530
	531	if (pt != NULL) {
	532	/*
	533	* found an old mapping, replace it with the new one
	534	* the user just input (if it is different)
	535	*/
	536	if (strcmp(nname, pt->nname) == 0)
	537	return(0);
	538
	539	(void)free((char *)pt->nname);
	540	if ((pt->nname = strdup(nname)) == NULL) {
	541	paxwarn(1, "Cannot update rename table");
	542	return(-1);
	543	}
	544	return(0);
	545	}
	546	}
	547
	548	/*
	549	* this is a new mapping, add it to the table
	550	*/
	551	if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) {
	552	if ((pt->oname = strdup(oname)) != NULL) {
	553	if ((pt->nname = strdup(nname)) != NULL) {
	554	pt->fow = ntab[indx];
	555	ntab[indx] = pt;
	556	return(0);
	557	}
	558	(void)free((char *)pt->oname);
	559	}
	560	(void)free((char *)pt);
	561	}
	562	paxwarn(1, "Interactive rename table out of memory");
	563	return(-1);
	564	}
	565
	566	/*
	567	* sub_name()
	568	* look up a link name to see if it points at a file that has been
	569	* remapped by the user. If found, the link is adjusted to contain the
	570	* new name (oname is the link to name)
	571	*/
	572
	573	void
	574	sub_name(char oname, int onamelen, size_t onamesize)
	575	{
	576	NAMT *pt;
	577	u_int indx;
	578
	579	if (ntab == NULL)
	580	return;
	581	/*
	582	* look the name up in the hash table
	583	*/
	584	indx = st_hash(oname, *onamelen, N_TAB_SZ);
	585	if ((pt = ntab[indx]) == NULL)
	586	return;
	587
	588	while (pt != NULL) {
	589	/*
	590	* walk down the hash chain looking for a match
	591	*/
	592	if (strcmp(oname, pt->oname) == 0) {
	593	/*
	594	* found it, replace it with the new name
	595	* and return (we know that oname has enough space)
	596	*/
	597	*onamelen = l_strncpy(oname, pt->nname, onamesize - 1);
	598	oname[*onamelen] = '\0';
	599	return;
	600	}
	601	pt = pt->fow;
	602	}
	603
	604	/*
	605	* no match, just return
	606	*/
	607	return;
	608	}
	609
	610	/*
	611	* device/inode mapping table routines
	612	* (used with formats that store device and inodes fields)
	613	*
	614	* device/inode mapping tables remap the device field in an archive header. The
	615	* device/inode fields are used to determine when files are hard links to each
	616	* other. However these values have very little meaning outside of that. This
	617	* database is used to solve one of two different problems.
	618	*
	619	* 1) when files are appended to an archive, while the new files may have hard
	620	* links to each other, you cannot determine if they have hard links to any
	621	* file already stored on the archive from a prior run of pax. We must assume
	622	* that these inode/device pairs are unique only within a SINGLE run of pax
	623	* (which adds a set of files to an archive). So we have to make sure the
	624	* inode/dev pairs we add each time are always unique. We do this by observing
	625	* while the inode field is very dense, the use of the dev field is fairly
	626	* sparse. Within each run of pax, we remap any device number of a new archive
	627	* member that has a device number used in a prior run and already stored in a
	628	* file on the archive. During the read phase of the append, we store the
	629	* device numbers used and mark them to not be used by any file during the
	630	* write phase. If during write we go to use one of those old device numbers,
	631	* we remap it to a new value.
	632	*
	633	* 2) Often the fields in the archive header used to store these values are
	634	* too small to store the entire value. The result is an inode or device value
	635	* which can be truncated. This really can foul up an archive. With truncation
	636	* we end up creating links between files that are really not links (after
	637	* truncation the inodes are the same value). We address that by detecting
	638	* truncation and forcing a remap of the device field to split truncated
	639	* inodes away from each other. Each truncation creates a pattern of bits that
	640	* are removed. We use this pattern of truncated bits to partition the inodes
	641	* on a single device to many different devices (each one represented by the
	642	* truncated bit pattern). All inodes on the same device that have the same
	643	* truncation pattern are mapped to the same new device. Two inodes that
	644	* truncate to the same value clearly will always have different truncation
	645	* bit patterns, so they will be split from away each other. When we spot
	646	* device truncation we remap the device number to a non truncated value.
	647	* (for more info see table.h for the data structures involved).
	648	*/
	649
	650	/*
	651	* dev_start()
	652	* create the device mapping table
	653	* Return:
	654	* 0 if successful, -1 otherwise
	655	*/
	656
	657	int
	658	dev_start(void)
	659	{
	660	if (dtab != NULL)
	661	return(0);
	662	if ((dtab = (DEVT *)calloc(D_TAB_SZ, sizeof(DEVT ))) == NULL) {
	663	paxwarn(1, "Cannot allocate memory for device mapping table");
	664	return(-1);
	665	}
	666	return(0);
	667	}
	668
	669	/*
	670	* add_dev()
	671	* add a device number to the table. this will force the device to be
	672	* remapped to a new value if it be used during a write phase. This
	673	* function is called during the read phase of an append to prohibit the
	674	* use of any device number already in the archive.
	675	* Return:
	676	* 0 if added ok, -1 otherwise
	677	*/
	678
	679	int
	680	add_dev(ARCHD *arcn)
	681	{
	682	if (chk_dev(arcn->sb.st_dev, 1) == NULL)
	683	return(-1);
	684	return(0);
	685	}
	686
	687	/*
	688	* chk_dev()
	689	* check for a device value in the device table. If not found and the add
	690	* flag is set, it is added. This does NOT assign any mapping values, just
	691	* adds the device number as one that need to be remapped. If this device
	692	* is already mapped, just return with a pointer to that entry.
	693	* Return:
	694	* pointer to the entry for this device in the device map table. Null
	695	* if the add flag is not set and the device is not in the table (it is
	696	* not been seen yet). If add is set and the device cannot be added, null
	697	* is returned (indicates an error).
	698	*/
	699
	700	static DEVT *
	701	chk_dev(dev_t dev, int add)
	702	{
	703	DEVT *pt;
	704	u_int indx;
	705
	706	if (dtab == NULL)
	707	return(NULL);
	708	/*
	709	* look to see if this device is already in the table
	710	*/
	711	indx = ((unsigned)dev) % D_TAB_SZ;
	712	if ((pt = dtab[indx]) != NULL) {
	713	while ((pt != NULL) && (pt->dev != dev))
	714	pt = pt->fow;
	715
	716	/*
	717	* found it, return a pointer to it
	718	*/
	719	if (pt != NULL)
	720	return(pt);
	721	}
	722
	723	/*
	724	* not in table, we add it only if told to as this may just be a check
	725	* to see if a device number is being used.
	726	*/
	727	if (add == 0)
	728	return(NULL);
	729
	730	/*
	731	* allocate a node for this device and add it to the front of the hash
	732	* chain. Note we do not assign remaps values here, so the pt->list
	733	* list must be NULL.
	734	*/
	735	if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) {
	736	paxwarn(1, "Device map table out of memory");
	737	return(NULL);
	738	}
	739	pt->dev = dev;
	740	pt->list = NULL;
	741	pt->fow = dtab[indx];
	742	dtab[indx] = pt;
	743	return(pt);
	744	}
	745	/*
	746	* map_dev()
	747	* given an inode and device storage mask (the mask has a 1 for each bit
	748	* the archive format is able to store in a header), we check for inode
	749	* and device truncation and remap the device as required. Device mapping
	750	* can also occur when during the read phase of append a device number was
	751	* seen (and was marked as do not use during the write phase). WE ASSUME
	752	* that unsigned longs are the same size or bigger than the fields used
	753	* for ino_t and dev_t. If not the types will have to be changed.
	754	* Return:
	755	* 0 if all ok, -1 otherwise.
	756	*/
	757
	758	int
	759	map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)
	760	{
	761	DEVT *pt;
	762	DLIST *dpt;
	763	static dev_t lastdev = 0; /* next device number to try */
	764	int trc_ino = 0;
	765	int trc_dev = 0;
	766	ino_t trunc_bits = 0;
	767	ino_t nino;
	768
	769	if (dtab == NULL)
	770	return(0);
	771	/*
	772	* check for device and inode truncation, and extract the truncated
	773	* bit pattern.
	774	*/
	775	if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)
	776	++trc_dev;
	777	if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {
	778	++trc_ino;
	779	trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);
	780	}
	781
	782	/*
	783	* see if this device is already being mapped, look up the device
	784	* then find the truncation bit pattern which applies
	785	*/
	786	if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) {
	787	/*
	788	* this device is already marked to be remapped
	789	*/
	790	for (dpt = pt->list; dpt != NULL; dpt = dpt->fow)
	791	if (dpt->trunc_bits == trunc_bits)
	792	break;
	793
	794	if (dpt != NULL) {
	795	/*
	796	* we are being remapped for this device and pattern
	797	* change the device number to be stored and return
	798	*/
	799	arcn->sb.st_dev = dpt->dev;
	800	arcn->sb.st_ino = nino;
	801	return(0);
	802	}
	803	} else {
	804	/*
	805	* this device is not being remapped YET. if we do not have any
	806	* form of truncation, we do not need a remap
	807	*/
	808	if (!trc_ino && !trc_dev)
	809	return(0);
	810
	811	/*
	812	* we have truncation, have to add this as a device to remap
	813	*/
	814	if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL)
	815	goto bad;
	816
	817	/*
	818	* if we just have a truncated inode, we have to make sure that
	819	* all future inodes that do not truncate (they have the
	820	* truncation pattern of all 0's) continue to map to the same
	821	* device number. We probably have already written inodes with
	822	* this device number to the archive with the truncation
	823	* pattern of all 0's. So we add the mapping for all 0's to the
	824	* same device number.
	825	*/
	826	if (!trc_dev && (trunc_bits != 0)) {
	827	if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)
	828	goto bad;
	829	dpt->trunc_bits = 0;
	830	dpt->dev = arcn->sb.st_dev;
	831	dpt->fow = pt->list;
	832	pt->list = dpt;
	833	}
	834	}
	835
	836	/*
	837	* look for a device number not being used. We must watch for wrap
	838	* around on lastdev (so we do not get stuck looking forever!)
	839	*/
	840	while (++lastdev > 0) {
	841	if (chk_dev(lastdev, 0) != NULL)
	842	continue;
	843	/*
	844	* found an unused value. If we have reached truncation point
	845	* for this format we are hosed, so we give up. Otherwise we
	846	* mark it as being used.
	847	*/
	848	if (((lastdev & ((dev_t)dev_mask)) != lastdev) \|\|
	849	(chk_dev(lastdev, 1) == NULL))
	850	goto bad;
	851	break;
	852	}
	853
	854	if ((lastdev <= 0) \|\| ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL))
	855	goto bad;
	856
	857	/*
	858	* got a new device number, store it under this truncation pattern.
	859	* change the device number this file is being stored with.
	860	*/
	861	dpt->trunc_bits = trunc_bits;
	862	dpt->dev = lastdev;
	863	dpt->fow = pt->list;
	864	pt->list = dpt;
	865	arcn->sb.st_dev = lastdev;
	866	arcn->sb.st_ino = nino;
	867	return(0);
	868
	869	bad:
	870	paxwarn(1, "Unable to fix truncated inode/device field when storing %s",
	871	arcn->name);
	872	paxwarn(0, "Archive may create improper hard links when extracted");
	873	return(0);
	874	}
	875
	876	/*
	877	* directory access/mod time reset table routines (for directories READ by pax)
	878	*
	879	* The pax -t flag requires that access times of archive files to be the same
	880	* before being read by pax. For regular files, access time is restored after
	881	* the file has been copied. This database provides the same functionality for
	882	* directories read during file tree traversal. Restoring directory access time
	883	* is more complex than files since directories may be read several times until
	884	* all the descendants in their subtree are visited by fts. Directory access
	885	* and modification times are stored during the fts pre-order visit (done
	886	* before any descendants in the subtree is visited) and restored after the
	887	* fts post-order visit (after all the descendants have been visited). In the
	888	* case of premature exit from a subtree (like from the effects of -n), any
	889	* directory entries left in this database are reset during final cleanup
	890	* operations of pax. Entries are hashed by inode number for fast lookup.
	891	*/
	892
	893	/*
	894	* atdir_start()
	895	* create the directory access time database for directories READ by pax.
	896	* Return:
	897	* 0 is created ok, -1 otherwise.
	898	*/
	899
	900	int
	901	atdir_start(void)
	902	{
	903	if (atab != NULL)
	904	return(0);
	905	if ((atab = (ATDIR *)calloc(A_TAB_SZ, sizeof(ATDIR ))) == NULL) {
	906	paxwarn(1,"Cannot allocate space for directory access time table");
	907	return(-1);
	908	}
	909	return(0);
	910	}
	911
	912
	913	/*
	914	* atdir_end()
	915	* walk through the directory access time table and reset the access time
	916	* of any directory who still has an entry left in the database. These
	917	* entries are for directories READ by pax
	918	*/
	919
	920	void
	921	atdir_end(void)
	922	{
	923	ATDIR *pt;
	924	int i;
	925
	926	if (atab == NULL)
	927	return;
	928	/*
	929	* for each non-empty hash table entry reset all the directories
	930	* chained there.
	931	*/
	932	for (i = 0; i < A_TAB_SZ; ++i) {
	933	if ((pt = atab[i]) == NULL)
	934	continue;
	935	/*
	936	* remember to force the times, set_ftime() looks at pmtime
	937	* and patime, which only applies to things CREATED by pax,
	938	* not read by pax. Read time reset is controlled by -t.
	939	*/
	940	for (; pt != NULL; pt = pt->fow)
	941	set_ftime(pt->name, pt->mtime, pt->atime, 1);
	942	}
	943	}
	944
	945	/*
	946	* add_atdir()
	947	* add a directory to the directory access time table. Table is hashed
	948	* and chained by inode number. This is for directories READ by pax
	949	*/
	950
	951	void
	952	add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime)
	953	{
	954	ATDIR *pt;
	955	u_int indx;
	956
	957	if (atab == NULL)
	958	return;
	959
	960	/*
	961	* make sure this directory is not already in the table, if so just
	962	* return (the older entry always has the correct time). The only
	963	* way this will happen is when the same subtree can be traversed by
	964	* different args to pax and the -n option is aborting fts out of a
	965	* subtree before all the post-order visits have been made).
	966	*/
	967	indx = ((unsigned)ino) % A_TAB_SZ;
	968	if ((pt = atab[indx]) != NULL) {
	969	while (pt != NULL) {
	970	if ((pt->ino == ino) && (pt->dev == dev))
	971	break;
	972	pt = pt->fow;
	973	}
	974
	975	/*
	976	* oops, already there. Leave it alone.
	977	*/
	978	if (pt != NULL)
	979	return;
	980	}
	981
	982	/*
	983	* add it to the front of the hash chain
	984	*/
	985	if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) {
	986	if ((pt->name = strdup(fname)) != NULL) {
	987	pt->dev = dev;
	988	pt->ino = ino;
	989	pt->mtime = mtime;
	990	pt->atime = atime;
	991	pt->fow = atab[indx];
	992	atab[indx] = pt;
	993	return;
	994	}
	995	(void)free((char *)pt);
	996	}
	997
	998	paxwarn(1, "Directory access time reset table ran out of memory");
	999	return;
	1000	}
	1001
	1002	/*
	1003	* get_atdir()
	1004	* look up a directory by inode and device number to obtain the access
	1005	* and modification time you want to set to. If found, the modification
	1006	* and access time parameters are set and the entry is removed from the
	1007	* table (as it is no longer needed). These are for directories READ by
	1008	* pax
	1009	* Return:
	1010	* 0 if found, -1 if not found.
	1011	*/
	1012
	1013	int
	1014	get_atdir(dev_t dev, ino_t ino, time_t mtime, time_t atime)
	1015	{
	1016	ATDIR *pt;
	1017	ATDIR **ppt;
	1018	u_int indx;
	1019
	1020	if (atab == NULL)
	1021	return(-1);
	1022	/*
	1023	* hash by inode and search the chain for an inode and device match
	1024	*/
	1025	indx = ((unsigned)ino) % A_TAB_SZ;
	1026	if ((pt = atab[indx]) == NULL)
	1027	return(-1);
	1028
	1029	ppt = &(atab[indx]);
	1030	while (pt != NULL) {
	1031	if ((pt->ino == ino) && (pt->dev == dev))
	1032	break;
	1033	/*
	1034	* no match, go to next one
	1035	*/
	1036	ppt = &(pt->fow);
	1037	pt = pt->fow;
	1038	}
	1039
	1040	/*
	1041	* return if we did not find it.
	1042	*/
	1043	if (pt == NULL)
	1044	return(-1);
	1045
	1046	/*
	1047	* found it. return the times and remove the entry from the table.
	1048	*/
	1049	*ppt = pt->fow;
	1050	*mtime = pt->mtime;
	1051	*atime = pt->atime;
	1052	(void)free((char *)pt->name);
	1053	(void)free((char *)pt);
	1054	return(0);
	1055	}
	1056
	1057	/*
	1058	* directory access mode and time storage routines (for directories CREATED
	1059	* by pax).
	1060	*
	1061	* Pax requires that extracted directories, by default, have their access/mod
	1062	* times and permissions set to the values specified in the archive. During the
	1063	* actions of extracting (and creating the destination subtree during -rw copy)
	1064	* directories extracted may be modified after being created. Even worse is
	1065	* that these directories may have been created with file permissions which
	1066	* prohibit any descendants of these directories from being extracted. When
	1067	* directories are created by pax, access rights may be added to permit the
	1068	* creation of files in their subtree. Every time pax creates a directory, the
	1069	* times and file permissions specified by the archive are stored. After all
	1070	* files have been extracted (or copied), these directories have their times
	1071	* and file modes reset to the stored values. The directory info is restored in
	1072	* reverse order as entries were added to the data file from root to leaf. To
	1073	* restore atime properly, we must go backwards. The data file consists of
	1074	* records with two parts, the file name followed by a DIRDATA trailer. The
	1075	* fixed sized trailer contains the size of the name plus the off_t location in
	1076	* the file. To restore we work backwards through the file reading the trailer
	1077	* then the file name.
	1078	*/
	1079
	1080	/*
	1081	* dir_start()
	1082	* set up the directory time and file mode storage for directories CREATED
	1083	* by pax.
	1084	* Return:
	1085	* 0 if ok, -1 otherwise
	1086	*/
	1087
	1088	int
	1089	dir_start(void)
	1090	{
	1091
	1092	if (dirfd != -1)
	1093	return(0);
	1094
	1095	/*
	1096	* unlink the file so it goes away at termination by itself
	1097	*/
	1098	memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
	1099	if ((dirfd = mkstemp(tempfile)) >= 0) {
	1100	(void)unlink(tempfile);
	1101	return(0);
	1102	}
	1103	paxwarn(1, "Unable to create temporary file for directory times: %s",
	1104	tempfile);
	1105	return(-1);
	1106	}
	1107
	1108	/*
	1109	* add_dir()
	1110	* add the mode and times for a newly CREATED directory
	1111	* name is name of the directory, psb the stat buffer with the data in it,
	1112	* frc_mode is a flag that says whether to force the setting of the mode
	1113	* (ignoring the user set values for preserving file mode). Frc_mode is
	1114	* for the case where we created a file and found that the resulting
	1115	* directory was not writeable and the user asked for file modes to NOT
	1116	* be preserved. (we have to preserve what was created by default, so we
	1117	* have to force the setting at the end. this is stated explicitly in the
	1118	* pax spec)
	1119	*/
	1120
	1121	void
	1122	add_dir(char name, int nlen, struct stat psb, int frc_mode)
	1123	{
	1124	DIRDATA dblk;
	1125
	1126	if (dirfd < 0)
	1127	return;
	1128
	1129	/*
	1130	* get current position (where file name will start) so we can store it
	1131	* in the trailer
	1132	*/
	1133	if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) {
	1134	paxwarn(1,"Unable to store mode and times for directory: %s",name);
	1135	return;
	1136	}
	1137
	1138	/*
	1139	* write the file name followed by the trailer
	1140	*/
	1141	dblk.nlen = nlen + 1;
	1142	dblk.mode = psb->st_mode & 0xffff;
	1143	dblk.mtime = psb->st_mtime;
	1144	dblk.atime = psb->st_atime;
	1145	dblk.frc_mode = frc_mode;
	1146	if ((write(dirfd, name, dblk.nlen) == dblk.nlen) &&
	1147	(write(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) {
	1148	++dircnt;
	1149	return;
	1150	}
	1151
	1152	paxwarn(1,"Unable to store mode and times for created directory: %s",name);
	1153	return;
	1154	}
	1155
	1156	/*
	1157	* proc_dir()
	1158	* process all file modes and times stored for directories CREATED
	1159	* by pax
	1160	*/
	1161
	1162	void
	1163	proc_dir(void)
	1164	{
	1165	char name[PAXPATHLEN+1];
	1166	DIRDATA dblk;
	1167	u_long cnt;
	1168
	1169	if (dirfd < 0)
	1170	return;
	1171	/*
	1172	* read backwards through the file and process each directory
	1173	*/
	1174	for (cnt = 0; cnt < dircnt; ++cnt) {
	1175	/*
	1176	* read the trailer, then the file name, if this fails
	1177	* just give up.
	1178	*/
	1179	if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0)
	1180	break;
	1181	if (read(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk))
	1182	break;
	1183	if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)
	1184	break;
	1185	if (read(dirfd, name, dblk.nlen) != dblk.nlen)
	1186	break;
	1187	if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)
	1188	break;
	1189
	1190	/*
	1191	* frc_mode set, make sure we set the file modes even if
	1192	* the user didn't ask for it (see file_subs.c for more info)
	1193	*/
	1194	if (pmode \|\| dblk.frc_mode)
	1195	set_pmode(name, dblk.mode);
	1196	if (patime \|\| pmtime)
	1197	set_ftime(name, dblk.mtime, dblk.atime, 0);
	1198	}
	1199
	1200	(void)close(dirfd);
	1201	dirfd = -1;
	1202	if (cnt != dircnt)
	1203	paxwarn(1,"Unable to set mode and times for created directories");
	1204	return;
	1205	}
	1206
	1207	/*
	1208	* database independent routines
	1209	*/
	1210
	1211	/*
	1212	* st_hash()
	1213	* hashes filenames to a u_int for hashing into a table. Looks at the tail
	1214	* end of file, as this provides far better distribution than any other
	1215	* part of the name. For performance reasons we only care about the last
	1216	* MAXKEYLEN chars (should be at LEAST large enough to pick off the file
	1217	* name). Was tested on 500,000 name file tree traversal from the root
	1218	* and gave almost a perfectly uniform distribution of keys when used with
	1219	* prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)
	1220	* chars at a time and pads with 0 for last addition.
	1221	* Return:
	1222	* the hash value of the string MOD (%) the table size.
	1223	*/
	1224
	1225	u_int
	1226	st_hash(char *name, int len, int tabsz)
	1227	{
	1228	char *pt;
	1229	char *dest;
	1230	char *end;
	1231	int i;
	1232	u_int key = 0;
	1233	int steps;
	1234	int res;
	1235	u_int val;
	1236
	1237	/*
	1238	* only look at the tail up to MAXKEYLEN, we do not need to waste
	1239	* time here (remember these are pathnames, the tail is what will
	1240	* spread out the keys)
	1241	*/
	1242	if (len > MAXKEYLEN) {
	1243	pt = &(name[len - MAXKEYLEN]);
	1244	len = MAXKEYLEN;
	1245	} else
	1246	pt = name;
	1247
	1248	/*
	1249	* calculate the number of u_int size steps in the string and if
	1250	* there is a runt to deal with
	1251	*/
	1252	steps = len/sizeof(u_int);
	1253	res = len % sizeof(u_int);
	1254
	1255	/*
	1256	* add up the value of the string in unsigned integer sized pieces
	1257	* too bad we cannot have unsigned int aligned strings, then we
	1258	* could avoid the expensive copy.
	1259	*/
	1260	for (i = 0; i < steps; ++i) {
	1261	end = pt + sizeof(u_int);
	1262	dest = (char *)&val;
	1263	while (pt < end)
	1264	dest++ = pt++;
	1265	key += val;
	1266	}
	1267
	1268	/*
	1269	* add in the runt padded with zero to the right
	1270	*/
	1271	if (res) {
	1272	val = 0;
	1273	end = pt + res;
	1274	dest = (char *)&val;
	1275	while (pt < end)
	1276	dest++ = pt++;
	1277	key += val;
	1278	}
	1279
	1280	/*
	1281	* return the result mod the table size
	1282	*/
	1283	return(key % tabsz);
	1284	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: